-rwxr-xr-x  .custom-format.py | 63
-rw-r--r--  .github/ISSUE_TEMPLATE/bug_report.md | 31
-rw-r--r--  .github/ISSUE_TEMPLATE/feature_request.md | 20
-rw-r--r--  Android.bp | 4
-rw-r--r--  Dockerfile | 23
-rw-r--r--  GNUmakefile | 29
-rw-r--r--  GNUmakefile.llvm | 15
-rw-r--r--  README.md | 68
-rwxr-xr-x  afl-cmin | 12
-rwxr-xr-x  afl-cmin.bash | 4
-rwxr-xr-x  afl-system-config | 1
-rw-r--r--  docs/Changelog.md | 51
-rw-r--r--  docs/env_variables.md | 36
-rw-r--r--  docs/ideas.md | 49
-rw-r--r--  docs/rpc_statsd.md | 143
-rw-r--r--  docs/statsd/grafana-afl++.json | 1816
-rw-r--r--  docs/visualization/statsd-grafana.png | bin 0 -> 163646 bytes
-rw-r--r--  include/afl-fuzz.h | 34
-rw-r--r--  include/android-ashmem.h | 84
-rw-r--r--  include/cmplog.h | 23
-rw-r--r--  include/common.h | 1
-rw-r--r--  include/config.h | 38
-rw-r--r--  include/coverage-32.h | 2
-rw-r--r--  include/coverage-64.h | 4
-rw-r--r--  include/debug.h | 24
-rw-r--r--  include/envs.h | 6
-rw-r--r--  include/forkserver.h | 5
-rw-r--r--  include/sharedmem.h | 1
-rw-r--r--  include/types.h | 36
-rw-r--r--  instrumentation/LLVMInsTrim.so.cc | 2
-rw-r--r--  instrumentation/README.instrument_list.md | 9
-rw-r--r--  instrumentation/SanitizerCoverageLTO.so.cc | 22
-rw-r--r--  instrumentation/SanitizerCoveragePCGUARD.so.cc | 46
-rw-r--r--  instrumentation/afl-compiler-rt.o.c | 448
-rw-r--r--  instrumentation/afl-llvm-common.cc | 2
-rw-r--r--  instrumentation/afl-llvm-dict2file.so.cc | 1
-rw-r--r--  instrumentation/afl-llvm-lto-instrumentation.so.cc | 15
-rw-r--r--  instrumentation/afl-llvm-pass.so.cc | 1
-rw-r--r--  instrumentation/cmplog-instructions-pass.cc | 604
-rw-r--r--  instrumentation/cmplog-routines-pass.cc | 216
-rw-r--r--  instrumentation/compare-transform-pass.so.cc | 36
-rw-r--r--  instrumentation/llvm-ngram-coverage.h | 2
-rw-r--r--  instrumentation/split-compares-pass.so.cc | 37
-rw-r--r--  qemu_mode/QEMUAFL_VERSION | 2
-rwxr-xr-x  qemu_mode/build_qemu_support.sh | 39
-rw-r--r--  qemu_mode/libcompcov/libcompcov.so.c | 17
-rw-r--r--  qemu_mode/libqasan/Makefile | 44
-rw-r--r--  qemu_mode/libqasan/README.md | 28
-rw-r--r--  qemu_mode/libqasan/dlmalloc.c | 7328
-rw-r--r--  qemu_mode/libqasan/hooks.c | 662
-rw-r--r--  qemu_mode/libqasan/libqasan.c | 94
-rw-r--r--  qemu_mode/libqasan/libqasan.h | 132
-rw-r--r--  qemu_mode/libqasan/malloc.c | 364
-rw-r--r--  qemu_mode/libqasan/map_macro.h | 74
-rw-r--r--  qemu_mode/libqasan/patch.c | 243
-rw-r--r--  qemu_mode/libqasan/string.c | 339
-rw-r--r--  qemu_mode/libqasan/uninstrument.c | 83
m---------  qemu_mode/qemuafl | 0
-rw-r--r--  qemu_mode/unsigaction/Makefile | 12
-rw-r--r--  src/afl-analyze.c | 25
-rw-r--r--  src/afl-cc.c | 186
-rw-r--r--  src/afl-common.c | 66
-rw-r--r--  src/afl-forkserver.c | 96
-rw-r--r--  src/afl-fuzz-bitmap.c | 9
-rw-r--r--  src/afl-fuzz-cmplog.c | 2
-rw-r--r--  src/afl-fuzz-extras.c | 2
-rw-r--r--  src/afl-fuzz-init.c | 218
-rw-r--r--  src/afl-fuzz-one.c | 108
-rw-r--r--  src/afl-fuzz-queue.c | 47
-rw-r--r--  src/afl-fuzz-redqueen.c | 2283
-rw-r--r--  src/afl-fuzz-run.c | 4
-rw-r--r--  src/afl-fuzz-state.c | 8
-rw-r--r--  src/afl-fuzz-stats.c | 123
-rw-r--r--  src/afl-fuzz-statsd.c | 63
-rw-r--r--  src/afl-fuzz.c | 301
-rw-r--r--  src/afl-ld-lto.c | 21
-rw-r--r--  src/afl-sharedmem.c | 14
-rw-r--r--  src/afl-showmap.c | 121
-rw-r--r--  src/afl-tmin.c | 83
-rw-r--r--  test-instr.c | 5
-rwxr-xr-x  test/test-basic.sh | 26
-rwxr-xr-x  test/test-gcc-plugin.sh | 18
-rwxr-xr-x  test/test-libextensions.sh | 10
-rwxr-xr-x  test/test-llvm-lto.sh | 10
-rwxr-xr-x  test/test-llvm.sh | 16
-rwxr-xr-x  test/test-qemu-mode.sh | 10
-rw-r--r--  unicorn_mode/UNICORNAFL_VERSION | 2
-rwxr-xr-x  unicorn_mode/build_unicorn_support.sh | 7
-rw-r--r--  unicorn_mode/helper_scripts/unicorn_dumper_gdb.py | 108
-rw-r--r--  unicorn_mode/helper_scripts/unicorn_dumper_ida.py | 207
-rw-r--r--  unicorn_mode/helper_scripts/unicorn_dumper_lldb.py | 241
-rw-r--r--  unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py | 143
-rw-r--r--  unicorn_mode/samples/compcov_x64/compcov_test_harness.py | 76
-rw-r--r--  unicorn_mode/samples/simple/simple_test_harness.py | 81
-rw-r--r--  unicorn_mode/samples/simple/simple_test_harness_alt.py | 100
-rw-r--r--  unicorn_mode/samples/speedtest/.gitignore | 6
-rw-r--r--  unicorn_mode/samples/speedtest/Makefile | 17
-rw-r--r--  unicorn_mode/samples/speedtest/README.md | 65
-rw-r--r--  unicorn_mode/samples/speedtest/c/Makefile | 54
-rw-r--r--  unicorn_mode/samples/speedtest/c/harness.c | 390
-rw-r--r--  unicorn_mode/samples/speedtest/get_offsets.py | 77
-rw-r--r--  unicorn_mode/samples/speedtest/python/Makefile | 8
-rw-r--r--  unicorn_mode/samples/speedtest/python/harness.py | 277
-rw-r--r--  unicorn_mode/samples/speedtest/rust/.gitignore | 2
-rw-r--r--  unicorn_mode/samples/speedtest/rust/Cargo.toml | 15
-rw-r--r--  unicorn_mode/samples/speedtest/rust/Makefile | 17
-rw-r--r--  unicorn_mode/samples/speedtest/rust/src/main.rs | 232
-rw-r--r--  unicorn_mode/samples/speedtest/sample_inputs/a | 1
-rw-r--r--  unicorn_mode/samples/speedtest/target.c | 77
m---------  unicorn_mode/unicornafl | 0
-rwxr-xr-x  unicorn_mode/update_uc_ref.sh | 2
-rw-r--r--  utils/afl_frida/README.md | 2
-rw-r--r--  utils/afl_frida/afl-frida.c | 173
-rw-r--r--  utils/afl_untracer/afl-untracer.c | 15
-rw-r--r--  utils/afl_untracer/ida_get_patchpoints.py | 17
-rw-r--r--  utils/aflpp_driver/aflpp_driver.c | 3
-rw-r--r--  utils/custom_mutators/XmlMutatorMin.py | 110
-rw-r--r--  utils/custom_mutators/common.py | 12
-rw-r--r--  utils/custom_mutators/example.py | 13
-rw-r--r--  utils/custom_mutators/simple-chunk-replace.py | 16
-rw-r--r--  utils/custom_mutators/wrapper_afl_min.py | 13
-rw-r--r--  utils/defork/defork.c | 1

122 files changed, 18437 insertions(+), 1543 deletions(-)
diff --git a/.custom-format.py b/.custom-format.py
index fad74a69..346e4b07 100755
--- a/.custom-format.py
+++ b/.custom-format.py
@@ -33,13 +33,13 @@ if CLANG_FORMAT_BIN is None:
         o, _ = p.communicate()
         o = str(o, "utf-8")
         o = re.sub(r".*ersion ", "", o)
-        #o = o[len("clang-format version "):].strip()
-        o = o[:o.find(".")]
+        # o = o[len("clang-format version "):].strip()
+        o = o[: o.find(".")]
         o = int(o)
     except:
-        print ("clang-format-11 is needed. Aborted.")
+        print("clang-format-11 is needed. Aborted.")
         exit(1)
-    #if o < 7:
+    # if o < 7:
     #    if subprocess.call(['which', 'clang-format-7'], stdout=subprocess.PIPE) == 0:
     #        CLANG_FORMAT_BIN = 'clang-format-7'
     #    elif subprocess.call(['which', 'clang-format-8'], stdout=subprocess.PIPE) == 0:
@@ -52,8 +52,8 @@ if CLANG_FORMAT_BIN is None:
     #        print ("clang-format 7 or above is needed. Aborted.")
     #        exit(1)
     else:
-        CLANG_FORMAT_BIN = 'clang-format-11'
-            
+        CLANG_FORMAT_BIN = "clang-format-11"
+
 COLUMN_LIMIT = 80
 for line in fmt.split("\n"):
     line = line.split(":")
@@ -69,26 +69,47 @@ def custom_format(filename):
     in_define = False
     last_line = None
     out = ""
-    
+
     for line in src.split("\n"):
         if line.lstrip().startswith("#"):
-            if line[line.find("#")+1:].lstrip().startswith("define"):
+            if line[line.find("#") + 1 :].lstrip().startswith("define"):
                 in_define = True
-        
-        if "/*" in line and not line.strip().startswith("/*") and line.endswith("*/") and len(line) < (COLUMN_LIMIT-2):
+
+        if (
+            "/*" in line
+            and not line.strip().startswith("/*")
+            and line.endswith("*/")
+            and len(line) < (COLUMN_LIMIT - 2)
+        ):
             cmt_start = line.rfind("/*")
-            line = line[:cmt_start] + " " * (COLUMN_LIMIT-2 - len(line)) + line[cmt_start:]
+            line = (
+                line[:cmt_start]
+                + " " * (COLUMN_LIMIT - 2 - len(line))
+                + line[cmt_start:]
+            )
 
         define_padding = 0
         if last_line is not None and in_define and last_line.endswith("\\"):
             last_line = last_line[:-1]
-            define_padding = max(0, len(last_line[last_line.rfind("\n")+1:]))
+            define_padding = max(0, len(last_line[last_line.rfind("\n") + 1 :]))
 
-        if last_line is not None and last_line.strip().endswith("{") and line.strip() != "":
+        if (
+            last_line is not None
+            and last_line.strip().endswith("{")
+            and line.strip() != ""
+        ):
             line = (" " * define_padding + "\\" if in_define else "") + "\n" + line
-        elif last_line is not None and last_line.strip().startswith("}") and line.strip() != "":
+        elif (
+            last_line is not None
+            and last_line.strip().startswith("}")
+            and line.strip() != ""
+        ):
             line = (" " * define_padding + "\\" if in_define else "") + "\n" + line
-        elif line.strip().startswith("}") and last_line is not None and last_line.strip() != "":
+        elif (
+            line.strip().startswith("}")
+            and last_line is not None
+            and last_line.strip() != ""
+        ):
             line = (" " * define_padding + "\\" if in_define else "") + "\n" + line
 
         if not line.endswith("\\"):
@@ -97,14 +118,15 @@ def custom_format(filename):
         out += line + "\n"
         last_line = line
 
-    return (out)
+    return out
+
 
 args = sys.argv[1:]
 if len(args) == 0:
-    print ("Usage: ./format.py [-i] <filename>")
-    print ()
-    print (" The -i option, if specified, let the script to modify in-place")
-    print (" the source files. By default the results are written to stdout.")
+    print("Usage: ./format.py [-i] <filename>")
+    print()
+    print(" The -i option, if specified, let the script to modify in-place")
+    print(" the source files. By default the results are written to stdout.")
     print()
     exit(1)
 
@@ -120,4 +142,3 @@ for filename in args:
             f.write(code)
     else:
         print(code)
-
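The reformatted script above is invoked the same way as before; typical runs (the source file path is only an example) look like this:

```sh
# Format one source file in place with the repo's clang-format wrapper
./.custom-format.py -i src/afl-fuzz.c

# Without -i, the formatted result is written to stdout
./.custom-format.py src/afl-fuzz.c > /tmp/afl-fuzz.formatted.c
```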
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 00000000..d62da0a8
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,31 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**IMPORTANT**
+1. You have verified that the issue is present in the current `dev` branch
+2. Please supply the command line options and relevant environment variables, e.g. a copy-paste of the contents of `out/default/fuzzer_setup`
+
+Thank you for making afl++ better!
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. ...
+2. ...
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screen output/Screenshots**
+If applicable, add copy-paste of the screen output or screenshot that shows the issue. Please ensure the output is in **English** and not in Chinese, Russian, German, etc.
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 00000000..bbcbbe7d
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/Android.bp b/Android.bp
index 5d6f0433..ee076d1e 100644
--- a/Android.bp
+++ b/Android.bp
@@ -36,6 +36,7 @@ cc_defaults {
 cc_binary {
   name: "afl-fuzz",
   host_supported: true,
+  compile_multilib: "64",
 
   defaults: [
     "afl-defaults",
@@ -64,6 +65,7 @@ cc_binary {
     "src/afl-common.c",
     "src/afl-sharedmem.c",
     "src/afl-forkserver.c",
+    "src/afl-performance.c",
   ],
 }
 
@@ -152,7 +154,7 @@ cc_binary_host {
 
 cc_library_static {
   name: "afl-llvm-rt",
-  compile_multilib: "both",
+  compile_multilib: "64",
   vendor_available: true,
   host_supported: true,
   recovery_available: true,
diff --git a/Dockerfile b/Dockerfile
index d2a5ca55..1cb00d5d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -16,6 +16,7 @@ env NO_ARCH_OPT 1
 RUN apt-get update && \
     apt-get -y install --no-install-suggests --no-install-recommends \
     automake \
+    ninja-build \
     bison flex \
     build-essential \
     git \
@@ -28,7 +29,7 @@ RUN apt-get update && \
     gnuplot-nox \
     && rm -rf /var/lib/apt/lists/*
 
-RUN echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-11 main" >> /etc/apt/sources.list && \
+RUN echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-12 main" >> /etc/apt/sources.list && \
     wget -qO - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
 
 RUN echo "deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu focal main" >> /etc/apt/sources.list && \
@@ -37,28 +38,28 @@ RUN echo "deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu focal main
 RUN apt-get update && apt-get full-upgrade -y && \
     apt-get -y install --no-install-suggests --no-install-recommends \
     gcc-10 g++-10 gcc-10-plugin-dev gcc-10-multilib gdb lcov \
-    clang-11 clang-tools-11 libc++1-11 libc++-11-dev \
-    libc++abi1-11 libc++abi-11-dev libclang1-11 libclang-11-dev \
-    libclang-common-11-dev libclang-cpp11 libclang-cpp11-dev liblld-11 \
-    liblld-11-dev liblldb-11 liblldb-11-dev libllvm11 libomp-11-dev \
-    libomp5-11 lld-11 lldb-11 llvm-11 llvm-11-dev llvm-11-runtime llvm-11-tools \
+    clang-12 clang-tools-12 libc++1-12 libc++-12-dev \
+    libc++abi1-12 libc++abi-12-dev libclang1-12 libclang-12-dev \
+    libclang-common-12-dev libclang-cpp12 libclang-cpp12-dev liblld-12 \
+    liblld-12-dev liblldb-12 liblldb-12-dev libllvm12 libomp-12-dev \
+    libomp5-12 lld-12 lldb-12 llvm-12 llvm-12-dev llvm-12-runtime llvm-12-tools \
     && rm -rf /var/lib/apt/lists/*
 
 RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 0
 RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 0
 
-ENV LLVM_CONFIG=llvm-config-11
+ENV LLVM_CONFIG=llvm-config-12
 ENV AFL_SKIP_CPUFREQ=1
+ENV AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1
 
-RUN git clone https://github.com/vanhauser-thc/afl-cov /afl-cov
+RUN git clone --depth=1 https://github.com/vanhauser-thc/afl-cov /afl-cov
 RUN cd /afl-cov && make install && cd ..
 
 COPY . /AFLplusplus
 WORKDIR /AFLplusplus
 
-RUN export REAL_CXX=g++-10 && export CC=gcc-10 && \
-    export CXX=g++-10 && make clean && \
-    make distrib CFLAGS="-O3 -funroll-loops -D_FORTIFY_SOURCE=2" && make install && make clean
+RUN export CC=gcc-10 && export CXX=g++-10 && make clean && \
+    make distrib && make install && make clean
 
 RUN echo 'alias joe="jupp --wordwrap"' >> ~/.bashrc
 RUN echo 'export PS1="[afl++]$PS1"' >> ~/.bashrc
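The Dockerfile now tracks LLVM 12 and uses a plain `make distrib` build; a quick sanity check of the resulting image (the image tag is illustrative) might be:

```sh
# Build the image from the repository root and verify the toolchain default
docker build -t aflplusplus .
docker run --rm -ti aflplusplus sh -c 'echo $LLVM_CONFIG'   # expect: llvm-config-12
```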
diff --git a/GNUmakefile b/GNUmakefile
index f586733f..0420cdfc 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -75,16 +75,17 @@ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -fno-move-loop-invariants -
 	SPECIAL_PERFORMANCE += -fno-move-loop-invariants -fdisable-tree-cunrolli
 endif
 
-ifndef SOURCE_DATE_EPOCH
-  ifndef NO_ARCH_OPT
-    ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-      HAVE_MARCHNATIVE = 1
-      CFLAGS_OPT += -march=native
-    endif
-  endif
-endif
+#ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+#  ifndef SOURCE_DATE_EPOCH
+#    HAVE_MARCHNATIVE = 1
+#    CFLAGS_OPT += -march=native
+#  endif
+#endif
 
 ifneq "$(shell uname)" "Darwin"
+  #ifeq "$(HAVE_MARCHNATIVE)" "1"
+  #  SPECIAL_PERFORMANCE += -march=native
+  #endif
  # OS X does not like _FORTIFY_SOURCE=2
   ifndef DEBUG
     CFLAGS_OPT += -D_FORTIFY_SOURCE=2
@@ -363,7 +364,7 @@ help:
 	@echo NO_PYTHON - disable python support
 	@echo NO_SPLICING - disables splicing mutation in afl-fuzz, not recommended for normal fuzzing
 	@echo AFL_NO_X86 - if compiling on non-intel/amd platforms
-	@echo NO_ARCH_OPT - builds afl++ without machine architecutre optimizations
+	@echo NO_ARCH_OPT - builds afl++ without machine architecture optimizations
 	@echo "LLVM_CONFIG - if your distro doesn't use the standard name for llvm-config (e.g. Debian)"
 	@echo "=========================================="
 	@echo e.g.: make ASAN_BUILD=1
@@ -426,8 +427,8 @@ src/afl-sharedmem.o : $(COMM_HDR) src/afl-sharedmem.c include/sharedmem.h
 afl-fuzz: $(COMM_HDR) include/afl-fuzz.h $(AFL_FUZZ_FILES) src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o src/afl-performance.o | test_x86
 	$(CC) $(CFLAGS) $(COMPILE_STATIC) $(CFLAGS_FLTO) $(AFL_FUZZ_FILES) src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o src/afl-performance.o -o $@ $(PYFLAGS) $(LDFLAGS) -lm
 
-afl-showmap: src/afl-showmap.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) $(COMPILE_STATIC) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o -o $@ $(LDFLAGS)
+afl-showmap: src/afl-showmap.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o src/afl-performance.o $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) $(COMPILE_STATIC) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o src/afl-performance.o -o $@ $(LDFLAGS)
 
 afl-tmin: src/afl-tmin.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o src/afl-performance.o $(COMM_HDR) | test_x86
 	$(CC) $(CFLAGS) $(COMPILE_STATIC) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o src/afl-performance.o -o $@ $(LDFLAGS)
@@ -507,6 +508,8 @@ code-format:
 	./.custom-format.py -i qemu_mode/libcompcov/*.c
 	./.custom-format.py -i qemu_mode/libcompcov/*.cc
 	./.custom-format.py -i qemu_mode/libcompcov/*.h
+	./.custom-format.py -i qemu_mode/libqasan/*.c
+	./.custom-format.py -i qemu_mode/libqasan/*.h
 	./.custom-format.py -i *.h
 	./.custom-format.py -i *.c
 
@@ -515,7 +518,7 @@ code-format:
 ifndef AFL_NO_X86
 test_build: afl-cc afl-gcc afl-as afl-showmap
 	@echo "[*] Testing the CC wrapper afl-cc and its instrumentation output..."
-	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN AFL_CC; ASAN_OPTIONS=detect_leaks=0 AFL_INST_RATIO=100 AFL_PATH=. ./afl-cc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-cc failed"; exit 1 )
+	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN; ASAN_OPTIONS=detect_leaks=0 AFL_INST_RATIO=100 AFL_PATH=. ./afl-cc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-cc failed"; exit 1 )
 	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
 	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
 	@rm -f test-instr
@@ -561,6 +564,7 @@ clean:
 	$(MAKE) -C utils/argv_fuzzing clean
 	$(MAKE) -C qemu_mode/unsigaction clean
 	$(MAKE) -C qemu_mode/libcompcov clean
+	$(MAKE) -C qemu_mode/libqasan clean
 ifeq "$(IN_REPO)" "1"
 	test -e qemu_mode/qemuafl/Makefile && $(MAKE) -C qemu_mode/qemuafl clean || true
 	test -e unicorn_mode/unicornafl/Makefile && $(MAKE) -C unicorn_mode/unicornafl clean || true
@@ -636,6 +640,7 @@ install: all $(MANPAGES)
 	@if [ -f libdislocator.so ]; then set -e; install -m 755 libdislocator.so $${DESTDIR}$(HELPER_PATH); fi
 	@if [ -f libtokencap.so ]; then set -e; install -m 755 libtokencap.so $${DESTDIR}$(HELPER_PATH); fi
 	@if [ -f libcompcov.so ]; then set -e; install -m 755 libcompcov.so $${DESTDIR}$(HELPER_PATH); fi
+	@if [ -f libqasan.so ]; then set -e; install -m 755 libqasan.so $${DESTDIR}$(HELPER_PATH); fi
 	@if [ -f afl-fuzz-document ]; then set -e; install -m 755 afl-fuzz-document $${DESTDIR}$(BIN_PATH); fi
 	@if [ -f socketfuzz32.so -o -f socketfuzz64.so ]; then $(MAKE) -C utils/socket_fuzzing install; fi
 	@if [ -f argvfuzz32.so -o -f argvfuzz64.so ]; then $(MAKE) -C utils/argv_fuzzing install; fi
diff --git a/GNUmakefile.llvm b/GNUmakefile.llvm
index 3554c8bf..cc332f6c 100644
--- a/GNUmakefile.llvm
+++ b/GNUmakefile.llvm
@@ -43,7 +43,8 @@ endif
 LLVMVER  = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/git//' | sed 's/svn//' )
 LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//' )
 LLVM_MINOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/.*\.//' | sed 's/git//' | sed 's/svn//' | sed 's/ .*//' )
-LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-3]|^19' && echo 1 || echo 0 )
+LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-3]|^[0-2]\.' && echo 1 || echo 0 )
+LLVM_TOO_NEW = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[3-9]' && echo 1 || echo 0 )
 LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 )
 LLVM_10_OK = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[1-9]|^10\.[1-9]|^10\.0.[1-9]' && echo 1 || echo 0 )
 LLVM_HAVE_LTO = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[1-9]' && echo 1 || echo 0 )
@@ -58,7 +59,11 @@ ifeq "$(LLVMVER)" ""
 endif
 
 ifeq "$(LLVM_UNSUPPORTED)" "1"
-  $(warning llvm_mode only supports llvm versions 3.4 up to 12)
+  $(error llvm_mode only supports llvm from version 3.4 onwards)
+endif
+
+ifeq "$(LLVM_TOO_NEW)" "1"
+  $(warning you are using an in-development llvm version - this might break llvm_mode!)
 endif
 
 LLVM_TOO_OLD=1
@@ -208,12 +213,12 @@ ifeq "$(LLVM_LTO)" "1"
       ifneq "$(shell readlink $(LLVM_BINDIR)/ld.lld 2>&1)" ""
         AFL_REAL_LD = $(LLVM_BINDIR)/ld.lld
       else
-        $(warn ld.lld not found, cannot enable LTO mode)
+        $(warning ld.lld not found, cannot enable LTO mode)
         LLVM_LTO = 0
       endif
     endif
   else
-    $(warn clang option -flto is not working - maybe LLVMgold.so not found - cannot enable LTO mode)
+    $(warning clang option -flto is not working - maybe LLVMgold.so not found - cannot enable LTO mode)
     LLVM_LTO = 0
   endif
 endif
@@ -226,7 +231,7 @@ ifeq "$(LLVM_LTO)" "1"
       AFL_CLANG_LDPATH=1
     endif
   else
-    $(warn -fuse-ld is not working, cannot enable LTO mode)
+    $(warning -fuse-ld is not working, cannot enable LTO mode)
     LLVM_LTO = 0
   endif
 endif
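The version gating above can be reproduced by hand; a minimal sketch (assuming `llvm-config` is in PATH or pointed to by `LLVM_CONFIG`, as in the Makefile) of the same probe:

```sh
# Mirror the Makefile's LLVM_TOO_NEW check on the command line
LLVM_CONFIG=${LLVM_CONFIG:-llvm-config}
$LLVM_CONFIG --version        # e.g. 12.0.0
$LLVM_CONFIG --version | egrep -q '^1[3-9]' \
  && echo "in-development llvm - llvm_mode might break" \
  || echo "llvm version looks supported"
```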
diff --git a/README.md b/README.md
index d60253b3..00095390 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,6 @@
 
   <img align="right" src="https://raw.githubusercontent.com/andreafioraldi/AFLplusplus-website/master/static/logo_256x256.png" alt="AFL++ Logo">
 
-  ![Travis State](https://api.travis-ci.com/AFLplusplus/AFLplusplus.svg?branch=stable)
-
   Release Version: [3.00c](https://github.com/AFLplusplus/AFLplusplus/releases)
 
   Github Version: 3.01a
@@ -31,10 +29,10 @@ With afl++ 3.0 we introduced changes that break some previous afl and afl++
 behaviours and defaults:
 
   * There are no llvm_mode and gcc_plugin subdirectories anymore and there is
-    only one compiler: afl-cc. All previous compilers now symlink to this.
+    only one compiler: afl-cc. All previous compilers now symlink to this one.
     All instrumentation source code is now in the `instrumentation/` folder.
   * The gcc_plugin was replaced with a new version submitted by AdaCore that
-    supports more features. thank you!
+    supports more features. Thank you!
   * qemu_mode got upgraded to QEMU 5.1, but to be able to build this a current
     ninja build tool version and python3 setuptools are required.
     qemu_mode also got new options like snapshotting, instrumenting specific
@@ -55,6 +53,7 @@ behaviours and defaults:
     * a caching of testcases can now be performed and can be modified by
       editing config.h for TESTCASE_CACHE or by specifying the env variable
       `AFL_TESTCACHE_SIZE` (in MB). Good values are between 50-500 (default: 50).
+    * -M main fuzzer nodes do not perform trimming
   * examples/ got renamed to utils/
   * libtokencap/ libdislocator/ and qdbi_mode/ were moved to utils/
   * afl-cmin/afl-cmin.bash now search first in PATH and last in AFL_PATH
@@ -67,9 +66,10 @@ behaviours and defaults:
   3. [How to fuzz a target](#how-to-fuzz-with-afl)
   4. [Fuzzing binary-only targets](#fuzzing-binary-only-targets)
   5. [Good examples and writeups of afl++ usages](#good-examples-and-writeups)
-  6. [Branches](#branches)
-  7. [Want to help?](#help-wanted)
-  8. [Detailed help and description of afl++](#challenges-of-guided-fuzzing)
+  6. [CI Fuzzing](#ci-fuzzing)
+  7. [Branches](#branches)
+  8. [Want to help?](#help-wanted)
+  9. [Detailed help and description of afl++](#challenges-of-guided-fuzzing)
 
 ## Important features of afl++
 
@@ -217,7 +217,7 @@ These build options exist:
 * NO_PYTHON - disable python support
 * NO_SPLICING - disables splicing mutation in afl-fuzz, not recommended for normal fuzzing
 * AFL_NO_X86 - if compiling on non-intel/amd platforms
-* NO_ARCH_OPT - builds afl++ without machine architecutre optimizations
+* NO_ARCH_OPT - builds afl++ without machine architecture optimizations
 * LLVM_CONFIG - if your distro doesn't use the standard name for llvm-config (e.g. Debian)
 
 e.g.: make ASAN_BUILD=1
@@ -689,6 +689,9 @@ If you want to know more, the rest of this README and the tons of texts in
 Note that there are also a lot of tools out there that help fuzzing with afl++
 (some might be deprecated or unsupported):
 
+Speeding up fuzzing:
+ * [libfiowrapper](https://github.com/marekzmyslowski/libfiowrapper) - if the function you want to fuzz requires loading a file, this allows using the shared memory testcase feature :-) - recommended.
+
 Minimization of test cases:
  * [afl-pytmin](https://github.com/ilsani/afl-pytmin) - a wrapper for afl-tmin that tries to speed up the process of minimization of a single test case by using many CPU cores.
  * [afl-ddmin-mod](https://github.com/MarkusTeufelberger/afl-ddmin-mod) - a variation of afl-tmin based on the ddmin algorithm. 
@@ -718,11 +721,55 @@ Crash processing
  * [AFLize](https://github.com/d33tah/aflize) - a tool that automatically generates builds of debian packages suitable for AFL.
  * [afl-fid](https://github.com/FoRTE-Research/afl-fid) - a set of tools for working with input data.
 
+## CI Fuzzing
+
+Some notes on CI fuzzing - this kind of fuzzing differs from normal fuzzing
+campaigns as the runs are much shorter.
+
+1. Always:
+  * LTO has a much longer compile time, which runs counter to short fuzzing
+    runs - hence use afl-clang-fast instead.
+  * If you compile with CMPLOG then you can save fuzzing time and reuse that
+    compiled target for both the -c option and the main fuzz target.
+    This will impact the speed by ~15% though.
+  * `AFL_FAST_CAL` - enable fast calibration; this halves the time needed to
+    load a saturated corpus.
+  * `AFL_CMPLOG_ONLY_NEW` - only perform cmplog on newly found paths, not on
+    the initial corpus, as this has very likely been done for it already.
+  * Keep the generated corpus, use afl-cmin and reuse it every time!
+
+2. Additionally randomize the afl++ compilation options, e.g.
+  * 40% for `AFL_LLVM_CMPLOG`
+  * 10% for `AFL_LLVM_LAF_ALL`
+
+3. Also randomize the afl-fuzz runtime options, e.g.
+  * 60% for `AFL_DISABLE_TRIM`
+  * 50% use a dictionary generated by `AFL_LLVM_DICT2FILE`
+  * 50% use MOpt (`-L 0`)
+  * 40% for `AFL_EXPAND_HAVOC_NOW`
+  * 30% for old queue processing (`-Z`)
+  * for CMPLOG targets, 60% for `-l 2`, 40% for `-l 3`
+
+4. Do *not* run any `-M` modes; just running `-S` modes is better for CI fuzzing.
+   `-M` enables deterministic fuzzing, old queue handling etc., which is good
+   for a fuzzing campaign but not for short CI runs.
+
 ## Fuzzing binary-only targets
 
 When source code is *NOT* available, afl++ offers various support for fast,
 on-the-fly instrumentation of black-box binaries. 
 
+If you do not have to use Unicorn the following setup is recommended:
+  * run 1 afl-fuzz -Q instance with CMPLOG (`-c 0` + `AFL_COMPCOV_LEVEL=2`)
+  * run 1 afl-fuzz -Q instance with QASAN  (`AFL_USE_QASAN=1`)
+  * run 1 afl-fuzz -Q instance with LAF (`AFL_PRELOAD=libcompcov.so` + `AFL_COMPCOV_LEVEL=2`)
+
+Then run as many instances as you have cores left with either -Q mode or - better -
+use a binary rewriter like afl-dyninst, retrowrite, zipr, fibre, etc.
+
+For Qemu mode, check out the persistent mode and snapshot features - they give
+a huge speed improvement!
+
 ### QEMU
 
 For linux programs and its libraries this is accomplished with a version of
@@ -733,7 +780,8 @@ feature by doing:
 cd qemu_mode
 ./build_qemu_support.sh
 ```
-For additional instructions and caveats, see [qemu_mode/README.md](qemu_mode/README.md).
+For additional instructions and caveats, see [qemu_mode/README.md](qemu_mode/README.md) -
+check out the snapshot feature! :-)
 If possible you should use the persistent mode, see [qemu_mode/README.persistent.md](qemu_mode/README.persistent.md).
 The mode is approximately 2-5x slower than compile-time instrumentation, and is
 less conducive to parallelization.
@@ -741,6 +789,8 @@ less conducive to parallelization.
 If [afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) works for
 your binary, then you can use afl-fuzz normally and it will have twice
 the speed compared to qemu_mode (but slower than persistent mode).
+Note that several other binary rewriters exist, all with their advantages and
+caveats.
 
 ### Unicorn
 
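The randomization advice in the new CI-fuzzing section of README.md above can be scripted; a minimal sketch (the percentages, target name, and corpus paths are illustrative only):

```sh
#!/bin/bash
# Randomize afl++ compile-time options with roughly the odds suggested above
(( RANDOM % 100 < 40 )) && export AFL_LLVM_CMPLOG=1      # ~40%
(( RANDOM % 100 < 10 )) && export AFL_LLVM_LAF_ALL=1     # ~10%
afl-clang-fast -o target target.c                        # placeholder target

# Randomize runtime options the same way
(( RANDOM % 100 < 60 )) && export AFL_DISABLE_TRIM=1     # ~60%
(( RANDOM % 100 < 40 )) && export AFL_EXPAND_HAVOC_NOW=1 # ~40%
OPTS=""
(( RANDOM % 100 < 50 )) && OPTS="$OPTS -L 0"             # ~50% MOpt
(( RANDOM % 100 < 30 )) && OPTS="$OPTS -Z"               # ~30% old queue processing
AFL_FAST_CAL=1 AFL_CMPLOG_ONLY_NEW=1 \
  afl-fuzz -S ci-node -i corpus -o out $OPTS -- ./target @@
```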
diff --git a/afl-cmin b/afl-cmin
index 726e90ab..4ee79a79 100755
--- a/afl-cmin
+++ b/afl-cmin
@@ -183,14 +183,12 @@ BEGIN {
     if (_go_c == "Q") {
       if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
       extra_par = extra_par " -Q"
-      if ( !mem_limit_given ) mem_limit = "250"
       qemu_mode = 1
       continue
     } else 
     if (_go_c == "U") {
       if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
       extra_par = extra_par " -U"
-      if ( !mem_limit_given ) mem_limit = "250"
       unicorn_mode = 1
       continue
     } else 
@@ -200,7 +198,7 @@ BEGIN {
       usage()
   } # while options
 
-  if (!mem_limit) mem_limit = 200
+  if (!mem_limit) mem_limit = "none"
   if (!timeout) timeout = "none"
 
   # get program args
@@ -345,7 +343,7 @@ BEGIN {
     stat_format = "-f '%z %N'" # *BSD, MacOS
   }
   cmdline = "cd "in_dir" && find . \\( ! -name . -a -type d -prune \\) -o -type f -exec stat "stat_format" \\{\\} \\; | sort -k1n -k2r"
-  cmdline = "ls "in_dir" | (cd "in_dir" && xargs stat "stat_format") | sort -k1n -k2r"
+  cmdline = "ls "in_dir" | (cd "in_dir" && xargs stat "stat_format" 2>/dev/null) | sort -k1n -k2r"
   while (cmdline | getline) {
     sub(/^[0-9]+ (\.\/)?/,"",$0)
     infilesSmallToBig[i++] = $0
@@ -357,7 +355,7 @@ BEGIN {
   # Make sure that we're not dealing with a directory.
 
   if (0 == system("test -d "in_dir"/"first_file)) {
-    print "[-] Error: The input directory contains subdirectories - please fix." > "/dev/stderr"
+    print "[-] Error: The input directory is empty or contains subdirectories - please fix." > "/dev/stderr"
     exit 1
   }
 
@@ -413,8 +411,8 @@ BEGIN {
     retval = system( AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string)
   } else {
     print "    Processing "in_count" files (forkserver mode)..."
-#    print AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string" </dev/null"
-    retval = system( AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string" </dev/null")
+#    print AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null"
+    retval = system( AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
   }
 
   if (retval && !AFL_CMIN_CRASHES_ONLY) {
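Given the new defaults above (no implicit memory limit) and the stdin-file handling passed through via `-A`, a typical minimization run (paths assumed) looks like:

```sh
# Minimize a corpus; -m none and -t none are now the defaults
afl-cmin -i corpus/ -o corpus.min/ -- ./target @@

# QEMU-mode targets no longer get an implicit 250 MB memory limit either
afl-cmin -Q -i corpus/ -o corpus.min/ -- ./target @@
```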
diff --git a/afl-cmin.bash b/afl-cmin.bash
index fb50f1fc..dae21939 100755
--- a/afl-cmin.bash
+++ b/afl-cmin.bash
@@ -45,7 +45,7 @@ echo
 
 # Process command-line options...
 
-MEM_LIMIT=200
+MEM_LIMIT=none
 TIMEOUT=none
 
 unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
@@ -85,12 +85,10 @@ while getopts "+i:o:f:m:t:eQUCh" opt; do
          ;;
     "Q")
          EXTRA_PAR="$EXTRA_PAR -Q"
-         test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
          QEMU_MODE=1
          ;;
     "U")
          EXTRA_PAR="$EXTRA_PAR -U"
-         test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
          UNICORN_MODE=1
          ;;    
     "?")
diff --git a/afl-system-config b/afl-system-config
index 456cccac..d5e5ceae 100755
--- a/afl-system-config
+++ b/afl-system-config
@@ -34,6 +34,7 @@ if [ "$PLATFORM" = "Linux" ] ; then
   test -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
   test -e /sys/devices/system/cpu/intel_pstate/no_turbo && echo 0 > /sys/devices/system/cpu/intel_pstate/no_turbo
   test -e /sys/devices/system/cpu/cpufreq/boost && echo 1 > /sys/devices/system/cpu/cpufreq/boost
+  test -e /sys/devices/system/cpu/intel_pstate/max_perf_pct && echo 100 > /sys/devices/system/cpu/intel_pstate/max_perf_pct
 } > /dev/null
   echo Settings applied.
   dmesg | egrep -q 'nospectre_v2|spectre_v2=off' || {
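To confirm the new pstate tweak took effect (this sysfs path only exists on systems using the intel_pstate driver):

```sh
# Should print 100 after running afl-system-config
cat /sys/devices/system/cpu/intel_pstate/max_perf_pct
```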
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 60f09ca5..e2482f8f 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -11,31 +11,64 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
 
 ### Version ++3.01a (dev)
   - Mac OS ARM64 support
-  - New selective instrumentation option with __AFL_COVERAGE_... commands
+  - Android support fixed and updated by Joey Jiaojg - thanks!
+  - New selective instrumentation option with __AFL_COVERAGE_* commands
     to be placed in the source code.
     Check out instrumentation/README.instrument_list.md
   - afl-fuzz
+    - Making AFL_MAP_SIZE (mostly) obsolete - afl-fuzz now learns on start
+      the target map size
+    - upgraded cmplog/redqueen: solving for floating point, solving
+      transformations (e.g. toupper, tolower, to/from hex, xor,
+      arithmetics, etc.). This is costly hence new command line option
+      `-l` that sets the intensity (values 1 to 3). Recommended is 1 or 2.
+    - added `AFL_CMPLOG_ONLY_NEW` to not use cmplog on initial testcases from
+      `-i` or resumes (as these have most likely already been done)
     - fix crash for very, very fast targets+systems (thanks to mhlakhani
       for reporting)
-    - if determinstic mode is active (-D, or -M without -d) then we sync
+    - on restarts (`-i`)/autoresume (AFL_AUTORESUME) the stats are now
+      reloaded and used, thanks to Vimal Joseph for this patch! 
+    - if deterministic mode is active (`-D`, or `-M` without `-d`) then we sync
       after every queue entry as this can take very long time otherwise
     - better detection if a target needs a large shared map
-    - switched to a faster RNG
+    - fix for `-Z`
+    - switched to an even faster RNG
     - added hghwng's patch for faster trace map analysis
   - afl-cc
     - allow instrumenting LLVMFuzzerTestOneInput
     - fixed endless loop for allow/blocklist lines starting with a
       comment (thanks to Zherya for reporting)
-    - cmplog/redqueen now also tracks floats/doubles
+    - cmplog/redqueen now also tracks floating point, _ExtInt() + 128bit
+    - cmplog/redqueen can now process basic libc++ and libstdc++
+      std::string comparisons (though no position or length type variants)
+    - added support for __afl_coverage_interesting() for LTO and
+      our own PCGUARD (llvm 10.0.1+), read more about this function
+      and selective coverage in instrumentation/README.instrument_list.md
     - added AFL_LLVM_INSTRUMENT option NATIVE for native clang pc-guard
       support (less performant than our own), GCC for old afl-gcc and
       CLANG for old afl-clang
-  - warn on any _AFL and __AFL env var
-  - LLVM mode is now compiled with -j4, unicorn with all cores. qemu was
-    already building with all cores, the gcc plugin needs only one.
+    - fixed a potential crash in the LAF feature
+    - workaround for llvm 13
+    - workaround for an llvm-internal lto bug that does not allow bitcasts from _ExtInt()
+  - qemuafl
+    - QASan (address sanitizer for Qemu) ported to qemuafl!
+      See qemu_mode/libqasan/README.md
+    - solved some persistent mode bugs (thanks Dil4rd)
+    - solved an issue when dumping the memory maps (thanks wizche)
+    - Android support for QASan
+  - unicornafl
+    - Substantial speed gains in python bindings for certain use cases
+    - Improved rust bindings
+    - Added a new example harness to compare python, c, and rust bindings
+  - afl-cmin and afl-showmap now support the -f option
+  - changed default: no memory limit for afl-cmin and afl-cmin.bash
+  - warn on any _AFL and __AFL env vars.
+  - set AFL_IGNORE_UNKNOWN_ENVS to not warn on unknown AFL_... env vars.
   - added dummy Makefile to instrumentation/
-  - Updated utils/afl_frida to be 5% faster
-  - Added AFL_KILL_SIGNAL env variable for custom targets (thanks @v-p-b)
+  - Updated utils/afl_frida to be 5% faster, 7% on x86_x64
+  - Added AFL_KILL_SIGNAL env variable (thanks @v-p-b)
+  - @Edznux added nice documentation on how to use rpc.statsd with
+    afl++ in docs/rpc_statsd.md, thanks!
 
 ### Version ++3.00c (release)
   - llvm_mode/ and gcc_plugin/ moved to instrumentation/
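Two of the changelog additions above combine well in practice; a hedged example (target names assumed) of the new `-l` cmplog intensity option together with `AFL_CMPLOG_ONLY_NEW`:

```sh
# Reuse one CMPLOG-instrumented build for both -c and the main target,
# run redqueen at intensity 2, and skip cmplog for the initial corpus
AFL_CMPLOG_ONLY_NEW=1 afl-fuzz -i in -o out \
  -c ./target.cmplog -l 2 -- ./target.cmplog @@
```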
diff --git a/docs/env_variables.md b/docs/env_variables.md
index 26128b01..f6ed12d0 100644
--- a/docs/env_variables.md
+++ b/docs/env_variables.md
@@ -5,6 +5,10 @@
   users or for some types of custom fuzzing setups. See [README.md](README.md) for the general
   instruction manual.
 
+  Note that most tools will warn on any unknown AFL environment variables;
+  this helps to catch typos. If you want to disable this check, then set
+  the `AFL_IGNORE_UNKNOWN_ENVS` environment variable.
+
 ## 1) Settings for all compilers
 
 Starting with afl++ 3.0 there is only one compiler: afl-cc
@@ -18,7 +22,6 @@ To select the different instrumentation modes this can be done by
 `MODE` can be one of `LTO` (afl-clang-lto*), `LLVM` (afl-clang-fast*), `GCC_PLUGIN`
 (afl-g*-fast) or `GCC` (afl-gcc/afl-g++).
 
-
 Because (with the exception of the --afl-MODE command line option) the
 compile-time tools do not accept afl specific command-line options, they
 make fairly broad use of environmental variables instead:
@@ -287,6 +290,11 @@ checks or alter some of the more exotic semantics of the tool:
     the target. This must be equal or larger than the size the target was
     compiled with.
 
+  - `AFL_CMPLOG_ONLY_NEW` will only perform the expensive cmplog feature for
+    newly found testcases and not for testcases that are loaded on startup
+    (`-i in`). This is an important feature to set when resuming a fuzzing
+    session.
+
   - `AFL_TESTCACHE_SIZE` allows you to override the size of `#define TESTCASE_CACHE`
     in config.h. Recommended values are 50-250MB - or more if your fuzzing
     finds a huge amount of paths for large inputs.
@@ -431,13 +439,19 @@ checks or alter some of the more exotic semantics of the tool:
     normally done when starting up the forkserver and causes a pretty
     significant performance drop.
 
-  - Setting `AFL_STATSD` enable StatsD metrics collection.
+  - Setting `AFL_STATSD` enables StatsD metrics collection.
     By default AFL++ will send these metrics over UDP to 127.0.0.1:8125.
-    The host and port are configurable with `AFL_STATSD_HOST` and `AFL_STATSD_PORT`
-    respectively.
-    To get the most out of this, you should provide `AFL_STATSD_TAGS_FLAVOR` that
-    matches your StatsD server.
-    Available flavors are `dogstatsd`, `librato`, `signalfx` and `influxdb`.
+    The host and port are configurable with `AFL_STATSD_HOST` and `AFL_STATSD_PORT` respectively.
+    To enable tags (banner and afl_version) you should provide `AFL_STATSD_TAGS_FLAVOR` that matches
+    your StatsD server (see `AFL_STATSD_TAGS_FLAVOR`)
+
+  - Setting `AFL_STATSD_TAGS_FLAVOR` to one of `dogstatsd`, `librato`, `signalfx` or `influxdb`
+    allows you to add tags to your fuzzing instances. This is especially useful when running
+    multiple instances (`-M/-S` for example). Applied tags are `banner` and `afl_version`.
+    `banner` corresponds to the name of the fuzzer provided through `-M/-S`.
+    `afl_version` corresponds to the currently running afl version (e.g. `++3.0c`).
+    The default (empty/not present) adds no tags to the metrics.
+    See [rpc_statsd.md](rpc_statsd.md) for more information.
 
   - Setting `AFL_CRASH_EXITCODE` sets the exit code afl treats as crash.
     For example, if `AFL_CRASH_EXITCODE='-1'` is set, each input resulting
@@ -504,6 +518,12 @@ The QEMU wrapper used to instrument binary-only code supports several settings:
     stack pointer in which QEMU can find the return address when `start addr` is
     hit.
 
+  - With `AFL_USE_QASAN` you can enable QEMU AddressSanitizer for dynamically
+    linked binaries.
+
+  - With `AFL_QEMU_FORCE_DFL` you force QEMU to ignore the registered signal
+    handlers of the target.
+
 ## 6) Settings for afl-cmin
 
 The corpus minimization script offers very little customization:
@@ -519,7 +539,7 @@ The corpus minimization script offers very little customization:
     a modest security risk on multi-user systems with rogue users, but should
     be safe on dedicated fuzzing boxes.
 
-# #6) Settings for afl-tmin
+## 7) Settings for afl-tmin
 
 Virtually nothing to play with. Well, in QEMU mode (`-Q`), `AFL_PATH` will be
 searched for afl-qemu-trace. In addition to this, `TMPDIR` may be used if a
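The two new QEMU settings documented above can be combined; a minimal example (the binary name is a placeholder):

```sh
# Binary-only fuzzing under QEMU with AddressSanitizer enabled and the
# target's registered signal handlers ignored
AFL_USE_QASAN=1 AFL_QEMU_FORCE_DFL=1 afl-fuzz -Q -i in -o out -- ./target @@
```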
diff --git a/docs/ideas.md b/docs/ideas.md
index aaa3eed1..08cb16ef 100644
--- a/docs/ideas.md
+++ b/docs/ideas.md
@@ -3,6 +3,40 @@
 In the following, we describe a variety of ideas that could be implemented
 for future AFL++ versions.
 
+# GSoC 2021
+
+All GSoC 2021 projects will be in the Rust development language!
+
+## UI for libaflrs
+
+Write a user interface to libaflrs, the upcoming backend of afl++.
+This might look like the afl-fuzz UI, but you can improve on it - and should!
+
+## Schedulers for libaflrs
+
+A scheduler is a mechanism that selects items from the fuzzing corpus based
+on strategy and randomness. One scheduler might focus on long paths,
+another on the rarity of edges discovered, still another on a combination of
+things. Some of the schedulers in afl++ have to be ported, but you are free
+to come up with your own if you want to - and see how it performs.
+
+## Forkserver support for libaflrs
+
+The current libaflrs implementation fuzzes in-memory; however, we obviously
+want to support afl-instrumented binaries as well.
+Hence forkserver support needs to be implemented - forking off the target
+and talking to it via a socketpair and the communication protocol within.
+
+## More Observers for libaflrs
+
+An observer is a piece of measurement functionality that looks at the target
+being fuzzed
+and documents something about it. In traditional fuzzing this is the coverage
+in the target, however we want to add various more observers, e.g. stack depth,
+heap usage, etc. - this is a topic for an experienced Rust developer.
+
+# Generic ideas and wishlist
+
 ## Analysis software
 
 Currently analysis is done by using afl-plot, which is rather outdated.
@@ -16,6 +50,8 @@ test cases executed.
 It should be clickable which value is X and Y axis, zoom factor, log scaling
 on-off, etc.
 
+Mentor: vanhauser-thc
+
 ## WASM Instrumentation
 
 Currently, AFL++ can be used for source code fuzzing and traditional binaries.
@@ -36,19 +72,6 @@ Either improve a single mutator thorugh learning of many different bugs
 
 Mentor: domenukk
 
-## Collision-free Binary-Only Maps
-
-AFL++ supports collison-free maps using an LTO (link-time-optimization) pass.
-This should be possible to implement for QEMU and Unicorn instrumentations.
-As the forkserver parent caches just in time translated translation blocks,
-adding a simple counter between jumps should be doable.
-
-Note: this is already in development for qemu by Andrea, so for people who
-want to contribute it might make more sense to port his solution to unicorn.
-
-Mentor: andreafioraldi or domenukk
-Issue/idea tracker: [https://github.com/AFLplusplus/AFLplusplus/issues/237](https://github.com/AFLplusplus/AFLplusplus/issues/237)
-
 ## Your idea!
 
 Finally, we are open to proposals!
diff --git a/docs/rpc_statsd.md b/docs/rpc_statsd.md
new file mode 100644
index 00000000..fb97aa09
--- /dev/null
+++ b/docs/rpc_statsd.md
@@ -0,0 +1,143 @@
+# Remote monitoring with StatsD
+
+StatsD allows you to receive and aggregate metrics from a wide range of applications and retransmit them to the backend of your choice.
+This enables you to create nice and readable dashboards containing all the information you need on your fuzzer instances.
+No need to write your own statistics parsing system, deploy and maintain it on all your instances, or sync it with your graph rendering system...
+
+The available metrics are:
+- cycle_done
+- cycles_wo_finds
+- execs_done
+- execs_per_sec
+- paths_total
+- paths_favored
+- paths_found
+- paths_imported
+- max_depth
+- cur_path
+- pending_favs
+- pending_total
+- variable_paths
+- unique_crashes
+- unique_hangs
+- total_crashes
+- slowest_exec_ms
+- edges_found
+- var_byte_count
+- havoc_expansion
+
+Compared to the default integrated UI, these metrics give you the opportunity to visualize trends and the fuzzing state over time.
+By doing so, you might be able to see when the fuzzing process has reached a state of no progress and to identify the "best strategies"
+(according to your own criteria) for your targets, etc. - all without having to log into each instance manually.
+
+An example visualisation may look like the following:
+![StatsD Grafana](visualization/statsd-grafana.png)
+
+*Note: the exact same dashboard can be imported with [this JSON template](statsd/grafana-afl++.json).*
+
+## How to use
+
+To enable the StatsD reporting on your fuzzer instances, you need to set the environment variable `AFL_STATSD=1`.
+
+Setting `AFL_STATSD_TAGS_FLAVOR` to the provider of your choice will assign tags / labels to each metric based on their format.
+The possible values are `dogstatsd`, `librato`, `signalfx` or `influxdb`.
+For more information on these env vars, check out `docs/env_variables.md`.
+
+The simplest way of using this feature is to use any metric provider and change the host/port of your StatsD daemon,
+with `AFL_STATSD_HOST` and `AFL_STATSD_PORT`, if required (defaults are `localhost` and port `8125`).
+To get started, here are some instructions with free and open source tools.
+The following setup is based on Prometheus, statsd_exporter and Grafana.
+Grafana here is not mandatory, but gives you some nice graphs and features.
+
+Depending on your setup and infrastructure, you may want to run these applications somewhere other than your fuzzer instances.
+Only one instance of these 3 applications is required for all your fuzzers.
+
+To simplify everything, we will use Docker and docker-compose.
+Make sure you have them both installed. On most common Linux distributions, it's as simple as:
+
+```sh
+curl -fsSL https://get.docker.com -o get-docker.sh
+sh get-docker.sh
+```
+
+Once that's done, we can create the infrastructure.
+Create and move into the directory of your choice. This will store all the configurations files required.
+
+First, create a `docker-compose.yml` containing the following:
+```yml
+version: '3'
+
+networks:
+  statsd-net:
+    driver: bridge
+
+services:
+  prometheus:
+    image: prom/prometheus
+    container_name: prometheus
+    volumes:
+      - ./prometheus.yml:/prometheus.yml
+    command:
+      - '--config.file=/prometheus.yml'
+    restart: unless-stopped
+    ports:
+      - "9090:9090"
+    networks:
+      - statsd-net
+
+  statsd_exporter:
+    image: prom/statsd-exporter
+    container_name: statsd_exporter
+    volumes:
+      - ./statsd_mapping.yml:/statsd_mapping.yml
+    command:
+      - "--statsd.mapping-config=/statsd_mapping.yml"
+    ports:
+      - "9102:9102/tcp"
+      - "8125:9125/udp"
+    networks:
+      - statsd-net
+  
+  grafana:
+    image: grafana/grafana
+    container_name: grafana
+    restart: unless-stopped
+    ports:
+        - "3000:3000"
+    networks:
+      - statsd-net
+```
+
+Then `prometheus.yml`
+```yml
+global:
+  scrape_interval:      15s
+  evaluation_interval:  15s
+
+scrape_configs:
+  - job_name: 'fuzzing_metrics'
+    static_configs:
+      - targets: ['statsd_exporter:9102']
+```
+
+And finally `statsd_mapping.yml`
+```yml 
+mappings:
+- match: "fuzzing.*"
+  name: "fuzzing"
+  labels:
+      type: "$1"
+```
+
+Run `docker-compose up -d`.
+
+Everything should now be set up, and you are able to run your fuzzers with
+
+```
+AFL_STATSD_TAGS_FLAVOR=dogstatsd AFL_STATSD=1 afl-fuzz -M test-fuzzer-1 -i i -o o ./bin/my-application @@
+AFL_STATSD_TAGS_FLAVOR=dogstatsd AFL_STATSD=1 afl-fuzz -S test-fuzzer-2 -i i -o o ./bin/my-application @@
+...
+```
+
+This setup should be adapted before use in a production environment, depending on your needs: adding passwords, creating volumes for storage,
+tweaking the metrics gathering to collect host metrics (CPU, RAM ...).
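Before importing the Grafana dashboard, you can verify that metrics reach the exporter at all (the port is the one mapped in the compose file above):

```sh
# statsd_exporter re-exposes the received StatsD metrics in Prometheus format
curl -s http://localhost:9102/metrics | grep '^fuzzing'
```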
diff --git a/docs/statsd/grafana-afl++.json b/docs/statsd/grafana-afl++.json
new file mode 100644
index 00000000..96e824de
--- /dev/null
+++ b/docs/statsd/grafana-afl++.json
@@ -0,0 +1,1816 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "gnetId": null,
+  "graphTooltip": 0,
+  "id": 1,
+  "links": [],
+  "panels": [
+    {
+      "datasource": null,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 16,
+      "title": "Row title",
+      "type": "row"
+    },
+    {
+      "alert": {
+        "alertRuleTags": {},
+        "conditions": [
+          {
+            "evaluator": {
+              "params": [
+                500
+              ],
+              "type": "lt"
+            },
+            "operator": {
+              "type": "and"
+            },
+            "query": {
+              "params": [
+                "A",
+                "5m",
+                "now"
+              ]
+            },
+            "reducer": {
+              "params": [],
+              "type": "avg"
+            },
+            "type": "query"
+          }
+        ],
+        "executionErrorState": "alerting",
+        "for": "5m",
+        "frequency": "1m",
+        "handler": 1,
+        "name": "Slow exec per sec",
+        "noDataState": "no_data",
+        "notifications": []
+      },
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 6,
+        "w": 10,
+        "x": 0,
+        "y": 1
+      },
+      "hiddenSeries": false,
+      "id": 12,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"execs_per_sec\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [
+        {
+          "colorMode": "critical",
+          "fill": true,
+          "line": true,
+          "op": "lt",
+          "value": 500
+        }
+      ],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Exec/s",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 6,
+        "w": 10,
+        "x": 10,
+        "y": 1
+      },
+      "hiddenSeries": false,
+      "id": 8,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"total_crashes\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Total Crashes",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 4,
+        "x": 20,
+        "y": 1
+      },
+      "hiddenSeries": false,
+      "id": 19,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"var_byte_count\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Var Byte Count",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 6,
+        "w": 10,
+        "x": 0,
+        "y": 7
+      },
+      "hiddenSeries": false,
+      "id": 10,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"unique_crashes\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Unique Crashes",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 6,
+        "w": 10,
+        "x": 10,
+        "y": 7
+      },
+      "hiddenSeries": false,
+      "id": 14,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"unique_hangs\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Unique Hangs",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 0,
+        "y": 13
+      },
+      "hiddenSeries": false,
+      "id": 23,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"slowest_exec_ms\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Slowest Exec Ms",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 5,
+        "y": 13
+      },
+      "hiddenSeries": false,
+      "id": 4,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"cycle_done\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Cycles dones",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 10,
+        "y": 13
+      },
+      "hiddenSeries": false,
+      "id": 13,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"execs_done\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Total Execs",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 15,
+        "y": 13
+      },
+      "hiddenSeries": false,
+      "id": 2,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"cur_path\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Curent path",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 0,
+        "y": 18
+      },
+      "hiddenSeries": false,
+      "id": 6,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"cycles_wo_finds\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Cycles done without find",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 5,
+        "y": 18
+      },
+      "hiddenSeries": false,
+      "id": 25,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"paths_favored\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Path Favored",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 10,
+        "y": 18
+      },
+      "hiddenSeries": false,
+      "id": 22,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"havoc_expansion\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Havoc Expansion",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 15,
+        "y": 18
+      },
+      "hiddenSeries": false,
+      "id": 17,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"edges_found\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Edges Found",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 0,
+        "y": 23
+      },
+      "hiddenSeries": false,
+      "id": 24,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"paths_imported\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Path Imported",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 5,
+        "y": 23
+      },
+      "hiddenSeries": false,
+      "id": 21,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"pending_total\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Pending Total",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 10,
+        "y": 23
+      },
+      "hiddenSeries": false,
+      "id": 20,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"pending_favs\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Pending favs",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fieldConfig": {
+        "defaults": {
+          "custom": {}
+        },
+        "overrides": []
+      },
+      "fill": 1,
+      "fillGradient": 0,
+      "gridPos": {
+        "h": 5,
+        "w": 5,
+        "x": 15,
+        "y": 23
+      },
+      "hiddenSeries": false,
+      "id": 18,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": false,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "options": {
+        "alertThreshold": true
+      },
+      "percentage": false,
+      "pluginVersion": "7.3.7",
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "fuzzing{type=\"max_depth\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [
+        {
+          "colorMode": "background6",
+          "fill": true,
+          "fillColor": "rgba(234, 112, 112, 0.12)",
+          "line": false,
+          "lineColor": "rgba(237, 46, 24, 0.60)",
+          "op": "time"
+        }
+      ],
+      "timeShift": null,
+      "title": "Max Depth",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    }
+  ],
+  "refresh": false,
+  "schemaVersion": 26,
+  "style": "dark",
+  "tags": [],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-30m",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "Fuzzing",
+  "uid": "sRI6PCfGz",
+  "version": 2
+}
\ No newline at end of file
diff --git a/docs/visualization/statsd-grafana.png b/docs/visualization/statsd-grafana.png
new file mode 100644
index 00000000..1bdc1722
--- /dev/null
+++ b/docs/visualization/statsd-grafana.png
Binary files differ
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index 6342c8b6..1d5ec1f0 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -141,12 +141,23 @@ extern s16 interesting_16[INTERESTING_8_LEN + INTERESTING_16_LEN];
 extern s32
     interesting_32[INTERESTING_8_LEN + INTERESTING_16_LEN + INTERESTING_32_LEN];
 
+struct tainted {
+
+  u32             pos;
+  u32             len;
+  struct tainted *next;
+  struct tainted *prev;
+
+};
+
 struct queue_entry {
 
   u8 *fname;                            /* File name for the test case      */
   u32 len;                              /* Input length                     */
+  u32 id;                               /* entry number in queue_buf        */
 
-  u8   cal_failed;                      /* Calibration failed?              */
+  u8 colorized,                         /* Do not run redqueen stage again  */
+      cal_failed;                       /* Calibration failed?              */
   bool trim_done,                       /* Trimmed?                         */
       was_fuzzed,                       /* historical, but needed for MOpt  */
       passed_det,                       /* Deterministic stages passed?     */
@@ -154,7 +165,6 @@ struct queue_entry {
       var_behavior,                     /* Variable behavior?               */
       favored,                          /* Currently favored?               */
       fs_redundant,                     /* Marked as redundant in the fs?   */
-      fully_colorized,                  /* Do not run redqueen stage again  */
       is_ascii,                         /* Is the input just ascii text?    */
       disabled;                         /* Is disabled from fuzz selection  */
 
@@ -179,7 +189,10 @@ struct queue_entry {
 
   u8 *testcase_buf;                     /* The testcase buffer, if loaded.  */
 
-  struct queue_entry *next;             /* Next element, if any             */
+  u8 *            cmplog_colorinput;    /* the result buf of colorization   */
+  struct tainted *taint;                /* Taint information from CmpLog    */
+
+  struct queue_entry *mother;           /* queue entry this is based on     */
 
 };
 
@@ -371,7 +384,7 @@ typedef struct afl_env_vars {
       afl_dumb_forksrv, afl_import_first, afl_custom_mutator_only, afl_no_ui,
       afl_force_ui, afl_i_dont_care_about_missing_crashes, afl_bench_just_one,
       afl_bench_until_crash, afl_debug_child, afl_autoresume, afl_cal_fast,
-      afl_cycle_schedules, afl_expand_havoc, afl_statsd;
+      afl_cycle_schedules, afl_expand_havoc, afl_statsd, afl_cmplog_only_new;
 
   u8 *afl_tmpdir, *afl_custom_mutator_library, *afl_python_module, *afl_path,
       *afl_hang_tmout, *afl_forksrv_init_tmout, *afl_skip_crashes, *afl_preload,
@@ -391,7 +404,7 @@ struct afl_pass_stat {
 struct foreign_sync {
 
   u8 *   dir;
-  time_t ctime;
+  time_t mtime;
 
 };
 
@@ -412,7 +425,8 @@ typedef struct afl_state {
     really makes no sense to haul them around as function parameters. */
   u64 orig_hit_cnt_puppet, last_limit_time_start, tmp_pilot_time,
       total_pacemaker_time, total_puppet_find, temp_puppet_find, most_time_key,
-      most_time, most_execs_key, most_execs, old_hit_count, force_ui_update;
+      most_time, most_execs_key, most_execs, old_hit_count, force_ui_update,
+      prev_run_time;
 
   MOpt_globals_t mopt_globals_core, mopt_globals_pilot;
 
@@ -632,6 +646,9 @@ typedef struct afl_state {
   /* cmplog forkserver ids */
   s32 cmplog_fsrv_ctl_fd, cmplog_fsrv_st_fd;
   u32 cmplog_prev_timed_out;
+  u32 cmplog_max_filesize;
+  u32 cmplog_lvl;
+  u32 colorize_success;
 
   struct afl_pass_stat *pass_stats;
   struct cmp_map *      orig_cmp_map;
@@ -1051,6 +1068,7 @@ void destroy_extras(afl_state_t *);
 
 /* Stats */
 
+void load_stats_file(afl_state_t *);
 void write_setup_file(afl_state_t *, u32, char **);
 void write_stats_file(afl_state_t *, double, double, double);
 void maybe_update_plot_file(afl_state_t *, double, double);
@@ -1117,9 +1135,9 @@ void   read_foreign_testcases(afl_state_t *, int);
 u8 common_fuzz_cmplog_stuff(afl_state_t *afl, u8 *out_buf, u32 len);
 
 /* RedQueen */
-u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len,
-                        u64 exec_cksum);
+u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len);
 
+/* our RNG wrapper */
 AFL_RAND_RETURN rand_next(afl_state_t *afl);
 
 /* probability between 0.0 and 1.0 */
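For illustration, the `struct tainted` added above is a doubly-linked list of
byte ranges that CmpLog colorization found to influence comparisons; each
`queue_entry` now carries such a list in its `taint` field. A minimal consumer
sketch (the helper name is hypothetical, not part of this patch):

```c
#include "afl-fuzz.h"

/* Sum the bytes covered by a queue entry's taint list.
   Sketch only; relies on the struct definitions shown in the hunk above. */
static u32 count_tainted_bytes(struct queue_entry *q) {

  u32 total = 0;
  for (struct tainted *t = q->taint; t; t = t->next)
    total += t->len;                        /* node covers [pos, pos + len) */
  return total;

}
```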
diff --git a/include/android-ashmem.h b/include/android-ashmem.h
index 6939e06d..91699b27 100644
--- a/include/android-ashmem.h
+++ b/include/android-ashmem.h
@@ -1,81 +1,83 @@
 #ifdef __ANDROID__
-#ifndef _ANDROID_ASHMEM_H
-#define _ANDROID_ASHMEM_H
-
-#include <fcntl.h>
-#include <linux/ashmem.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-
-#if __ANDROID_API__ >= 26
-#define shmat bionic_shmat
-#define shmctl bionic_shmctl
-#define shmdt bionic_shmdt
-#define shmget bionic_shmget
-#endif
-#include <sys/shm.h>
-#undef shmat
-#undef shmctl
-#undef shmdt
-#undef shmget
-#include <stdio.h>
-
-#define ASHMEM_DEVICE "/dev/ashmem"
+  #ifndef _ANDROID_ASHMEM_H
+    #define _ANDROID_ASHMEM_H
+
+    #include <fcntl.h>
+    #include <linux/ashmem.h>
+    #include <sys/ioctl.h>
+    #include <sys/mman.h>
+
+    #if __ANDROID_API__ >= 26
+      #define shmat bionic_shmat
+      #define shmctl bionic_shmctl
+      #define shmdt bionic_shmdt
+      #define shmget bionic_shmget
+    #endif
+    #include <sys/shm.h>
+    #undef shmat
+    #undef shmctl
+    #undef shmdt
+    #undef shmget
+    #include <stdio.h>
+
+    #define ASHMEM_DEVICE "/dev/ashmem"
 
 int shmctl(int __shmid, int __cmd, struct shmid_ds *__buf) {
+
   int ret = 0;
   if (__cmd == IPC_RMID) {
-    int length = ioctl(__shmid, ASHMEM_GET_SIZE, NULL);
+
+    int               length = ioctl(__shmid, ASHMEM_GET_SIZE, NULL);
     struct ashmem_pin pin = {0, length};
     ret = ioctl(__shmid, ASHMEM_UNPIN, &pin);
     close(__shmid);
+
   }
 
   return ret;
+
 }
 
 int shmget(key_t __key, size_t __size, int __shmflg) {
-  (void) __shmflg;
-  int fd, ret;
+
+  (void)__shmflg;
+  int  fd, ret;
   char ourkey[11];
 
   fd = open(ASHMEM_DEVICE, O_RDWR);
-  if (fd < 0)
-    return fd;
+  if (fd < 0) return fd;
 
   sprintf(ourkey, "%d", __key);
   ret = ioctl(fd, ASHMEM_SET_NAME, ourkey);
-  if (ret < 0)
-    goto error;
+  if (ret < 0) goto error;
 
   ret = ioctl(fd, ASHMEM_SET_SIZE, __size);
-  if (ret < 0)
-    goto error;
+  if (ret < 0) goto error;
 
   return fd;
 
 error:
   close(fd);
   return ret;
+
 }
 
 void *shmat(int __shmid, const void *__shmaddr, int __shmflg) {
-  (void) __shmflg;
-  int size;
+
+  (void)__shmflg;
+  int   size;
   void *ptr;
 
   size = ioctl(__shmid, ASHMEM_GET_SIZE, NULL);
-  if (size < 0) {
-    return NULL;
-  }
+  if (size < 0) { return NULL; }
 
   ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, __shmid, 0);
-  if (ptr == MAP_FAILED) {
-    return NULL;
-  }
+  if (ptr == MAP_FAILED) { return NULL; }
 
   return ptr;
+
 }
 
-#endif /* !_ANDROID_ASHMEM_H */
-#endif /* !__ANDROID__ */
+  #endif                                              /* !_ANDROID_ASHMEM_H */
+#endif                                                      /* !__ANDROID__ */
+
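The reindented header above emulates just enough of the SysV shared-memory API
on top of Android's ashmem device. A usage sketch of the intended call
sequence; note that this `shmat` reports failure with NULL rather than
`(void *)-1`:

```c
#include "android-ashmem.h"  /* must be included before any shm usage */

int main(void) {

  int id = shmget(IPC_PRIVATE, 65536, 0600);   /* opens a /dev/ashmem fd */
  if (id < 0) return 1;

  char *p = shmat(id, NULL, 0);                /* mmap()s the region     */
  if (!p) return 1;

  p[0] = 42;                                   /* use the mapping        */
  shmctl(id, IPC_RMID, NULL);                  /* unpin and close the fd */
  return 0;

}
```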
diff --git a/include/cmplog.h b/include/cmplog.h
index bf557785..878ed60c 100644
--- a/include/cmplog.h
+++ b/include/cmplog.h
@@ -30,24 +30,25 @@
 
 #include "config.h"
 
+#define CMPLOG_LVL_MAX 3
+
 #define CMP_MAP_W 65536
-#define CMP_MAP_H 256
+#define CMP_MAP_H 32
 #define CMP_MAP_RTN_H (CMP_MAP_H / 4)
 
 #define SHAPE_BYTES(x) (x + 1)
 
-#define CMP_TYPE_INS 0
-#define CMP_TYPE_RTN 1
+#define CMP_TYPE_INS 1
+#define CMP_TYPE_RTN 2
 
 struct cmp_header {
 
-  unsigned hits : 20;
-
-  unsigned cnt : 20;
-  unsigned id : 16;
-
-  unsigned shape : 5;  // from 0 to 31
-  unsigned type : 1;
+  unsigned hits : 24;
+  unsigned id : 24;
+  unsigned shape : 5;
+  unsigned type : 2;
+  unsigned attribute : 4;
+  unsigned reserved : 5;
 
 } __attribute__((packed));
 
@@ -55,6 +56,8 @@ struct cmp_operands {
 
   u64 v0;
   u64 v1;
+  u64 v0_128;
+  u64 v1_128;
 
 };
 
diff --git a/include/common.h b/include/common.h
index 9490ec5f..bb8831f2 100644
--- a/include/common.h
+++ b/include/common.h
@@ -47,6 +47,7 @@ void   argv_cpy_free(char **argv);
 char **get_qemu_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv);
 char **get_wine_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv);
 char * get_afl_env(char *env);
+u8 *   get_libqasan_path(u8 *own_loc);
 
 extern u8  be_quiet;
 extern u8 *doc_path;                    /* path to documentation dir        */
diff --git a/include/config.h b/include/config.h
index 973bbcbb..181285cd 100644
--- a/include/config.h
+++ b/include/config.h
@@ -10,7 +10,7 @@
                      Dominik Maier <mail@dmnk.co>
 
    Copyright 2016, 2017 Google Inc. All rights reserved.
-   Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+   Copyright 2019-2021 AFLplusplus Project. All rights reserved.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
@@ -23,8 +23,6 @@
 #ifndef _HAVE_CONFIG_H
 #define _HAVE_CONFIG_H
 
-#include "types.h"
-
 /* Version string: */
 
 // c = release, d = volatile github dev, e = experimental branch
@@ -36,6 +34,38 @@
  *                                                    *
  ******************************************************/
 
+/* CMPLOG/REDQUEEN TUNING
+ *
+ * Here you can modify tuning and solving options for CMPLOG.
+ * Note that these are run-time options for afl-fuzz, no target
+ * recompilation required.
+ *
+ */
+
+/* Enable arithmetic compare solving for both paths */
+#define CMPLOG_SOLVE_ARITHMETIC
+
+/* Enable transform following (XOR/ADD/SUB manipulations, hex en/decoding) */
+#define CMPLOG_SOLVE_TRANSFORM
+
+/* if TRANSFORM is enabled, this additionally enables base64 en/decoding */
+// #define CMPLOG_SOLVE_TRANSFORM_BASE64
+
+/* If a redqueen pass finds more than one solve, try to combine them? */
+#define CMPLOG_COMBINE
+
+/* Minimum % of the corpus to perform cmplog on. Default: 20% */
+#define CMPLOG_CORPUS_PERCENT 20U
+
+/* Number of potential positions from which we decide if cmplog becomes
+   useless. Default: 16384 */
+#define CMPLOG_POSITIONS_MAX 16384U
+
+/* Maximum allowed fails per CMP value. Default: 32 * 3 */
+#define CMPLOG_FAIL_MAX 96
+
+/* Now non-cmplog configuration options */
+
 /* console output colors: There are three ways to configure its behavior
  * 1. default: colored outputs fixed on: defined USE_COLOR && defined
  * ALWAYS_COLORED The env var. AFL_NO_COLOR will have no effect
@@ -69,7 +99,7 @@
 /* If you want to have the original afl internal memory corruption checks.
    Disabled by default for speed. it is better to use "make ASAN_BUILD=1". */
 
-//#define _WANT_ORIGINAL_AFL_ALLOC
+// #define _WANT_ORIGINAL_AFL_ALLOC
 
 /* Comment out to disable fancy ANSI boxes and use poor man's 7-bit UI: */
 
diff --git a/include/coverage-32.h b/include/coverage-32.h
index a5cc498c..ca36c29f 100644
--- a/include/coverage-32.h
+++ b/include/coverage-32.h
@@ -97,7 +97,7 @@ inline void discover_word(u8 *ret, u32 *current, u32 *virgin) {
 #define PACK_SIZE 16
 inline u32 skim(const u32 *virgin, const u32 *current, const u32 *current_end) {
 
-  for (; current != current_end; virgin += 4, current += 4) {
+  for (; current < current_end; virgin += 4, current += 4) {
 
     if (current[0] && classify_word(current[0]) & virgin[0]) return 1;
     if (current[1] && classify_word(current[1]) & virgin[1]) return 1;
diff --git a/include/coverage-64.h b/include/coverage-64.h
index 0ede5fa5..54fe9d33 100644
--- a/include/coverage-64.h
+++ b/include/coverage-64.h
@@ -145,7 +145,7 @@ inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
 
   __m256i zeroes = _mm256_setzero_si256();
 
-  for (; current != current_end; virgin += 4, current += 4) {
+  for (; current < current_end; virgin += 4, current += 4) {
 
     __m256i value = *(__m256i *)current;
     __m256i cmp = _mm256_cmpeq_epi64(value, zeroes);
@@ -172,7 +172,7 @@ inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
   #define PACK_SIZE 32
 inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
 
-  for (; current != current_end; virgin += 4, current += 4) {
+  for (; current < current_end; virgin += 4, current += 4) {
 
     if (current[0] && classify_word(current[0]) & virgin[0]) return 1;
     if (current[1] && classify_word(current[1]) & virgin[1]) return 1;
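The `!=` to `<` change in the three `skim()` loops above matters whenever the
map length is not a multiple of the four-word stride: `current` can then step
past `current_end` without ever comparing equal. A standalone toy illustration
(not afl++ code):

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {

  uint64_t        map[10] = {0};          /* 10 words: not a multiple of 4 */
  const uint64_t *cur = map, *end = map + 10;
  unsigned        iters = 0;

  for (; cur < end; cur += 4)             /* with `!=`, cur would jump     */
    iters++;                              /* from map+8 to map+12, missing */
                                          /* end and looping forever       */
  printf("%u iterations\n", iters);       /* prints 3, then terminates     */
  return 0;

}
```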
diff --git a/include/debug.h b/include/debug.h
index ef5b195b..fc1f39cb 100644
--- a/include/debug.h
+++ b/include/debug.h
@@ -295,8 +295,8 @@ static inline const char *colorfilter(const char *x) {
                                                                          \
     SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD                            \
          "\n[-] PROGRAM ABORT : " cRST   x);                               \
-    SAYF(cLRD "\n         Location : " cRST "%s(), %s:%d\n\n", __func__, \
-         __FILE__, __LINE__);                                            \
+    SAYF(cLRD "\n         Location : " cRST "%s(), %s:%u\n\n", __func__, \
+         __FILE__, (u32)__LINE__);                                       \
     exit(1);                                                             \
                                                                          \
   } while (0)
@@ -308,8 +308,8 @@ static inline const char *colorfilter(const char *x) {
                                                                          \
     SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD                            \
          "\n[-] PROGRAM ABORT : " cRST   x);                               \
-    SAYF(cLRD "\n    Stop location : " cRST "%s(), %s:%d\n\n", __func__, \
-         __FILE__, __LINE__);                                            \
+    SAYF(cLRD "\n    Stop location : " cRST "%s(), %s:%u\n\n", __func__, \
+         __FILE__, (u32)__LINE__);                                       \
     abort();                                                             \
                                                                          \
   } while (0)
@@ -322,8 +322,8 @@ static inline const char *colorfilter(const char *x) {
     fflush(stdout);                                                    \
     SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD                          \
          "\n[-]  SYSTEM ERROR : " cRST   x);                             \
-    SAYF(cLRD "\n    Stop location : " cRST "%s(), %s:%d\n", __func__, \
-         __FILE__, __LINE__);                                          \
+    SAYF(cLRD "\n    Stop location : " cRST "%s(), %s:%u\n", __func__, \
+         __FILE__, (u32)__LINE__);                                     \
     SAYF(cLRD "       OS message : " cRST "%s\n", strerror(errno));    \
     exit(1);                                                           \
                                                                        \
@@ -344,12 +344,12 @@ static inline const char *colorfilter(const char *x) {
 
 /* Show a prefixed debug output. */
 
-#define DEBUGF(x...)                         \
-  do {                                       \
-                                             \
-    SAYF(cMGN "[D] " cBRI "DEBUG: " cRST x); \
-    SAYF(cRST "");                           \
-                                             \
+#define DEBUGF(x...)                                    \
+  do {                                                  \
+                                                        \
+    fprintf(stderr, cMGN "[D] " cBRI "DEBUG: " cRST x); \
+    fprintf(stderr, cRST "");                           \
+                                                        \
   } while (0)
 
 /* Error-checking versions of read() and write() that call RPFATAL() as
diff --git a/include/envs.h b/include/envs.h
index 97367fae..36667ebc 100644
--- a/include/envs.h
+++ b/include/envs.h
@@ -16,6 +16,7 @@ static char *afl_environment_deprecated[] = {
 
 static char *afl_environment_variables[] = {
 
+    "_AFL_LTO_COMPILE",
     "AFL_ALIGNED_ALLOC",
     "AFL_ALLOW_TMP",
     "AFL_ANALYZE_HEX",
@@ -28,6 +29,7 @@ static char *afl_environment_variables[] = {
     "AFL_CC",
     "AFL_CMIN_ALLOW_ANY",
     "AFL_CMIN_CRASHES_ONLY",
+    "AFL_CMPLOG_ONLY_NEW",
     "AFL_CODE_END",
     "AFL_CODE_START",
     "AFL_COMPCOV_BINNAME",
@@ -42,11 +44,13 @@ static char *afl_environment_variables[] = {
     "AFL_DEBUG_GDB",
     "AFL_DISABLE_TRIM",
     "AFL_DONT_OPTIMIZE",
+    "AFL_DRIVER_STDERR_DUPLICATE_FILENAME",
     "AFL_DUMB_FORKSRV",
     "AFL_ENTRYPOINT",
     "AFL_EXIT_WHEN_DONE",
     "AFL_FAST_CAL",
     "AFL_FORCE_UI",
+    "AFL_FUZZER_ARGS",  // oss-fuzz
     "AFL_GCC_ALLOWLIST",
     "AFL_GCC_DENYLIST",
     "AFL_GCC_BLOCKLIST",
@@ -58,6 +62,7 @@ static char *afl_environment_variables[] = {
     "AFL_FORKSRV_INIT_TMOUT",
     "AFL_HARDEN",
     "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES",
+    "AFL_IGNORE_UNKNOWN_ENVS",
     "AFL_IMPORT_FIRST",
     "AFL_INST_LIBS",
     "AFL_INST_RATIO",
@@ -162,6 +167,7 @@ static char *afl_environment_variables[] = {
     "AFL_WINE_PATH",
     "AFL_NO_SNAPSHOT",
     "AFL_EXPAND_HAVOC_NOW",
+    "AFL_USE_QASAN",
     NULL
 
 };
diff --git a/include/forkserver.h b/include/forkserver.h
index 3019e289..ac027f81 100644
--- a/include/forkserver.h
+++ b/include/forkserver.h
@@ -83,6 +83,8 @@ typedef struct afl_forkserver {
 
   bool uses_asan;                       /* Target uses ASAN?                */
 
+  bool debug;                           /* debug mode?                      */
+
   bool uses_crash_exitcode;             /* Custom crash exitcode specified? */
   u8   crash_exitcode;                  /* The crash exitcode specified     */
 
@@ -118,11 +120,14 @@ void afl_fsrv_init(afl_forkserver_t *fsrv);
 void afl_fsrv_init_dup(afl_forkserver_t *fsrv_to, afl_forkserver_t *from);
 void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
                     volatile u8 *stop_soon_p, u8 debug_child_output);
+u32  afl_fsrv_get_mapsize(afl_forkserver_t *fsrv, char **argv,
+                          volatile u8 *stop_soon_p, u8 debug_child_output);
 void afl_fsrv_write_to_testcase(afl_forkserver_t *fsrv, u8 *buf, size_t len);
 fsrv_run_result_t afl_fsrv_run_target(afl_forkserver_t *fsrv, u32 timeout,
                                       volatile u8 *stop_soon_p);
 void              afl_fsrv_killall(void);
 void              afl_fsrv_deinit(afl_forkserver_t *fsrv);
+void              afl_fsrv_kill(afl_forkserver_t *fsrv);
 
 #ifdef __APPLE__
   #define MSG_FORK_ON_APPLE                                                    \
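Two forkserver entry points are added here: `afl_fsrv_get_mapsize()`, which
appears to start the target once and return the coverage map size it
announces, and `afl_fsrv_kill()`, which tears down the child and forkserver
processes. A hedged sketch of the call order, with the many required `fsrv`
fields (target path, file descriptors, timeouts) elided:

```c
#include "forkserver.h"

static volatile u8 stop = 0;

/* Sketch only: run one buffer against a target via the forkserver. */
void run_one(char **argv, u8 *buf, size_t len) {

  afl_forkserver_t fsrv;
  afl_fsrv_init(&fsrv);
  /* ... fill in fsrv.target_path, fsrv.out_fd, timeouts, etc. ... */

  fsrv.map_size = afl_fsrv_get_mapsize(&fsrv, argv, &stop, 0);

  afl_fsrv_write_to_testcase(&fsrv, buf, len);
  fsrv_run_result_t r = afl_fsrv_run_target(&fsrv, 1000 /* ms */, &stop);
  (void)r;

  afl_fsrv_kill(&fsrv);
  afl_fsrv_deinit(&fsrv);

}
```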
diff --git a/include/sharedmem.h b/include/sharedmem.h
index b15d0535..fdc947f9 100644
--- a/include/sharedmem.h
+++ b/include/sharedmem.h
@@ -51,6 +51,7 @@ typedef struct sharedmem {
   size_t map_size;                                 /* actual allocated size */
 
   int             cmplog_mode;
+  int             shmemfuzz_mode;
   struct cmp_map *cmp_map;
 
 } sharedmem_t;
diff --git a/include/types.h b/include/types.h
index 3e3bc953..7b94fb83 100644
--- a/include/types.h
+++ b/include/types.h
@@ -25,10 +25,15 @@
 
 #include <stdint.h>
 #include <stdlib.h>
+#include "config.h"
 
 typedef uint8_t  u8;
 typedef uint16_t u16;
 typedef uint32_t u32;
+#ifdef WORD_SIZE_64
+typedef unsigned __int128 uint128_t;
+typedef uint128_t         u128;
+#endif
 
 /* Extended forkserver option values */
 
@@ -61,6 +66,10 @@ typedef int8_t  s8;
 typedef int16_t s16;
 typedef int32_t s32;
 typedef int64_t s64;
+#ifdef WORD_SIZE_64
+typedef __int128 int128_t;
+typedef int128_t s128;
+#endif
 
 #ifndef MIN
   #define MIN(a, b)           \
@@ -114,6 +123,33 @@ typedef int64_t s64;
                                                                                \
   })
 
+// It is impossible to define 128 bit constants, so ...
+#ifdef WORD_SIZE_64
+  #define SWAPN(_x, _l)                            \
+    ({                                             \
+                                                   \
+      u128  _res = (_x), _ret;                     \
+      char *d = (char *)&_ret, *s = (char *)&_res; \
+      int   i;                                     \
+      for (i = 0; i < 16; i++)                     \
+        d[15 - i] = s[i];                          \
+      u32 sr = 128U - ((_l) << 3U);                \
+      (_ret >>= sr);                               \
+      (u128) _ret;                                 \
+                                                   \
+    })
+#endif
+
+#define SWAPNN(_x, _y, _l)                     \
+  ({                                           \
+                                               \
+    char *d = (char *)(_x), *s = (char *)(_y); \
+    u32   i, l = (_l)-1;                       \
+    for (i = 0; i <= l; i++)                   \
+      d[l - i] = s[i];                         \
+                                               \
+  })
+
 #ifdef AFL_LLVM_PASS
   #if defined(__linux__) || !defined(__ANDROID__)
     #define AFL_SR(s) (srandom(s))
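`SWAPN()`/`SWAPNN()` byte-reverse values whose width is only known at run
time, which is needed because 128-bit constants cannot be written directly.
A small standalone check of `SWAPNN()` on a 3-byte value:

```c
#include <stdio.h>
#include "types.h"  /* for SWAPNN(); the include path is an assumption */

int main(void) {

  char src[3] = {0x11, 0x22, 0x33}, dst[3] = {0};

  SWAPNN(dst, src, 3);            /* copies src into dst, bytes reversed */

  printf("%02x %02x %02x\n", (unsigned char)dst[0], (unsigned char)dst[1],
         (unsigned char)dst[2]);  /* prints: 33 22 11                    */
  return 0;

}
```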
diff --git a/instrumentation/LLVMInsTrim.so.cc b/instrumentation/LLVMInsTrim.so.cc
index 235ee30f..948f8f3a 100644
--- a/instrumentation/LLVMInsTrim.so.cc
+++ b/instrumentation/LLVMInsTrim.so.cc
@@ -459,7 +459,7 @@ struct InsTrim : public ModulePass {
             BasicBlock *PBB = *PI;
             auto        It = PredMap.insert({PBB, genLabel()});
             unsigned    Label = It.first->second;
-            cur_loc = Label;
+            // cur_loc = Label;
             PN->addIncoming(ConstantInt::get(Int32Ty, Label), PBB);
 
           }
diff --git a/instrumentation/README.instrument_list.md b/instrumentation/README.instrument_list.md
index b47b50f6..2116d24c 100644
--- a/instrumentation/README.instrument_list.md
+++ b/instrumentation/README.instrument_list.md
@@ -43,7 +43,14 @@ in any function where you want:
   * `__AFL_COVERAGE_DISCARD();` - reset all coverage gathered until this point
   * `__AFL_COVERAGE_SKIP();` - mark this test case as unimportant. Whatever happens, afl-fuzz will ignore it.
 
-## 3) Selective instrumenation with AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST
+A special function is `__afl_coverage_interesting`.
+To use it, declare `void __afl_coverage_interesting(u8 val, u32 id);` and
+call the function wherever you like. You choose the `val` parameter; the
+`id` parameter is reserved for afl-fuzz and will be overwritten at
+instrumentation time (see the usage sketch below).
+Note that the useful values for `val` are 1, 2, 3, 4, 8, 16, 32, 64 and
+128; a value of e.g. 33 will be counted as 32 for coverage purposes.
+
+## 3) Selective instrumentation with AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST
 
 This feature is equivalent to llvm 12 sancov feature and allows to specify
 on a filename and/or function name level to instrument these or skip them.
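A harness-side usage sketch for the `__afl_coverage_interesting` paragraph
above: declare the prototype, then call it with a power-of-two `val`; the
instrumentation pass (see the SanitizerCoverage changes below) rewrites the
second argument with a fresh coverage id. The function and state names here
are illustrative only:

```c
#include <stdint.h>

/* Implemented by the afl++ runtime in instrumented builds. */
void __afl_coverage_interesting(uint8_t val, uint32_t id);

void check_protocol_state(int handshake_depth) {

  if (handshake_depth > 4) {

    /* The 0 placeholder id is overwritten at instrumentation time. */
    __afl_coverage_interesting(16, 0);

  }

}
```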
diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc
index 016ac71f..3026abc8 100644
--- a/instrumentation/SanitizerCoverageLTO.so.cc
+++ b/instrumentation/SanitizerCoverageLTO.so.cc
@@ -760,7 +760,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
                   if (literalLength + 1 == optLength) {
 
                     Str2.append("\0", 1);  // add null byte
-                    addedNull = true;
+                    // addedNull = true;
 
                   }
 
@@ -1237,6 +1237,25 @@ void ModuleSanitizerCoverage::instrumentFunction(
 
   for (auto &BB : F) {
 
+    for (auto &IN : BB) {
+
+      CallInst *callInst = nullptr;
+
+      if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+        Function *Callee = callInst->getCalledFunction();
+        if (!Callee) continue;
+        if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
+        StringRef FuncName = Callee->getName();
+        if (FuncName.compare(StringRef("__afl_coverage_interesting"))) continue;
+
+        Value *val = ConstantInt::get(Int32Ty, ++afl_global_id);
+        callInst->setOperand(1, val);
+
+      }
+
+    }
+
     if (shouldInstrumentBlock(F, &BB, DT, PDT, Options))
       BlocksToInstrument.push_back(&BB);
     for (auto &Inst : BB) {
@@ -1338,6 +1357,7 @@ bool ModuleSanitizerCoverage::InjectCoverage(Function &             F,
 
   if (AllBlocks.empty()) return false;
   CreateFunctionLocalArrays(F, AllBlocks);
+
   for (size_t i = 0, N = AllBlocks.size(); i < N; i++) {
 
     // afl++ START
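
The loop added above gives every __afl_coverage_interesting call site its own
edge id by patching the call's second operand. On the runtime side this id
plausibly serves as a direct index into the shared coverage map - a sketch of
that assumption (the real definition lives in afl-compiler-rt.o.c):

    /* assumed runtime counterpart: record val at the slot the pass chose */
    void __afl_coverage_interesting(u8 val, u32 id) {

      __afl_area_ptr[id] = val;

    }
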
diff --git a/instrumentation/SanitizerCoveragePCGUARD.so.cc b/instrumentation/SanitizerCoveragePCGUARD.so.cc
index ecd6bc9b..9b1351b0 100644
--- a/instrumentation/SanitizerCoveragePCGUARD.so.cc
+++ b/instrumentation/SanitizerCoveragePCGUARD.so.cc
@@ -311,7 +311,8 @@ class ModuleSanitizerCoverage {
                                                     Function &F, Type *Ty,
                                                     const char *Section);
   GlobalVariable *CreatePCArray(Function &F, ArrayRef<BasicBlock *> AllBlocks);
-  void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+  void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks,
+                                 uint32_t special);
   void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
                              bool IsLeafFunc = true);
   Function *CreateInitCallsForSections(Module &M, const char *CtorName,
@@ -970,11 +971,11 @@ GlobalVariable *ModuleSanitizerCoverage::CreatePCArray(
 }
 
 void ModuleSanitizerCoverage::CreateFunctionLocalArrays(
-    Function &F, ArrayRef<BasicBlock *> AllBlocks) {
+    Function &F, ArrayRef<BasicBlock *> AllBlocks, uint32_t special) {
 
   if (Options.TracePCGuard)
     FunctionGuardArray = CreateFunctionLocalArrayInSection(
-        AllBlocks.size(), F, Int32Ty, SanCovGuardsSectionName);
+        AllBlocks.size() + special, F, Int32Ty, SanCovGuardsSectionName);
 
   if (Options.Inline8bitCounters)
     Function8bitCounterArray = CreateFunctionLocalArrayInSection(
@@ -993,9 +994,38 @@ bool ModuleSanitizerCoverage::InjectCoverage(Function &             F,
                                              bool IsLeafFunc) {
 
   if (AllBlocks.empty()) return false;
-  CreateFunctionLocalArrays(F, AllBlocks);
+
+  uint32_t special = 0;
+  for (auto &BB : F) {
+
+    for (auto &IN : BB) {
+
+      CallInst *callInst = nullptr;
+
+      if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+        Function *Callee = callInst->getCalledFunction();
+        if (!Callee) continue;
+        if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
+        StringRef FuncName = Callee->getName();
+        if (FuncName.compare(StringRef("__afl_coverage_interesting"))) continue;
+
+        uint32_t id = 1 + instr + (uint32_t)AllBlocks.size() + special++;
+        Value *  val = ConstantInt::get(Int32Ty, id);
+        callInst->setOperand(1, val);
+
+      }
+
+    }
+
+  }
+
+  CreateFunctionLocalArrays(F, AllBlocks, special);
   for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
     InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc);
+
+  instr += special;
+
   return true;
 
 }
@@ -1058,7 +1088,7 @@ void ModuleSanitizerCoverage::InjectTraceForSwitch(
 
       }
 
-      llvm::sort(Initializers.begin() + 2, Initializers.end(),
+      llvm::sort(drop_begin(Initializers, 2),
                  [](const Constant *A, const Constant *B) {
 
                    return cast<ConstantInt>(A)->getLimitedValue() <
@@ -1106,10 +1136,10 @@ void ModuleSanitizerCoverage::InjectTraceForGep(
   for (auto GEP : GepTraceTargets) {
 
     IRBuilder<> IRB(GEP);
-    for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
-      if (!isa<ConstantInt>(*I) && (*I)->getType()->isIntegerTy())
+    for (Use &Idx : GEP->indices())
+      if (!isa<ConstantInt>(Idx) && Idx->getType()->isIntegerTy())
         IRB.CreateCall(SanCovTraceGepFunction,
-                       {IRB.CreateIntCast(*I, IntptrTy, true)});
+                       {IRB.CreateIntCast(Idx, IntptrTy, true)});
 
   }
 
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
index e31bff16..5fb715e2 100644
--- a/instrumentation/afl-compiler-rt.o.c
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -70,7 +70,7 @@
    run. It will end up as .comm, so it shouldn't be too wasteful. */
 
 #if MAP_SIZE <= 65536
-  #define MAP_INITIAL_SIZE 256000
+  #define MAP_INITIAL_SIZE 2097152
 #else
   #define MAP_INITIAL_SIZE MAP_SIZE
 #endif
@@ -161,7 +161,7 @@ void send_forkserver_error(int error) {
   u32 status;
   if (!error || error > 0xffff) return;
   status = (FS_OPT_ERROR | FS_OPT_SET_ERROR(error));
-  if (write(FORKSRV_FD + 1, (char *)&status, 4) != 4) return;
+  if (write(FORKSRV_FD + 1, (char *)&status, 4) != 4) { return; }
 
 }
 
@@ -368,8 +368,8 @@ static void __afl_map_shm(void) {
 
     if (__afl_map_size && __afl_map_size > MAP_SIZE) {
 
-      u8 *map_env = getenv("AFL_MAP_SIZE");
-      if (!map_env || atoi(map_env) < MAP_SIZE) {
+      u8 *map_env = (u8 *)getenv("AFL_MAP_SIZE");
+      if (!map_env || atoi((char *)map_env) < MAP_SIZE) {
 
         send_forkserver_error(FS_ERROR_MAP_SIZE);
         _exit(1);
@@ -378,7 +378,7 @@ static void __afl_map_shm(void) {
 
     }
 
-    __afl_area_ptr = shmat(shm_id, (void *)__afl_map_addr, 0);
+    __afl_area_ptr = (u8 *)shmat(shm_id, (void *)__afl_map_addr, 0);
 
     /* Whooooops. */
 
@@ -405,9 +405,9 @@ static void __afl_map_shm(void) {
 
              __afl_map_addr) {
 
-    __afl_area_ptr =
-        mmap((void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE,
-             MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+    __afl_area_ptr = (u8 *)mmap(
+        (void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE,
+        MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 
     if (__afl_area_ptr == MAP_FAILED) {
 
@@ -425,7 +425,7 @@ static void __afl_map_shm(void) {
 
     if (__afl_final_loc > MAP_INITIAL_SIZE) {
 
-      __afl_area_ptr = malloc(__afl_final_loc);
+      __afl_area_ptr = (u8 *)malloc(__afl_final_loc);
 
     }
 
@@ -439,7 +439,7 @@ static void __afl_map_shm(void) {
 
     if (__afl_map_size > MAP_INITIAL_SIZE) {
 
-      __afl_area_ptr_dummy = malloc(__afl_map_size);
+      __afl_area_ptr_dummy = (u8 *)malloc(__afl_map_size);
 
       if (__afl_area_ptr_dummy) {
 
@@ -505,7 +505,7 @@ static void __afl_map_shm(void) {
 #else
     u32 shm_id = atoi(id_str);
 
-    __afl_cmp_map = shmat(shm_id, NULL, 0);
+    __afl_cmp_map = (struct cmp_map *)shmat(shm_id, NULL, 0);
 #endif
 
     __afl_cmp_map_backup = __afl_cmp_map;
@@ -544,11 +544,11 @@ static void __afl_start_snapshots(void) {
   if (__afl_dictionary_len && __afl_dictionary) status |= FS_OPT_AUTODICT;
   memcpy(tmp, &status, 4);
 
-  if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
+  if (write(FORKSRV_FD + 1, tmp, 4) != 4) { return; }
 
   if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) {
 
-    if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+    if (read(FORKSRV_FD, &was_killed, 4) != 4) { _exit(1); }
 
     if (getenv("AFL_DEBUG")) {
 
@@ -1090,7 +1090,7 @@ __attribute__((constructor(0))) void __afl_auto_first(void) {
   if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
   u8 *ptr;
 
-  ptr = (u8 *)malloc(1024000);
+  ptr = (u8 *)malloc(2097152);
 
   if (ptr && (ssize_t)ptr != -1) {
 
@@ -1169,8 +1169,9 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   if (getenv("AFL_DEBUG")) {
 
-    fprintf(stderr, "Running __sanitizer_cov_trace_pc_guard_init: %p-%p\n",
-            start, stop);
+    fprintf(stderr,
+            "Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges)\n",
+            start, stop, (unsigned long)(stop - start));
 
   }
 
@@ -1207,22 +1208,33 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
 ///// CmpLog instrumentation
 
-void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2) {
+void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2, uint8_t attr) {
 
-  if (unlikely(!__afl_cmp_map)) return;
+  // fprintf(stderr, "hook1 arg0=%02x arg1=%02x attr=%u\n",
+  //         (u8) arg1, (u8) arg2, attr);
+
+  if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
 
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
   k = (k >> 4) ^ (k << 8);
   k &= CMP_MAP_W - 1;
 
-  __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+  u32 hits;
 
-  u32 hits = __afl_cmp_map->headers[k].hits;
-  __afl_cmp_map->headers[k].hits = hits + 1;
-  // if (!__afl_cmp_map->headers[k].cnt)
-  //  __afl_cmp_map->headers[k].cnt = __afl_cmp_counter++;
+  if (__afl_cmp_map->headers[k].type != CMP_TYPE_INS) {
 
-  __afl_cmp_map->headers[k].shape = 0;
+    __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+    hits = 0;
+    __afl_cmp_map->headers[k].hits = 1;
+    __afl_cmp_map->headers[k].shape = 0;
+
+  } else {
+
+    hits = __afl_cmp_map->headers[k].hits++;
+
+  }
+
+  __afl_cmp_map->headers[k].attribute = attr;
 
   hits &= CMP_MAP_H - 1;
   __afl_cmp_map->log[k][hits].v0 = arg1;
@@ -1230,20 +1242,36 @@ void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2) {
 
 }
 
-void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2) {
+void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2, uint8_t attr) {
 
-  if (unlikely(!__afl_cmp_map)) return;
+  if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
 
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
   k = (k >> 4) ^ (k << 8);
   k &= CMP_MAP_W - 1;
 
-  __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+  u32 hits;
 
-  u32 hits = __afl_cmp_map->headers[k].hits;
-  __afl_cmp_map->headers[k].hits = hits + 1;
+  if (__afl_cmp_map->headers[k].type != CMP_TYPE_INS) {
 
-  __afl_cmp_map->headers[k].shape = 1;
+    __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+    hits = 0;
+    __afl_cmp_map->headers[k].hits = 1;
+    __afl_cmp_map->headers[k].shape = 1;
+
+  } else {
+
+    hits = __afl_cmp_map->headers[k].hits++;
+
+    if (!__afl_cmp_map->headers[k].shape) {
+
+      __afl_cmp_map->headers[k].shape = 1;
+
+    }
+
+  }
+
+  __afl_cmp_map->headers[k].attribute = attr;
 
   hits &= CMP_MAP_H - 1;
   __afl_cmp_map->log[k][hits].v0 = arg1;
@@ -1251,20 +1279,38 @@ void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2) {
 
 }
 
-void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2) {
+void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2, uint8_t attr) {
 
-  if (unlikely(!__afl_cmp_map)) return;
+  // fprintf(stderr, "hook4 arg0=%x arg1=%x attr=%u\n", arg1, arg2, attr);
+
+  if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
 
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
   k = (k >> 4) ^ (k << 8);
   k &= CMP_MAP_W - 1;
 
-  __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+  u32 hits;
 
-  u32 hits = __afl_cmp_map->headers[k].hits;
-  __afl_cmp_map->headers[k].hits = hits + 1;
+  if (__afl_cmp_map->headers[k].type != CMP_TYPE_INS) {
 
-  __afl_cmp_map->headers[k].shape = 3;
+    __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+    hits = 0;
+    __afl_cmp_map->headers[k].hits = 1;
+    __afl_cmp_map->headers[k].shape = 3;
+
+  } else {
+
+    hits = __afl_cmp_map->headers[k].hits++;
+
+    if (__afl_cmp_map->headers[k].shape < 3) {
+
+      __afl_cmp_map->headers[k].shape = 3;
+
+    }
+
+  }
+
+  __afl_cmp_map->headers[k].attribute = attr;
 
   hits &= CMP_MAP_H - 1;
   __afl_cmp_map->log[k][hits].v0 = arg1;
@@ -1272,20 +1318,38 @@ void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2) {
 
 }
 
-void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2) {
+void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2, uint8_t attr) {
 
-  if (unlikely(!__afl_cmp_map)) return;
+  // fprintf(stderr, "hook8 arg0=%lx arg1=%lx attr=%u\n", arg1, arg2, attr);
+
+  if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
 
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
   k = (k >> 4) ^ (k << 8);
   k &= CMP_MAP_W - 1;
 
-  __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+  u32 hits;
 
-  u32 hits = __afl_cmp_map->headers[k].hits;
-  __afl_cmp_map->headers[k].hits = hits + 1;
+  if (__afl_cmp_map->headers[k].type != CMP_TYPE_INS) {
+
+    __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+    hits = 0;
+    __afl_cmp_map->headers[k].hits = 1;
+    __afl_cmp_map->headers[k].shape = 7;
+
+  } else {
+
+    hits = __afl_cmp_map->headers[k].hits++;
+
+    if (__afl_cmp_map->headers[k].shape < 7) {
+
+      __afl_cmp_map->headers[k].shape = 7;
+
+    }
+
+  }
 
-  __afl_cmp_map->headers[k].shape = 7;
+  __afl_cmp_map->headers[k].attribute = attr;
 
   hits &= CMP_MAP_H - 1;
   __afl_cmp_map->log[k][hits].v0 = arg1;
@@ -1293,35 +1357,154 @@ void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2) {
 
 }
 
-#if defined(__APPLE__)
-  #pragma weak __sanitizer_cov_trace_const_cmp1 = __cmplog_ins_hook1
-  #pragma weak __sanitizer_cov_trace_const_cmp2 = __cmplog_ins_hook2
-  #pragma weak __sanitizer_cov_trace_const_cmp4 = __cmplog_ins_hook4
-  #pragma weak __sanitizer_cov_trace_const_cmp8 = __cmplog_ins_hook8
+#ifdef WORD_SIZE_64
+// support for u24 to u120 via llvm _ExtInt(); size is in bytes minus 1
+void __cmplog_ins_hookN(uint128_t arg1, uint128_t arg2, uint8_t attr,
+                        uint8_t size) {
 
-  #pragma weak __sanitizer_cov_trace_cmp1 = __cmplog_ins_hook1
-  #pragma weak __sanitizer_cov_trace_cmp2 = __cmplog_ins_hook2
-  #pragma weak __sanitizer_cov_trace_cmp4 = __cmplog_ins_hook4
-  #pragma weak __sanitizer_cov_trace_cmp8 = __cmplog_ins_hook8
-#else
-void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2)
-    __attribute__((alias("__cmplog_ins_hook1")));
-void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2)
-    __attribute__((alias("__cmplog_ins_hook2")));
-void __sanitizer_cov_trace_const_cmp4(uint32_t arg1, uint32_t arg2)
-    __attribute__((alias("__cmplog_ins_hook4")));
-void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2)
-    __attribute__((alias("__cmplog_ins_hook8")));
-
-void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2)
-    __attribute__((alias("__cmplog_ins_hook1")));
-void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2)
-    __attribute__((alias("__cmplog_ins_hook2")));
-void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2)
-    __attribute__((alias("__cmplog_ins_hook4")));
-void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2)
-    __attribute__((alias("__cmplog_ins_hook8")));
-#endif                                                /* defined(__APPLE__) */
+  // fprintf(stderr, "hookN arg0=%llx:%llx arg1=%llx:%llx bytes=%u attr=%u\n",
+  // (u64)(arg1 >> 64), (u64)arg1, (u64)(arg2 >> 64), (u64)arg2, size + 1,
+  // attr);
+
+  if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
+
+  uintptr_t k = (uintptr_t)__builtin_return_address(0);
+  k = (k >> 4) ^ (k << 8);
+  k &= CMP_MAP_W - 1;
+
+  u32 hits;
+
+  if (__afl_cmp_map->headers[k].type != CMP_TYPE_INS) {
+
+    __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+    hits = 0;
+    __afl_cmp_map->headers[k].hits = 1;
+    __afl_cmp_map->headers[k].shape = size;
+
+  } else {
+
+    hits = __afl_cmp_map->headers[k].hits++;
+
+    if (__afl_cmp_map->headers[k].shape < size) {
+
+      __afl_cmp_map->headers[k].shape = size;
+
+    }
+
+  }
+
+  __afl_cmp_map->headers[k].attribute = attr;
+
+  hits &= CMP_MAP_H - 1;
+  __afl_cmp_map->log[k][hits].v0 = (u64)arg1;
+  __afl_cmp_map->log[k][hits].v1 = (u64)arg2;
+
+  if (size > 7) {
+
+    __afl_cmp_map->log[k][hits].v0_128 = (u64)(arg1 >> 64);
+    __afl_cmp_map->log[k][hits].v1_128 = (u64)(arg2 >> 64);
+
+  }
+
+}
+
+void __cmplog_ins_hook16(uint128_t arg1, uint128_t arg2, uint8_t attr) {
+
+  if (unlikely(!__afl_cmp_map)) return;
+
+  uintptr_t k = (uintptr_t)__builtin_return_address(0);
+  k = (k >> 4) ^ (k << 8);
+  k &= CMP_MAP_W - 1;
+
+  u32 hits;
+
+  if (__afl_cmp_map->headers[k].type != CMP_TYPE_INS) {
+
+    __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+    hits = 0;
+    __afl_cmp_map->headers[k].hits = 1;
+    __afl_cmp_map->headers[k].shape = 15;
+
+  } else {
+
+    hits = __afl_cmp_map->headers[k].hits++;
+
+    if (__afl_cmp_map->headers[k].shape < 15) {
+
+      __afl_cmp_map->headers[k].shape = 15;
+
+    }
+
+  }
+
+  __afl_cmp_map->headers[k].attribute = attr;
+
+  hits &= CMP_MAP_H - 1;
+  __afl_cmp_map->log[k][hits].v0 = (u64)arg1;
+  __afl_cmp_map->log[k][hits].v1 = (u64)arg2;
+  __afl_cmp_map->log[k][hits].v0_128 = (u64)(arg1 >> 64);
+  __afl_cmp_map->log[k][hits].v1_128 = (u64)(arg2 >> 64);
+
+}
+
+#endif
+
+void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2) {
+
+  __cmplog_ins_hook1(arg1, arg2, 0);
+
+}
+
+void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2) {
+
+  __cmplog_ins_hook1(arg1, arg2, 0);
+
+}
+
+void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2) {
+
+  __cmplog_ins_hook2(arg1, arg2, 0);
+
+}
+
+void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2) {
+
+  __cmplog_ins_hook2(arg1, arg2, 0);
+
+}
+
+void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) {
+
+  __cmplog_ins_hook4(arg1, arg2, 0);
+
+}
+
+void __sanitizer_cov_trace_const_cmp4(uint32_t arg1, uint32_t arg2) {
+
+  __cmplog_ins_hook4(arg1, arg2, 0);
+
+}
+
+void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2) {
+
+  __cmplog_ins_hook8(arg1, arg2, 0);
+
+}
+
+void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2) {
+
+  __cmplog_ins_hook8(arg1, arg2, 0);
+
+}
+
+#ifdef WORD_SIZE_64
+void __sanitizer_cov_trace_cmp16(uint128_t arg1, uint128_t arg2) {
+
+  __cmplog_ins_hook16(arg1, arg2, 0);
+
+}
+
+#endif
 
 void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
 
@@ -1333,12 +1516,28 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
     k = (k >> 4) ^ (k << 8);
     k &= CMP_MAP_W - 1;
 
-    __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+    u32 hits;
 
-    u32 hits = __afl_cmp_map->headers[k].hits;
-    __afl_cmp_map->headers[k].hits = hits + 1;
+    if (__afl_cmp_map->headers[k].type != CMP_TYPE_INS) {
 
-    __afl_cmp_map->headers[k].shape = 7;
+      __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+      hits = 0;
+      __afl_cmp_map->headers[k].hits = 1;
+      __afl_cmp_map->headers[k].shape = 7;
+
+    } else {
+
+      hits = __afl_cmp_map->headers[k].hits++;
+
+      if (__afl_cmp_map->headers[k].shape < 7) {
+
+        __afl_cmp_map->headers[k].shape = 7;
+
+      }
+
+    }
+
+    __afl_cmp_map->headers[k].attribute = 1;
 
     hits &= CMP_MAP_H - 1;
     __afl_cmp_map->log[k][hits].v0 = val;
@@ -1353,7 +1552,7 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
 // to avoid to call it on .text addresses
 static int area_is_mapped(void *ptr, size_t len) {
 
-  char *p = ptr;
+  char *p = (char *)ptr;
   char *page = (char *)((uintptr_t)p & ~(sysconf(_SC_PAGE_SIZE) - 1));
 
   int r = msync(page, (p - page) + len, MS_ASYNC);
@@ -1364,6 +1563,18 @@ static int area_is_mapped(void *ptr, size_t len) {
 
 void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
 
+  /*
+    u32 i;
+    if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return;
+    fprintf(stderr, "rtn arg0=");
+    for (i = 0; i < 24; i++)
+      fprintf(stderr, "%02x", ptr1[i]);
+    fprintf(stderr, " arg1=");
+    for (i = 0; i < 24; i++)
+      fprintf(stderr, "%02x", ptr2[i]);
+    fprintf(stderr, "\n");
+  */
+
   if (unlikely(!__afl_cmp_map)) return;
 
   if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return;
@@ -1372,12 +1583,26 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
   k = (k >> 4) ^ (k << 8);
   k &= CMP_MAP_W - 1;
 
-  __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+  u32 hits;
+
+  if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
+
+    __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+    hits = 0;
+    __afl_cmp_map->headers[k].hits = 1;
+    __afl_cmp_map->headers[k].shape = 31;
+
+  } else {
+
+    hits = __afl_cmp_map->headers[k].hits++;
+
+    if (__afl_cmp_map->headers[k].shape < 31) {
+
+      __afl_cmp_map->headers[k].shape = 31;
 
-  u32 hits = __afl_cmp_map->headers[k].hits;
-  __afl_cmp_map->headers[k].hits = hits + 1;
+    }
 
-  __afl_cmp_map->headers[k].shape = 31;
+  }
 
   hits &= CMP_MAP_RTN_H - 1;
   __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0,
@@ -1387,6 +1612,71 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
 
 }
 
+// gcc libstdc++
+// _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7compareEPKc
+static u8 *get_gcc_stdstring(u8 *string) {
+
+  u32 *len = (u32 *)(string + 8);
+
+  if (*len < 16) {  // in structure
+
+    return (string + 16);
+
+  } else {  // in memory
+
+    u8 **ptr = (u8 **)string;
+    return (*ptr);
+
+  }
+
+}
+
+// llvm libc++ _ZNKSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocator
+//             IcEEE7compareEmmPKcm
+static u8 *get_llvm_stdstring(u8 *string) {
+
+  // length is in: if ((string[0] & 1) == 0) u8 len = (string[0] >> 1);
+  // or: if (string[0] & 1) u32 *len = (u32 *) (string + 8);
+
+  if (string[0] & 1) {  // in memory
+
+    u8 **ptr = (u8 **)(string + 16);
+    return (*ptr);
+
+  } else {  // in structure
+
+    return (string + 1);
+
+  }
+
+}
+
+void __cmplog_rtn_gcc_stdstring_cstring(u8 *stdstring, u8 *cstring) {
+
+  __cmplog_rtn_hook(get_gcc_stdstring(stdstring), cstring);
+
+}
+
+void __cmplog_rtn_gcc_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) {
+
+  __cmplog_rtn_hook(get_gcc_stdstring(stdstring1),
+                    get_gcc_stdstring(stdstring2));
+
+}
+
+void __cmplog_rtn_llvm_stdstring_cstring(u8 *stdstring, u8 *cstring) {
+
+  __cmplog_rtn_hook(get_llvm_stdstring(stdstring), cstring);
+
+}
+
+void __cmplog_rtn_llvm_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) {
+
+  __cmplog_rtn_hook(get_llvm_stdstring(stdstring1),
+                    get_llvm_stdstring(stdstring2));
+
+}
+
 /* COVERAGE manipulation features */
 
 // this variable is then used in the shm setup to create an additional map
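
All __cmplog_ins_hook* variants above share the same bookkeeping: the call
site's return address is mixed into a table slot, hits counts the operand
pairs logged per slot, and shape records the largest operand size seen,
encoded as bytes minus one (0/1/3/7/15 for 1/2/4/8/16-byte compares, 31 for
the 32-byte routine hook). A standalone sketch of the slot derivation
(CMP_MAP_W is assumed to be the power-of-two table width from config.h):

    #include <stdint.h>
    #include <stdio.h>

    #define CMP_MAP_W 65536  /* assumption: power of two, as in config.h */

    int main(void) {

      /* stand-in for __builtin_return_address(0) at a cmp call site */
      uintptr_t k = (uintptr_t)0xa1b2c3d4;

      k = (k >> 4) ^ (k << 8);  /* cheap mix so nearby call sites spread */
      k &= CMP_MAP_W - 1;       /* clamp into the table, needs power of two */

      printf("cmp map slot: %lu\n", (unsigned long)k);
      return 0;

    }
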
diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc
index a27c4069..aa54f4f7 100644
--- a/instrumentation/afl-llvm-common.cc
+++ b/instrumentation/afl-llvm-common.cc
@@ -351,7 +351,7 @@ static std::string getSourceName(llvm::Function *F) {
 
     if (cDILoc) { instFilename = cDILoc->getFilename(); }
 
-    if (instFilename.str().empty()) {
+    if (instFilename.str().empty() && cDILoc) {
 
       /* If the original location is empty, try using the inlined location
        */
diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc
index a4b33732..6f34ac5a 100644
--- a/instrumentation/afl-llvm-dict2file.so.cc
+++ b/instrumentation/afl-llvm-dict2file.so.cc
@@ -430,7 +430,6 @@ bool AFLdict2filePass::runOnModule(Module &M) {
                 if (literalLength + 1 == optLength) {
 
                   Str2.append("\0", 1);  // add null byte
-                  addedNull = true;
 
                 }
 
diff --git a/instrumentation/afl-llvm-lto-instrumentation.so.cc b/instrumentation/afl-llvm-lto-instrumentation.so.cc
index 13dca8c4..f5c24e41 100644
--- a/instrumentation/afl-llvm-lto-instrumentation.so.cc
+++ b/instrumentation/afl-llvm-lto-instrumentation.so.cc
@@ -69,7 +69,8 @@ class AFLLTOPass : public ModulePass {
 
     if (getenv("AFL_DEBUG")) debug = 1;
     if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL)
-      if ((afl_global_id = atoi(ptr)) < 0 || afl_global_id >= MAP_SIZE)
+      if (atoi(ptr) < 0 ||
+          (afl_global_id = (uint32_t)atoi(ptr)) >= MAP_SIZE)
         FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is not between 0 and %u\n",
               ptr, MAP_SIZE - 1);
 
@@ -88,7 +89,7 @@ class AFLLTOPass : public ModulePass {
   bool runOnModule(Module &M) override;
 
  protected:
-  int      afl_global_id = 1, autodictionary = 1;
+  uint32_t afl_global_id = 1, autodictionary = 1;
   uint32_t function_minimum_size = 1;
   uint32_t inst_blocks = 0, inst_funcs = 0, total_instr = 0;
   uint64_t map_addr = 0x10000;
@@ -545,7 +546,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
                   if (literalLength + 1 == optLength) {
 
                     Str2.append("\0", 1);  // add null byte
-                    addedNull = true;
+                    // addedNull = true;
 
                   }
 
@@ -800,7 +801,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
           if (documentFile) {
 
-            fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%d\n",
+            fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%u\n",
                     moduleID, F.getName().str().c_str(), afl_global_id);
 
           }
@@ -872,10 +873,10 @@ bool AFLLTOPass::runOnModule(Module &M) {
     while ((map = map >> 1))
       pow2map++;
     WARNF(
-        "We have %d blocks to instrument but the map size is only %u. Either "
+        "We have %u blocks to instrument but the map size is only %u. Either "
         "edit config.h and set MAP_SIZE_POW2 from %d to %u, then recompile "
         "afl-fuzz and llvm_mode and then make this target - or set "
-        "AFL_MAP_SIZE with at least size %d when running afl-fuzz with this "
+        "AFL_MAP_SIZE with at least size %u when running afl-fuzz with this "
         "target.",
         afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map, afl_global_id);
 
@@ -925,7 +926,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
       uint32_t write_loc = afl_global_id;
 
-      if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3);
+      if (afl_global_id % 32) write_loc = (((afl_global_id + 32) >> 5) << 5);
 
       GlobalVariable *AFLFinalLoc = new GlobalVariable(
           M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc");
diff --git a/instrumentation/afl-llvm-pass.so.cc b/instrumentation/afl-llvm-pass.so.cc
index 57ff3b47..16fd9c94 100644
--- a/instrumentation/afl-llvm-pass.so.cc
+++ b/instrumentation/afl-llvm-pass.so.cc
@@ -538,6 +538,7 @@ bool AFLCoverage::runOnModule(Module &M) {
 
         Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1),
                                 AFLPrevLoc);
+        Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
       }
 
diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc
index 3499ccf0..6b071b48 100644
--- a/instrumentation/cmplog-instructions-pass.cc
+++ b/instrumentation/cmplog-instructions-pass.cc
@@ -85,9 +85,25 @@ class CmpLogInstructions : public ModulePass {
 
 char CmpLogInstructions::ID = 0;
 
+template <class Iterator>
+Iterator Unique(Iterator first, Iterator last) {
+
+  while (first != last) {
+
+    Iterator next(first);
+    last = std::remove(++next, last, *first);
+    first = next;
+
+  }
+
+  return last;
+
+}
+
 bool CmpLogInstructions::hookInstrs(Module &M) {
 
   std::vector<Instruction *> icomps;
+  std::vector<SwitchInst *>  switches;
   LLVMContext &              C = M.getContext();
 
   Type *       VoidTy = Type::getVoidTy(C);
@@ -95,13 +111,17 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
   IntegerType *Int16Ty = IntegerType::getInt16Ty(C);
   IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
   IntegerType *Int64Ty = IntegerType::getInt64Ty(C);
+  IntegerType *Int128Ty = IntegerType::getInt128Ty(C);
+
+  char *is_lto = getenv("_AFL_LTO_COMPILE");
 
 #if LLVM_VERSION_MAJOR < 9
   Constant *
 #else
   FunctionCallee
 #endif
-      c1 = M.getOrInsertFunction("__cmplog_ins_hook1", VoidTy, Int8Ty, Int8Ty
+      c1 = M.getOrInsertFunction("__cmplog_ins_hook1", VoidTy, Int8Ty, Int8Ty,
+                                 Int8Ty
 #if LLVM_VERSION_MAJOR < 5
                                  ,
                                  NULL
@@ -118,7 +138,8 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 #else
   FunctionCallee
 #endif
-      c2 = M.getOrInsertFunction("__cmplog_ins_hook2", VoidTy, Int16Ty, Int16Ty
+      c2 = M.getOrInsertFunction("__cmplog_ins_hook2", VoidTy, Int16Ty, Int16Ty,
+                                 Int8Ty
 #if LLVM_VERSION_MAJOR < 5
                                  ,
                                  NULL
@@ -135,7 +156,8 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 #else
   FunctionCallee
 #endif
-      c4 = M.getOrInsertFunction("__cmplog_ins_hook4", VoidTy, Int32Ty, Int32Ty
+      c4 = M.getOrInsertFunction("__cmplog_ins_hook4", VoidTy, Int32Ty, Int32Ty,
+                                 Int8Ty
 #if LLVM_VERSION_MAJOR < 5
                                  ,
                                  NULL
@@ -152,7 +174,8 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 #else
   FunctionCallee
 #endif
-      c8 = M.getOrInsertFunction("__cmplog_ins_hook8", VoidTy, Int64Ty, Int64Ty
+      c8 = M.getOrInsertFunction("__cmplog_ins_hook8", VoidTy, Int64Ty, Int64Ty,
+                                 Int8Ty
 #if LLVM_VERSION_MAJOR < 5
                                  ,
                                  NULL
@@ -164,6 +187,42 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
   FunctionCallee cmplogHookIns8 = c8;
 #endif
 
+#if LLVM_VERSION_MAJOR < 9
+  Constant *
+#else
+  FunctionCallee
+#endif
+      c16 = M.getOrInsertFunction("__cmplog_ins_hook16", VoidTy, Int128Ty,
+                                  Int128Ty, Int8Ty
+#if LLVM_VERSION_MAJOR < 5
+                                  ,
+                                  NULL
+#endif
+      );
+#if LLVM_VERSION_MAJOR < 9
+  Function *cmplogHookIns16 = cast<Function>(c16);
+#else
+  FunctionCallee cmplogHookIns16 = c16;
+#endif
+
+#if LLVM_VERSION_MAJOR < 9
+  Constant *
+#else
+  FunctionCallee
+#endif
+      cN = M.getOrInsertFunction("__cmplog_ins_hookN", VoidTy, Int128Ty,
+                                 Int128Ty, Int8Ty, Int8Ty
+#if LLVM_VERSION_MAJOR < 5
+                                 ,
+                                 NULL
+#endif
+      );
+#if LLVM_VERSION_MAJOR < 9
+  Function *cmplogHookInsN = cast<Function>(cN);
+#else
+  FunctionCallee cmplogHookInsN = cN;
+#endif
+
   /* iterate over all functions, bbs and instruction and add suitable calls */
   for (auto &F : M) {
 
@@ -174,35 +233,16 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
       for (auto &IN : BB) {
 
         CmpInst *selectcmpInst = nullptr;
-
         if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) {
 
-          if (selectcmpInst->getPredicate() == CmpInst::ICMP_EQ ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_NE ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_UGT ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_SGT ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_ULT ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_SLT ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_UGE ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_SGE ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_ULE ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_SLE ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_OGE ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_UGE ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_OLE ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_ULE ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_OGT ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_UGT ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_OLT ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_ULT ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_UEQ ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_OEQ ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_UNE ||
-              selectcmpInst->getPredicate() == CmpInst::FCMP_ONE) {
-
-            icomps.push_back(selectcmpInst);
+          icomps.push_back(selectcmpInst);
 
-          }
+        }
+
+        SwitchInst *switchInst = nullptr;
+        if ((switchInst = dyn_cast<SwitchInst>(BB.getTerminator()))) {
+
+          if (switchInst->getNumCases() > 1) { switches.push_back(switchInst); }
 
         }
 
@@ -212,101 +252,491 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 
   }
 
-  if (!icomps.size()) return false;
-  // if (!be_quiet) errs() << "Hooking " << icomps.size() << " cmp
-  // instructions\n";
+  // unique the collected switches
+  switches.erase(Unique(switches.begin(), switches.end()), switches.end());
+
+  // Instrument switch values for cmplog
+  if (switches.size()) {
+
+    if (!be_quiet)
+      errs() << "Hooking " << switches.size() << " switch instructions\n";
+
+    for (auto &SI : switches) {
+
+      Value *       Val = SI->getCondition();
+      unsigned int  max_size = Val->getType()->getIntegerBitWidth(), cast_size;
+      unsigned char do_cast = 0;
+
+      if (!SI->getNumCases() || max_size < 16) { continue; }
+
+      if (max_size % 8) {
+
+        if (is_lto) {
+
+          continue;  // LTO cannot bitcast from _ExtInt() :(
+
+        } else {
+
+          max_size = (((max_size / 8) + 1) * 8);
+          do_cast = 1;
+
+        }
+
+      }
+
+      IRBuilder<> IRB(SI->getParent());
+      IRB.SetInsertPoint(SI);
+
+      if (max_size > 128) {
+
+        if (!be_quiet) {
+
+          fprintf(stderr,
+                  "Cannot handle this switch bit size: %u (truncating)\n",
+                  max_size);
+
+        }
+
+        if (is_lto) { continue; }  // LTO cannot bitcast _ExtInt() :(
+        max_size = 128;
+        do_cast = 1;
+
+      }
+
+      // do we need to cast?
+      switch (max_size) {
+
+        case 8:
+        case 16:
+        case 32:
+        case 64:
+        case 128:
+          cast_size = max_size;
+          break;
+        default:
+          if (is_lto) { continue; }  // LTO cannot bitcast _ExtInt() :(
+          cast_size = 128;
+          do_cast = 1;
+
+      }
+
+      Value *CompareTo = Val;
+
+      if (do_cast) {
+
+        ConstantInt *cint = dyn_cast<ConstantInt>(Val);
+        if (cint) {
+
+          uint64_t val = cint->getZExtValue();
+          // fprintf(stderr, "ConstantInt: %lu\n", val);
+          switch (cast_size) {
+
+            case 8:
+              CompareTo = ConstantInt::get(Int8Ty, val);
+              break;
+            case 16:
+              CompareTo = ConstantInt::get(Int16Ty, val);
+              break;
+            case 32:
+              CompareTo = ConstantInt::get(Int32Ty, val);
+              break;
+            case 64:
+              CompareTo = ConstantInt::get(Int64Ty, val);
+              break;
+            case 128:
+              CompareTo = ConstantInt::get(Int128Ty, val);
+              break;
+
+          }
+
+        } else {
+
+          CompareTo = IRB.CreateBitCast(Val, IntegerType::get(C, cast_size));
+
+        }
+
+      }
+
+      for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e;
+           ++i) {
+
+#if LLVM_VERSION_MAJOR < 5
+        ConstantInt *cint = i.getCaseValue();
+#else
+        ConstantInt *cint = i->getCaseValue();
+#endif
+
+        if (cint) {
+
+          std::vector<Value *> args;
+          args.push_back(CompareTo);
+
+          Value *new_param = cint;
 
-  for (auto &selectcmpInst : icomps) {
+          if (do_cast) {
 
-    IRBuilder<> IRB(selectcmpInst->getParent());
-    IRB.SetInsertPoint(selectcmpInst);
+            uint64_t val = cint->getZExtValue();
+            // fprintf(stderr, "ConstantInt: %lu\n", val);
+            switch (cast_size) {
 
-    auto op0 = selectcmpInst->getOperand(0);
-    auto op1 = selectcmpInst->getOperand(1);
+              case 8:
+                new_param = ConstantInt::get(Int8Ty, val);
+                break;
+              case 16:
+                new_param = ConstantInt::get(Int16Ty, val);
+                break;
+              case 32:
+                new_param = ConstantInt::get(Int32Ty, val);
+                break;
+              case 64:
+                new_param = ConstantInt::get(Int64Ty, val);
+                break;
+              case 128:
+                new_param = ConstantInt::get(Int128Ty, val);
+                break;
 
-    IntegerType *        intTyOp0 = NULL;
-    IntegerType *        intTyOp1 = NULL;
-    unsigned             max_size = 0;
-    std::vector<Value *> args;
+            }
 
-    if (selectcmpInst->getOpcode() == Instruction::FCmp) {
+          }
+
+          if (new_param) {
+
+            args.push_back(new_param);
+            ConstantInt *attribute = ConstantInt::get(Int8Ty, 1);
+            args.push_back(attribute);
+            if (cast_size != max_size) {
+
+              ConstantInt *bitsize =
+                  ConstantInt::get(Int8Ty, (max_size / 8) - 1);
+              args.push_back(bitsize);
+
+            }
+
+            switch (cast_size) {
+
+              case 8:
+                IRB.CreateCall(cmplogHookIns1, args);
+                break;
+              case 16:
+                IRB.CreateCall(cmplogHookIns2, args);
+                break;
+              case 32:
+                IRB.CreateCall(cmplogHookIns4, args);
+                break;
+              case 64:
+                IRB.CreateCall(cmplogHookIns8, args);
+                break;
+              case 128:
+#ifdef WORD_SIZE_64
+                if (max_size == 128) {
+
+                  IRB.CreateCall(cmplogHookIns16, args);
+
+                } else {
+
+                  IRB.CreateCall(cmplogHookInsN, args);
+
+                }
+
+#endif
+                break;
+              default:
+                break;
+
+            }
+
+          }
+
+        }
+
+      }
+
+    }
 
-      auto ty0 = op0->getType();
-      if (ty0->isHalfTy()
+  }
+
+  if (icomps.size()) {
+
+    // if (!be_quiet) errs() << "Hooking " << icomps.size() <<
+    //                          " cmp instructions\n";
+
+    for (auto &selectcmpInst : icomps) {
+
+      IRBuilder<> IRB(selectcmpInst->getParent());
+      IRB.SetInsertPoint(selectcmpInst);
+
+      Value *op0 = selectcmpInst->getOperand(0);
+      Value *op1 = selectcmpInst->getOperand(1);
+
+      IntegerType *        intTyOp0 = NULL;
+      IntegerType *        intTyOp1 = NULL;
+      unsigned             max_size = 0, cast_size = 0;
+      unsigned char        attr = 0, do_cast = 0;
+      std::vector<Value *> args;
+
+      CmpInst *cmpInst = dyn_cast<CmpInst>(selectcmpInst);
+
+      if (!cmpInst) { continue; }
+
+      switch (cmpInst->getPredicate()) {
+
+        case CmpInst::ICMP_NE:
+        case CmpInst::FCMP_UNE:
+        case CmpInst::FCMP_ONE:
+          break;
+        case CmpInst::ICMP_EQ:
+        case CmpInst::FCMP_UEQ:
+        case CmpInst::FCMP_OEQ:
+          attr += 1;
+          break;
+        case CmpInst::ICMP_UGT:
+        case CmpInst::ICMP_SGT:
+        case CmpInst::FCMP_OGT:
+        case CmpInst::FCMP_UGT:
+          attr += 2;
+          break;
+        case CmpInst::ICMP_UGE:
+        case CmpInst::ICMP_SGE:
+        case CmpInst::FCMP_OGE:
+        case CmpInst::FCMP_UGE:
+          attr += 3;
+          break;
+        case CmpInst::ICMP_ULT:
+        case CmpInst::ICMP_SLT:
+        case CmpInst::FCMP_OLT:
+        case CmpInst::FCMP_ULT:
+          attr += 4;
+          break;
+        case CmpInst::ICMP_ULE:
+        case CmpInst::ICMP_SLE:
+        case CmpInst::FCMP_OLE:
+        case CmpInst::FCMP_ULE:
+          attr += 5;
+          break;
+        default:
+          break;
+
+      }
+
+      if (selectcmpInst->getOpcode() == Instruction::FCmp) {
+
+        auto ty0 = op0->getType();
+        if (ty0->isHalfTy()
 #if LLVM_VERSION_MAJOR >= 11
-          || ty0->isBFloatTy()
+            || ty0->isBFloatTy()
 #endif
-      )
-        max_size = 16;
-      else if (ty0->isFloatTy())
-        max_size = 32;
-      else if (ty0->isDoubleTy())
-        max_size = 64;
+        )
+          max_size = 16;
+        else if (ty0->isFloatTy())
+          max_size = 32;
+        else if (ty0->isDoubleTy())
+          max_size = 64;
+        else if (ty0->isX86_FP80Ty())
+          max_size = 80;
+        else if (ty0->isFP128Ty() || ty0->isPPC_FP128Ty())
+          max_size = 128;
 
-      if (max_size) {
+        attr += 8;
+        do_cast = 1;
 
-        Value *V0 = IRB.CreateBitCast(op0, IntegerType::get(C, max_size));
-        intTyOp0 = dyn_cast<IntegerType>(V0->getType());
-        Value *V1 = IRB.CreateBitCast(op1, IntegerType::get(C, max_size));
-        intTyOp1 = dyn_cast<IntegerType>(V1->getType());
+      } else {
+
+        intTyOp0 = dyn_cast<IntegerType>(op0->getType());
+        intTyOp1 = dyn_cast<IntegerType>(op1->getType());
 
         if (intTyOp0 && intTyOp1) {
 
           max_size = intTyOp0->getBitWidth() > intTyOp1->getBitWidth()
                          ? intTyOp0->getBitWidth()
                          : intTyOp1->getBitWidth();
-          args.push_back(V0);
-          args.push_back(V1);
+
+        }
+
+      }
+
+      if (!max_size || max_size < 16) { continue; }
+
+      if (max_size % 8) {
+
+        if (is_lto) {
+
+          continue;  // LTO cannot bitcast from _ExtInt() :(
 
         } else {
 
-          max_size = 0;
+          max_size = (((max_size / 8) + 1) * 8);
+          do_cast = 1;
 
         }
 
       }
 
-    } else {
+      if (max_size > 128) {
+
+        if (!be_quiet) {
 
-      intTyOp0 = dyn_cast<IntegerType>(op0->getType());
-      intTyOp1 = dyn_cast<IntegerType>(op1->getType());
+          fprintf(stderr,
+                  "Cannot handle this compare bit size: %u (truncating)\n",
+                  max_size);
 
-      if (intTyOp0 && intTyOp1) {
+        }
+
+        if (is_lto) { continue; }  // LTO cannot bitcast from _ExtInt() :(
+        max_size = 128;
+        do_cast = 1;
+
+      }
+
+      // do we need to cast?
+      switch (max_size) {
+
+        case 8:
+        case 16:
+        case 32:
+        case 64:
+        case 128:
+          cast_size = max_size;
+          break;
+        default:
+          if (is_lto) { continue; }  // LTO cannot bitcast from _ExtInt() :(
+          cast_size = 128;
+          do_cast = 1;
+
+      }
+
+      if (do_cast) {
+
+        // LLVM optimizes out any kind of bitcast of ConstantInt values,
+        // creating illegal calls. So we have to work around this.
+
+        ConstantInt *cint = dyn_cast<ConstantInt>(op0);
+        if (cint) {
+
+          uint64_t val = cint->getZExtValue();
+          // fprintf(stderr, "ConstantInt: %lu\n", val);
+          ConstantInt *new_param = NULL;
+          switch (cast_size) {
+
+            case 8:
+              new_param = ConstantInt::get(Int8Ty, val);
+              break;
+            case 16:
+              new_param = ConstantInt::get(Int16Ty, val);
+              break;
+            case 32:
+              new_param = ConstantInt::get(Int32Ty, val);
+              break;
+            case 64:
+              new_param = ConstantInt::get(Int64Ty, val);
+              break;
+            case 128:
+              new_param = ConstantInt::get(Int128Ty, val);
+              break;
+
+          }
+
+          if (!new_param) { continue; }
+          args.push_back(new_param);
+
+        } else {
+
+          Value *V0 = IRB.CreateBitCast(op0, IntegerType::get(C, cast_size));
+          args.push_back(V0);
+
+        }
+
+        cint = dyn_cast<ConstantInt>(op1);
+        if (cint) {
+
+          uint64_t     val = cint->getZExtValue();
+          ConstantInt *new_param = NULL;
+          switch (cast_size) {
+
+            case 8:
+              new_param = ConstantInt::get(Int8Ty, val);
+              break;
+            case 16:
+              new_param = ConstantInt::get(Int16Ty, val);
+              break;
+            case 32:
+              new_param = ConstantInt::get(Int32Ty, val);
+              break;
+            case 64:
+              new_param = ConstantInt::get(Int64Ty, val);
+              break;
+            case 128:
+              new_param = ConstantInt::get(Int128Ty, val);
+              break;
+
+          }
+
+          if (!new_param) { continue; }
+          args.push_back(new_param);
+
+        } else {
+
+          Value *V1 = IRB.CreateBitCast(op1, IntegerType::get(C, cast_size));
+          args.push_back(V1);
+
+        }
+
+      } else {
 
-        max_size = intTyOp0->getBitWidth() > intTyOp1->getBitWidth()
-                       ? intTyOp0->getBitWidth()
-                       : intTyOp1->getBitWidth();
         args.push_back(op0);
         args.push_back(op1);
 
       }
 
-    }
+      ConstantInt *attribute = ConstantInt::get(Int8Ty, attr);
+      args.push_back(attribute);
+
+      if (cast_size != max_size) {
+
+        ConstantInt *bitsize = ConstantInt::get(Int8Ty, (max_size / 8) - 1);
+        args.push_back(bitsize);
+
+      }
+
+      // fprintf(stderr, "_ExtInt(%u) castTo %u with attr %u didcast %u\n",
+      //         max_size, cast_size, attr, do_cast);
+
+      switch (cast_size) {
 
-    if (max_size < 8 || max_size > 64 || !intTyOp0 || !intTyOp1) continue;
-
-    switch (max_size) {
-
-      case 8:
-        IRB.CreateCall(cmplogHookIns1, args);
-        break;
-      case 16:
-        IRB.CreateCall(cmplogHookIns2, args);
-        break;
-      case 32:
-        IRB.CreateCall(cmplogHookIns4, args);
-        break;
-      case 64:
-        IRB.CreateCall(cmplogHookIns8, args);
-        break;
-      default:
-        break;
+        case 8:
+          IRB.CreateCall(cmplogHookIns1, args);
+          break;
+        case 16:
+          IRB.CreateCall(cmplogHookIns2, args);
+          break;
+        case 32:
+          IRB.CreateCall(cmplogHookIns4, args);
+          break;
+        case 64:
+          IRB.CreateCall(cmplogHookIns8, args);
+          break;
+        case 128:
+          if (max_size == 128) {
+
+            IRB.CreateCall(cmplogHookIns16, args);
+
+          } else {
+
+            IRB.CreateCall(cmplogHookInsN, args);
+
+          }
+
+          break;
+
+      }
 
     }
 
   }
 
-  return true;
+  return switches.size() || icomps.size();
 
 }
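
The attr byte assembled in this pass tells the runtime which predicate was
compiled: 0 not-equal, 1 equality, 2/3 greater/greater-or-equal, 4/5
less/less-or-equal, plus 8 for floating-point compares; switch cases are
logged with attr 1 since each case test is an equality. In C form (the enum
names are illustrative, the pass itself only emits the constants):

    /* illustrative encoding of the attr argument to __cmplog_ins_hook* */
    enum cmplog_attr {

      CMPLOG_ATTR_NE = 0,  /* ICMP_NE,       FCMP_UNE/ONE  */
      CMPLOG_ATTR_EQ = 1,  /* ICMP_EQ,       FCMP_UEQ/OEQ  */
      CMPLOG_ATTR_GT = 2,  /* ICMP_UGT/SGT,  FCMP_OGT/UGT  */
      CMPLOG_ATTR_GE = 3,  /* ICMP_UGE/SGE,  FCMP_OGE/UGE  */
      CMPLOG_ATTR_LT = 4,  /* ICMP_ULT/SLT,  FCMP_OLT/ULT  */
      CMPLOG_ATTR_LE = 5,  /* ICMP_ULE/SLE,  FCMP_OLE/ULE  */
      CMPLOG_ATTR_FP = 8   /* added on top for FCmp        */

    };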
 
diff --git a/instrumentation/cmplog-routines-pass.cc b/instrumentation/cmplog-routines-pass.cc
index e92883ae..a5992c9a 100644
--- a/instrumentation/cmplog-routines-pass.cc
+++ b/instrumentation/cmplog-routines-pass.cc
@@ -87,7 +87,7 @@ char CmpLogRoutines::ID = 0;
 
 bool CmpLogRoutines::hookRtns(Module &M) {
 
-  std::vector<CallInst *> calls;
+  std::vector<CallInst *> calls, llvmStdStd, llvmStdC, gccStdStd, gccStdC;
   LLVMContext &           C = M.getContext();
 
   Type *VoidTy = Type::getVoidTy(C);
@@ -112,6 +112,78 @@ bool CmpLogRoutines::hookRtns(Module &M) {
   FunctionCallee cmplogHookFn = c;
 #endif
 
+#if LLVM_VERSION_MAJOR < 9
+  Constant *
+#else
+  FunctionCallee
+#endif
+      c1 = M.getOrInsertFunction("__cmplog_rtn_llvm_stdstring_stdstring",
+                                 VoidTy, i8PtrTy, i8PtrTy
+#if LLVM_VERSION_MAJOR < 5
+                                 ,
+                                 NULL
+#endif
+      );
+#if LLVM_VERSION_MAJOR < 9
+  Function *cmplogLlvmStdStd = cast<Function>(c1);
+#else
+  FunctionCallee cmplogLlvmStdStd = c1;
+#endif
+
+#if LLVM_VERSION_MAJOR < 9
+  Constant *
+#else
+  FunctionCallee
+#endif
+      c2 = M.getOrInsertFunction("__cmplog_rtn_llvm_stdstring_cstring", VoidTy,
+                                 i8PtrTy, i8PtrTy
+#if LLVM_VERSION_MAJOR < 5
+                                 ,
+                                 NULL
+#endif
+      );
+#if LLVM_VERSION_MAJOR < 9
+  Function *cmplogLlvmStdC = cast<Function>(c2);
+#else
+  FunctionCallee cmplogLlvmStdC = c2;
+#endif
+
+#if LLVM_VERSION_MAJOR < 9
+  Constant *
+#else
+  FunctionCallee
+#endif
+      c3 = M.getOrInsertFunction("__cmplog_rtn_gcc_stdstring_stdstring", VoidTy,
+                                 i8PtrTy, i8PtrTy
+#if LLVM_VERSION_MAJOR < 5
+                                 ,
+                                 NULL
+#endif
+      );
+#if LLVM_VERSION_MAJOR < 9
+  Function *cmplogGccStdStd = cast<Function>(c3);
+#else
+  FunctionCallee cmplogGccStdStd = c3;
+#endif
+
+#if LLVM_VERSION_MAJOR < 9
+  Constant *
+#else
+  FunctionCallee
+#endif
+      c4 = M.getOrInsertFunction("__cmplog_rtn_gcc_stdstring_cstring", VoidTy,
+                                 i8PtrTy, i8PtrTy
+#if LLVM_VERSION_MAJOR < 5
+                                 ,
+                                 NULL
+#endif
+      );
+#if LLVM_VERSION_MAJOR < 9
+  Function *cmplogGccStdC = cast<Function>(c4);
+#else
+  FunctionCallee cmplogGccStdC = c4;
+#endif
+
   /* iterate over all functions, bbs and instruction and add suitable calls */
   for (auto &F : M) {
 
@@ -136,9 +208,64 @@ bool CmpLogRoutines::hookRtns(Module &M) {
                           FT->getParamType(0) == FT->getParamType(1) &&
                           FT->getParamType(0)->isPointerTy();
 
-          if (!isPtrRtn) continue;
-
-          calls.push_back(callInst);
+          bool isGccStdStringStdString =
+              Callee->getName().find("__is_charIT_EE7__value") !=
+                  std::string::npos &&
+              Callee->getName().find(
+                  "St7__cxx1112basic_stringIS2_St11char_traits") !=
+                  std::string::npos &&
+              FT->getNumParams() >= 2 &&
+              FT->getParamType(0) == FT->getParamType(1) &&
+              FT->getParamType(0)->isPointerTy();
+
+          bool isGccStdStringCString =
+              Callee->getName().find(
+                  "St7__cxx1112basic_stringIcSt11char_"
+                  "traitsIcESaIcEE7compareEPK") != std::string::npos &&
+              FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() &&
+              FT->getParamType(1)->isPointerTy();
+
+          bool isLlvmStdStringStdString =
+              Callee->getName().find("_ZNSt3__1eqI") != std::string::npos &&
+              Callee->getName().find("_12basic_stringI") != std::string::npos &&
+              Callee->getName().find("_11char_traits") != std::string::npos &&
+              FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() &&
+              FT->getParamType(1)->isPointerTy();
+
+          bool isLlvmStdStringCString =
+              Callee->getName().find("_ZNSt3__1eqI") != std::string::npos &&
+              Callee->getName().find("_12basic_stringI") != std::string::npos &&
+              FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() &&
+              FT->getParamType(1)->isPointerTy();
+
+          /*
+                    {
+
+                       fprintf(stderr, "F:%s C:%s argc:%u\n",
+                       F.getName().str().c_str(),
+             Callee->getName().str().c_str(), FT->getNumParams());
+                       fprintf(stderr, "ptr0:%u ptr1:%u ptr2:%u\n",
+                              FT->getParamType(0)->isPointerTy(),
+                              FT->getParamType(1)->isPointerTy(),
+                              FT->getNumParams() > 2 ?
+             FT->getParamType(2)->isPointerTy() : 22 );
+
+                    }
+
+          */
+
+          if (isGccStdStringCString || isGccStdStringStdString ||
+              isLlvmStdStringStdString || isLlvmStdStringCString) {
+
+            isPtrRtn = false;
+
+          }
+
+          if (isPtrRtn) { calls.push_back(callInst); }
+          if (isGccStdStringStdString) { gccStdStd.push_back(callInst); }
+          if (isGccStdStringCString) { gccStdC.push_back(callInst); }
+          if (isLlvmStdStringStdString) { llvmStdStd.push_back(callInst); }
+          if (isLlvmStdStringCString) { llvmStdC.push_back(callInst); }
 
         }
 
@@ -148,7 +275,10 @@ bool CmpLogRoutines::hookRtns(Module &M) {
 
   }
 
-  if (!calls.size()) return false;
+  if (!calls.size() && !gccStdStd.size() && !gccStdC.size() &&
+      !llvmStdStd.size() && !llvmStdC.size())
+    return false;
+
   /*
     if (!be_quiet)
       errs() << "Hooking " << calls.size()
@@ -174,6 +304,82 @@ bool CmpLogRoutines::hookRtns(Module &M) {
 
   }
 
+  for (auto &callInst : gccStdStd) {
+
+    Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1);
+
+    IRBuilder<> IRB(callInst->getParent());
+    IRB.SetInsertPoint(callInst);
+
+    std::vector<Value *> args;
+    Value *              v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
+    Value *              v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
+    args.push_back(v1Pcasted);
+    args.push_back(v2Pcasted);
+
+    IRB.CreateCall(cmplogGccStdStd, args);
+
+    // errs() << callInst->getCalledFunction()->getName() << "\n";
+
+  }
+
+  for (auto &callInst : gccStdC) {
+
+    Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1);
+
+    IRBuilder<> IRB(callInst->getParent());
+    IRB.SetInsertPoint(callInst);
+
+    std::vector<Value *> args;
+    Value *              v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
+    Value *              v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
+    args.push_back(v1Pcasted);
+    args.push_back(v2Pcasted);
+
+    IRB.CreateCall(cmplogGccStdC, args);
+
+    // errs() << callInst->getCalledFunction()->getName() << "\n";
+
+  }
+
+  for (auto &callInst : llvmStdStd) {
+
+    Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1);
+
+    IRBuilder<> IRB(callInst->getParent());
+    IRB.SetInsertPoint(callInst);
+
+    std::vector<Value *> args;
+    Value *              v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
+    Value *              v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
+    args.push_back(v1Pcasted);
+    args.push_back(v2Pcasted);
+
+    IRB.CreateCall(cmplogLlvmStdStd, args);
+
+    // errs() << callInst->getCalledFunction()->getName() << "\n";
+
+  }
+
+  for (auto &callInst : llvmStdC) {
+
+    Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1);
+
+    IRBuilder<> IRB(callInst->getParent());
+    IRB.SetInsertPoint(callInst);
+
+    std::vector<Value *> args;
+    Value *              v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
+    Value *              v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
+    args.push_back(v1Pcasted);
+    args.push_back(v2Pcasted);
+
+    IRB.CreateCall(cmplogLlvmStdC, args);
+
+    // errs() << callInst->getCalledFunction()->getName() << "\n";
+
+  }
+
   return true;
 
 }
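
The four new hook lists make the pass route matched std::string comparisons
to the __cmplog_rtn_*_stdstring_* runtime helpers added in
afl-compiler-rt.o.c, which decode the string object before the generic
32-byte hook runs. A sketch of the libstdc++ layout that get_gcc_stdstring()
assumes (field names and the explicit padding are illustrative):

    #include <stdint.h>

    struct gcc_stdstring_view {

      char    *data;     /* offset 0:  heap pointer for long strings    */
      uint32_t length;   /* offset 8:  the u32 read at string + 8       */
      uint32_t pad;      /* offset 12: length is really a size_t        */
      char     buf[16];  /* offset 16: in-structure SSO storage,        */
                         /*            returned when length < 16        */

    };
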
diff --git a/instrumentation/compare-transform-pass.so.cc b/instrumentation/compare-transform-pass.so.cc
index da5cf7e9..bd524a69 100644
--- a/instrumentation/compare-transform-pass.so.cc
+++ b/instrumentation/compare-transform-pass.so.cc
@@ -362,19 +362,22 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
     bool        HasStr1 = getConstantStringInfo(Str1P, Str1);
     bool        HasStr2 = getConstantStringInfo(Str2P, Str2);
     uint64_t    constStrLen, unrollLen, constSizedLen = 0;
-    bool        isMemcmp =
-        !callInst->getCalledFunction()->getName().compare(StringRef("memcmp"));
-    bool isSizedcmp = isMemcmp ||
-                      !callInst->getCalledFunction()->getName().compare(
-                          StringRef("strncmp")) ||
-                      !callInst->getCalledFunction()->getName().compare(
-                          StringRef("strncasecmp"));
+    bool        isMemcmp = false;
+    bool        isSizedcmp = false;
+    bool        isCaseInsensitive = false;
+    Function *  Callee = callInst->getCalledFunction();
+    if (Callee) {
+
+      isMemcmp = Callee->getName().compare("memcmp") == 0;
+      isSizedcmp = isMemcmp || Callee->getName().compare("strncmp") == 0 ||
+                   Callee->getName().compare("strncasecmp") == 0;
+      isCaseInsensitive = Callee->getName().compare("strcasecmp") == 0 ||
+                          Callee->getName().compare("strncasecmp") == 0;
+
+    }
+
     Value *sizedValue = isSizedcmp ? callInst->getArgOperand(2) : NULL;
     bool   isConstSized = sizedValue && isa<ConstantInt>(sizedValue);
-    bool isCaseInsensitive = !callInst->getCalledFunction()->getName().compare(
-                                 StringRef("strcasecmp")) ||
-                             !callInst->getCalledFunction()->getName().compare(
-                                 StringRef("strncasecmp"));
 
     if (!(HasStr1 || HasStr2)) {
 
@@ -391,7 +394,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
         if (val && !val->empty()) {
 
           Str2 = StringRef(*val);
-          HasStr2 = true;
+          // HasStr2 = true;
 
         }
 
@@ -436,15 +439,6 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
     else
       unrollLen = constStrLen;
 
-    /*
-        if (!be_quiet)
-          errs() << callInst->getCalledFunction()->getName() << ": unroll len "
-                 << unrollLen
-                 << ((isSizedcmp && !isConstSized) ? ", variable n" : "") << ":
-       "
-                 << ConstStr << "\n";
-    */
-
     /* split before the call instruction */
     BasicBlock *bb = callInst->getParent();
     BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(callInst));
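
The `if (Callee)` guard introduced above is load-bearing:
`CallInst::getCalledFunction()` returns `nullptr` for indirect calls (and for
callees hidden behind a cast), so the pre-patch chain of
`getCalledFunction()->getName()` calls could dereference a null pointer. A
minimal sketch of the safe pattern, reusing the names from the hunk:

```cpp
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Classify the callee only when it is statically known; an indirect call
// yields a null Function* and is simply skipped.
static bool calleeIsMemcmp(CallInst *callInst) {

  if (Function *Callee = callInst->getCalledFunction())
    return Callee->getName() == "memcmp";

  return false;  // indirect call: nothing to classify

}
```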
diff --git a/instrumentation/llvm-ngram-coverage.h b/instrumentation/llvm-ngram-coverage.h
index 12b666e9..666839c8 100644
--- a/instrumentation/llvm-ngram-coverage.h
+++ b/instrumentation/llvm-ngram-coverage.h
@@ -1,7 +1,7 @@
 #ifndef AFL_NGRAM_CONFIG_H
 #define AFL_NGRAM_CONFIG_H
 
-#include "../config.h"
+#include "types.h"
 
 #if (MAP_SIZE_POW2 <= 16)
 typedef u16 PREV_LOC_T;
diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc
index b6d8c466..80cd90ba 100644
--- a/instrumentation/split-compares-pass.so.cc
+++ b/instrumentation/split-compares-pass.so.cc
@@ -407,6 +407,7 @@ bool SplitComparesTransform::simplifyIntSignedness(Module &M) {
     auto op1 = IcmpInst->getOperand(1);
 
     IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
+    if (!intTyOp0) { continue; }
     unsigned     bitw = intTyOp0->getBitWidth();
     IntegerType *IntType = IntegerType::get(C, bitw);
 
@@ -606,10 +607,11 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
                                    : sizeInBits == 64  ? 53
                                    : sizeInBits == 128 ? 113
                                    : sizeInBits == 16  ? 11
-                                                      /* sizeInBits == 80 */
-                                                      : 65;
+                                   : sizeInBits == 80  ? 65
+                                                       : sizeInBits - 8;
 
-    const unsigned           shiftR_exponent = precision - 1;
+    const unsigned shiftR_exponent = precision - 1;
+    // BUG FIXME TODO: u64 does not work for > 64 bit ... e.g. 80 and 128 bit
     const unsigned long long mask_fraction =
         (1ULL << (shiftR_exponent - 1)) | ((1ULL << (shiftR_exponent - 1)) - 1);
     const unsigned long long mask_exponent =
@@ -1300,12 +1302,9 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 
     case 64:
       count += splitIntCompares(M, bitw);
-      /*
-            if (!be_quiet)
-              errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
-         count
-                     << " split\n";
-      */
+      if (debug)
+        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+               << " split\n";
       bitw >>= 1;
 #if LLVM_VERSION_MAJOR > 3 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
@@ -1313,12 +1312,9 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 #endif
     case 32:
       count += splitIntCompares(M, bitw);
-      /*
-            if (!be_quiet)
-              errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
-         count
-                     << " split\n";
-      */
+      if (debug)
+        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+               << " split\n";
       bitw >>= 1;
 #if LLVM_VERSION_MAJOR > 3 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
@@ -1326,13 +1322,10 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 #endif
     case 16:
       count += splitIntCompares(M, bitw);
-      /*
-            if (!be_quiet)
-              errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
-         count
-                     << " split\n";
-      */
-      bitw >>= 1;
+      if (debug)
+        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+               << " split\n";
+      // bitw >>= 1;
       break;
 
     default:
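
The precision values selected in the `splitFPCompares` hunk above are the
IEEE-754 significand widths, counting the implicit leading bit (65 is used for
the x87 80-bit format, which stores its integer bit explicitly); anything else
falls back to `sizeInBits - 8`. A standalone sketch of that mapping:

```cpp
// Significand precision per floating-point width, mirroring the patch above.
static unsigned fpPrecision(unsigned sizeInBits) {

  switch (sizeInBits) {

    case 16: return 11;              // IEEE-754 binary16
    case 32: return 24;              // IEEE-754 binary32
    case 64: return 53;              // IEEE-754 binary64
    case 80: return 65;              // x87 extended (value used by the pass)
    case 128: return 113;            // IEEE-754 binary128
    default: return sizeInBits - 8;  // fallback, as in the patch

  }

}
```

As the in-hunk FIXME notes, the 64-bit mask arithmetic that consumes these
values still breaks for the 80- and 128-bit formats.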
diff --git a/qemu_mode/QEMUAFL_VERSION b/qemu_mode/QEMUAFL_VERSION
index 9d6d7dba..b0d4fd45 100644
--- a/qemu_mode/QEMUAFL_VERSION
+++ b/qemu_mode/QEMUAFL_VERSION
@@ -1 +1 @@
-6ea7398ee3
+213f3b27dd
diff --git a/qemu_mode/build_qemu_support.sh b/qemu_mode/build_qemu_support.sh
index e14d238a..50e5d4e8 100755
--- a/qemu_mode/build_qemu_support.sh
+++ b/qemu_mode/build_qemu_support.sh
@@ -240,7 +240,7 @@ QEMU_CONF_FLAGS=" \
 
 if [ -n "${CROSS_PREFIX}" ]; then
 
-  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} --cross-prefix=${CROSS_PREFIX}"
+  QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS --cross-prefix=$CROSS_PREFIX"
 
 fi
 
@@ -248,14 +248,14 @@ if [ "$STATIC" = "1" ]; then
 
   echo Building STATIC binary
 
-  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+  QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \
     --static \
     --extra-cflags=-DAFL_QEMU_STATIC_BUILD=1 \
     "
 
 else
 
-  QEMU_CONF_FLAGS="{$QEMU_CONF_FLAGS} --enable-pie "
+  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} --enable-pie "
 
 fi
 
@@ -266,7 +266,7 @@ if [ "$DEBUG" = "1" ]; then
   # --enable-gcov might go here but incurs a mesonbuild error on meson
   # versions prior to 0.56:
   # https://github.com/qemu/meson/commit/903d5dd8a7dc1d6f8bef79e66d6ebc07c
-  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+  QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \
     --disable-strip \
     --enable-debug \
     --enable-debug-info \
@@ -279,7 +279,7 @@ if [ "$DEBUG" = "1" ]; then
 
 else
 
-  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+  QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \
     --disable-debug-info \
     --disable-debug-mutex \
     --disable-debug-tcg \
@@ -294,7 +294,7 @@ if [ "$PROFILING" = "1" ]; then
 
   echo Building PROFILED binary
 
-  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+  QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \
     --enable-gprof \
     --enable-profiler \
     "
@@ -302,7 +302,7 @@ if [ "$PROFILING" = "1" ]; then
 fi
 
 # shellcheck disable=SC2086
-./configure ${QEMU_CONF_FLAGS} || exit 1
+./configure $QEMU_CONF_FLAGS || exit 1
 
 echo "[+] Configuration complete."
 
@@ -364,10 +364,27 @@ else
 
 fi
 
-echo "[+] Building libcompcov ..."
-make -C libcompcov && echo "[+] libcompcov ready"
-echo "[+] Building unsigaction ..."
-make -C unsigaction && echo "[+] unsigaction ready"
+ORIG_CROSS="$CROSS"
+
+if [ "$ORIG_CROSS" = "" ]; then
+  CROSS=$CPU_TARGET-linux-gnu-gcc
+  if ! command -v "$CROSS" > /dev/null
+  then # works on Arch Linux
+    CROSS=$CPU_TARGET-pc-linux-gnu-gcc
+  fi
+fi
+
+if ! command -v "$CROSS" > /dev/null
+then
+    echo "[!] Cross compiler $CROSS could not be found, cannot compile libcompcov, libqasan and unsigaction"
+else
+  echo "[+] Building libcompcov ..."
+  make -C libcompcov CC=$CROSS && echo "[+] libcompcov ready"
+  echo "[+] Building unsigaction ..."
+  make -C unsigaction CC=$CROSS && echo "[+] unsigaction ready"
+  echo "[+] Building libqasan ..."
+  make -C libqasan CC=$CROSS && echo "[+] libqasan ready"
+fi
 
 echo "[+] All done for qemu_mode, enjoy!"
 
diff --git a/qemu_mode/libcompcov/libcompcov.so.c b/qemu_mode/libcompcov/libcompcov.so.c
index 23f465a4..4fc84e62 100644
--- a/qemu_mode/libcompcov/libcompcov.so.c
+++ b/qemu_mode/libcompcov/libcompcov.so.c
@@ -29,6 +29,8 @@
 #include <sys/types.h>
 #include <sys/shm.h>
 #include <stdbool.h>
+#include <stdint.h>
+#include <inttypes.h>
 
 #include "types.h"
 #include "config.h"
@@ -159,14 +161,15 @@ static void __compcov_load(void) {
 
 }
 
-static void __compcov_trace(u64 cur_loc, const u8 *v0, const u8 *v1, size_t n) {
+static void __compcov_trace(uintptr_t cur_loc, const u8 *v0, const u8 *v1,
+                            size_t n) {
 
   size_t i;
 
   if (debug_fd != 1) {
 
     char debugbuf[4096];
-    snprintf(debugbuf, sizeof(debugbuf), "0x%llx %s %s %zu\n", cur_loc,
+    snprintf(debugbuf, sizeof(debugbuf), "0x%" PRIxPTR " %s %s %zu\n", cur_loc,
              v0 == NULL ? "(null)" : (char *)v0,
              v1 == NULL ? "(null)" : (char *)v1, n);
     write(debug_fd, debugbuf, strlen(debugbuf));
@@ -206,7 +209,7 @@ int strcmp(const char *str1, const char *str2) {
 
     if (n <= MAX_CMP_LENGTH) {
 
-      u64 cur_loc = (u64)retaddr;
+      uintptr_t cur_loc = (uintptr_t)retaddr;
       cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
       cur_loc &= MAP_SIZE - 1;
 
@@ -235,7 +238,7 @@ int strncmp(const char *str1, const char *str2, size_t len) {
 
     if (n <= MAX_CMP_LENGTH) {
 
-      u64 cur_loc = (u64)retaddr;
+      uintptr_t cur_loc = (uintptr_t)retaddr;
       cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
       cur_loc &= MAP_SIZE - 1;
 
@@ -265,7 +268,7 @@ int strcasecmp(const char *str1, const char *str2) {
 
     if (n <= MAX_CMP_LENGTH) {
 
-      u64 cur_loc = (u64)retaddr;
+      uintptr_t cur_loc = (uintptr_t)retaddr;
       cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
       cur_loc &= MAP_SIZE - 1;
 
@@ -296,7 +299,7 @@ int strncasecmp(const char *str1, const char *str2, size_t len) {
 
     if (n <= MAX_CMP_LENGTH) {
 
-      u64 cur_loc = (u64)retaddr;
+      uintptr_t cur_loc = (uintptr_t)retaddr;
       cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
       cur_loc &= MAP_SIZE - 1;
 
@@ -324,7 +327,7 @@ int memcmp(const void *mem1, const void *mem2, size_t len) {
 
     if (n <= MAX_CMP_LENGTH) {
 
-      u64 cur_loc = (u64)retaddr;
+      uintptr_t cur_loc = (uintptr_t)retaddr;
       cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
       cur_loc &= MAP_SIZE - 1;
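
Each wrapped comparison function above derives its coverage-map slot the same
way: shift-xor the caller's return address, then mask it into the map. A
standalone sketch of that hash (the `MAP_SIZE` value shown is the usual
default and is illustrative; the real one comes from config.h):

```cpp
#include <stdint.h>

#define MAP_SIZE (1U << 16)  /* illustrative; config.h defines the real value */

// Map-location hash used by the libcompcov wrappers.
static uintptr_t compcov_loc(uintptr_t retaddr) {

  uintptr_t cur_loc = (retaddr >> 4) ^ (retaddr << 8);
  return cur_loc & (MAP_SIZE - 1);

}
```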
 
diff --git a/qemu_mode/libqasan/Makefile b/qemu_mode/libqasan/Makefile
new file mode 100644
index 00000000..f91debb6
--- /dev/null
+++ b/qemu_mode/libqasan/Makefile
@@ -0,0 +1,44 @@
+#
+# american fuzzy lop++ - libqasan
+# -------------------------------
+#
+# Written by Andrea Fioraldi <andreafioraldi@gmail.com>
+#
+# Copyright 2019-2020 Andrea Fioraldi. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+
+PREFIX      ?= /usr/local
+HELPER_PATH  = $(PREFIX)/lib/afl
+DOC_PATH    ?= $(PREFIX)/share/doc/afl
+MAN_PATH    ?= $(PREFIX)/share/man/man8
+
+VERSION     = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2)
+
+CFLAGS      += -I ../qemuafl/qemuafl/
+CFLAGS      += -Wno-int-to-void-pointer-cast -ggdb
+LDFLAGS     += -ldl -pthread
+
+SRC := libqasan.c hooks.c malloc.c string.c uninstrument.c patch.c dlmalloc.c
+HDR := libqasan.h
+
+all: libqasan.so
+
+libqasan.so: $(HDR) $(SRC)
+	$(CC) $(CFLAGS) -fPIC -shared $(SRC) -o ../../$@ $(LDFLAGS)
+
+.NOTPARALLEL: clean
+
+clean:
+	rm -f *.o *.so *~ a.out core core.[1-9][0-9]*
+	rm -f ../../libqasan.so
+
+install: all
+	install -m 755 ../../libqasan.so $${DESTDIR}$(HELPER_PATH)
+	install -m 644 -T README.md $${DESTDIR}$(DOC_PATH)/README.qasan.md
+
diff --git a/qemu_mode/libqasan/README.md b/qemu_mode/libqasan/README.md
new file mode 100644
index 00000000..4a241233
--- /dev/null
+++ b/qemu_mode/libqasan/README.md
@@ -0,0 +1,28 @@
+# QEMU AddressSanitizer Runtime
+
+This library is the injected runtime used by QEMU AddressSanitizer (QASan).
+
+The original repository is [here](https://github.com/andreafioraldi/qasan).
+
+The version embedded in qemuafl is an updated version of just the usermode
+part, and this runtime is injected via LD_PRELOAD (so it works only for
+dynamically linked binaries).
+
+The usage is super simple: just set the env var `AFL_USE_QASAN=1` when fuzzing
+in qemu mode (-Q). afl-fuzz will automatically set AFL_PRELOAD to load this
+library and enable the QASan instrumentation in afl-qemu-trace.
+
+For debugging purposes, we still suggest running the original QASan, as
+stacktrace support for ARM (just a debug feature, it does not affect the
+bug-finding capabilities during fuzzing) is still WIP.
+
+### When should I use QASan?
+
+If your target binary is PIC x86_64, you should also try
+[retrowrite](https://github.com/HexHive/retrowrite) for static rewriting.
+
+If that fails, if your binary is for another architecture, or if you want to
+use persistent and snapshot mode, AFL++'s QASan mode is the way to go.
+
+Note that libdislocator, when combined with QEMU mode, has a much lower
+overhead, but it catches fewer bugs. It is a trade-off; take your pick.
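+
+A typical invocation (input/output paths here are placeholders) would be
+`AFL_USE_QASAN=1 afl-fuzz -Q -i in -o out -- ./target @@`.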
diff --git a/qemu_mode/libqasan/dlmalloc.c b/qemu_mode/libqasan/dlmalloc.c
new file mode 100644
index 00000000..aff58ad5
--- /dev/null
+++ b/qemu_mode/libqasan/dlmalloc.c
@@ -0,0 +1,7328 @@
+#include <features.h>
+
+#ifndef __GLIBC__
+
+/*
+  This is a version (aka dlmalloc) of malloc/free/realloc written by
+  Doug Lea and released to the public domain, as explained at
+  http://creativecommons.org/publicdomain/zero/1.0/ Send questions,
+  comments, complaints, performance data, etc to dl@cs.oswego.edu
+
+* Version 2.8.6 Wed Aug 29 06:57:58 2012  Doug Lea
+   Note: There may be an updated version of this malloc obtainable at
+           ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+         Check before installing!
+
+* Quickstart
+
+  This library is all in one file to simplify the most common usage:
+  ftp it, compile it (-O3), and link it into another program. All of
+  the compile-time options default to reasonable values for use on
+  most platforms.  You might later want to step through various
+  compile-time and dynamic tuning options.
+
+  For convenience, an include file for code using this malloc is at:
+     ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.6.h
+  You don't really need this .h file unless you call functions not
+  defined in your system include files.  The .h file contains only the
+  excerpts from this file needed for using this malloc on ANSI C/C++
+  systems, so long as you haven't changed compile-time options about
+  naming and tuning parameters.  If you do, then you can create your
+  own malloc.h that does include all settings by cutting at the point
+  indicated below. Note that you may already by default be using a C
+  library containing a malloc that is based on some version of this
+  malloc (for example in linux). You might still want to use the one
+  in this file to customize settings or to avoid overheads associated
+  with library versions.
+
+* Vital statistics:
+
+  Supported pointer/size_t representation:       4 or 8 bytes
+       size_t MUST be an unsigned type of the same width as
+       pointers. (If you are using an ancient system that declares
+       size_t as a signed type, or need it to be a different width
+       than pointers, you can use a previous release of this malloc
+       (e.g. 2.7.2) supporting these.)
+
+  Alignment:                                     8 bytes (minimum)
+       This suffices for nearly all current machines and C compilers.
+       However, you can define MALLOC_ALIGNMENT to be wider than this
+       if necessary (up to 128bytes), at the expense of using more space.
+
+  Minimum overhead per allocated chunk:   4 or  8 bytes (if 4byte sizes)
+                                          8 or 16 bytes (if 8byte sizes)
+       Each malloced chunk has a hidden word of overhead holding size
+       and status information, and additional cross-check word
+       if FOOTERS is defined.
+
+  Minimum allocated size: 4-byte ptrs:  16 bytes    (including overhead)
+                          8-byte ptrs:  32 bytes    (including overhead)
+
+       Even a request for zero bytes (i.e., malloc(0)) returns a
+       pointer to something of the minimum allocatable size.
+       The maximum overhead wastage (i.e., number of extra bytes
+       allocated than were requested in malloc) is less than or equal
+       to the minimum size, except for requests >= mmap_threshold that
+       are serviced via mmap(), where the worst case wastage is about
+       32 bytes plus the remainder from a system page (the minimal
+       mmap unit); typically 4096 or 8192 bytes.
+
+  Security: static-safe; optionally more or less
+       The "security" of malloc refers to the ability of malicious
+       code to accentuate the effects of errors (for example, freeing
+       space that is not currently malloc'ed or overwriting past the
+       ends of chunks) in code that calls malloc.  This malloc
+       guarantees not to modify any memory locations below the base of
+       heap, i.e., static variables, even in the presence of usage
+       errors.  The routines additionally detect most improper frees
+       and reallocs.  All this holds as long as the static bookkeeping
+       for malloc itself is not corrupted by some other means.  This
+       is only one aspect of security -- these checks do not, and
+       cannot, detect all possible programming errors.
+
+       If FOOTERS is defined nonzero, then each allocated chunk
+       carries an additional check word to verify that it was malloced
+       from its space.  These check words are the same within each
+       execution of a program using malloc, but differ across
+       executions, so externally crafted fake chunks cannot be
+       freed. This improves security by rejecting frees/reallocs that
+       could corrupt heap memory, in addition to the checks preventing
+       writes to statics that are always on.  This may further improve
+       security at the expense of time and space overhead.  (Note that
+       FOOTERS may also be worth using with MSPACES.)
+
+       By default detected errors cause the program to abort (calling
+       "abort()"). You can override this to instead proceed past
+       errors by defining PROCEED_ON_ERROR.  In this case, a bad free
+       has no effect, and a malloc that encounters a bad address
+       caused by user overwrites will ignore the bad address by
+       dropping pointers and indices to all known memory. This may
+       be appropriate for programs that should continue if at all
+       possible in the face of programming errors, although they may
+       run out of memory because dropped memory is never reclaimed.
+
+       If you don't like either of these options, you can define
+       CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
+       else. And if you are sure that your program using malloc has
+       no errors or vulnerabilities, you can define INSECURE to 1,
+       which might (or might not) provide a small performance improvement.
+
+       It is also possible to limit the maximum total allocatable
+       space, using malloc_set_footprint_limit. This is not
+       designed as a security feature in itself (calls to set limits
+       are not screened or privileged), but may be useful as one
+       aspect of a secure implementation.
+
+  Thread-safety: NOT thread-safe unless USE_LOCKS defined non-zero
+       When USE_LOCKS is defined, each public call to malloc, free,
+       etc is surrounded with a lock. By default, this uses a plain
+       pthread mutex, win32 critical section, or a spin-lock if
+       available for the platform and not disabled by setting
+       USE_SPIN_LOCKS=0.  However, if USE_RECURSIVE_LOCKS is defined,
+       recursive versions are used instead (which are not required for
+       base functionality but may be needed in layered extensions).
+       Using a global lock is not especially fast, and can be a major
+       bottleneck.  It is designed only to provide minimal protection
+       in concurrent environments, and to provide a basis for
+       extensions.  If you are using malloc in a concurrent program,
+       consider instead using nedmalloc
+       (http://www.nedprod.com/programs/portable/nedmalloc/) or
+       ptmalloc (See http://www.malloc.de), which are derived from
+       versions of this malloc.
+
+  System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
+       This malloc can use unix sbrk or any emulation (invoked using
+       the CALL_MORECORE macro) and/or mmap/munmap or any emulation
+       (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
+       memory.  On most unix systems, it tends to work best if both
+       MORECORE and MMAP are enabled.  On Win32, it uses emulations
+       based on VirtualAlloc. It also uses common C library functions
+       like memset.
+
+  Compliance: I believe it is compliant with the Single Unix Specification
+       (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
+       others as well.
+
+* Overview of algorithms
+
+  This is not the fastest, most space-conserving, most portable, or
+  most tunable malloc ever written. However it is among the fastest
+  while also being among the most space-conserving, portable and
+  tunable.  Consistent balance across these factors results in a good
+  general-purpose allocator for malloc-intensive programs.
+
+  In most ways, this malloc is a best-fit allocator. Generally, it
+  chooses the best-fitting existing chunk for a request, with ties
+  broken in approximately least-recently-used order. (This strategy
+  normally maintains low fragmentation.) However, for requests less
+  than 256bytes, it deviates from best-fit when there is not an
+  exactly fitting available chunk by preferring to use space adjacent
+  to that used for the previous small request, as well as by breaking
+  ties in approximately most-recently-used order. (These enhance
+  locality of series of small allocations.)  And for very large requests
+  (>= 256Kb by default), it relies on system memory mapping
+  facilities, if supported.  (This helps avoid carrying around and
+  possibly fragmenting memory used only for large chunks.)
+
+  All operations (except malloc_stats and mallinfo) have execution
+  times that are bounded by a constant factor of the number of bits in
+  a size_t, not counting any clearing in calloc or copying in realloc,
+  or actions surrounding MORECORE and MMAP that have times
+  proportional to the number of non-contiguous regions returned by
+  system allocation routines, which is often just 1. In real-time
+  applications, you can optionally suppress segment traversals using
+  NO_SEGMENT_TRAVERSAL, which assures bounded execution even when
+  system allocators return non-contiguous spaces, at the typical
+  expense of carrying around more memory and increased fragmentation.
+
+  The implementation is not very modular and seriously overuses
+  macros. Perhaps someday all C compilers will do as good a job
+  inlining modular code as can now be done by brute-force expansion,
+  but now, enough of them seem not to.
+
+  Some compilers issue a lot of warnings about code that is
+  dead/unreachable only on some platforms, and also about intentional
+  uses of negation on unsigned types. All known cases of each can be
+  ignored.
+
+  For a longer but out of date high-level description, see
+     http://gee.cs.oswego.edu/dl/html/malloc.html
+
+* MSPACES
+  If MSPACES is defined, then in addition to malloc, free, etc.,
+  this file also defines mspace_malloc, mspace_free, etc. These
+  are versions of malloc routines that take an "mspace" argument
+  obtained using create_mspace, to control all internal bookkeeping.
+  If ONLY_MSPACES is defined, only these versions are compiled.
+  So if you would like to use this allocator for only some allocations,
+  and your system malloc for others, you can compile with
+  ONLY_MSPACES and then do something like...
+    static mspace mymspace = create_mspace(0,0); // for example
+    #define mymalloc(bytes)  mspace_malloc(mymspace, bytes)
+
+  (Note: If you only need one instance of an mspace, you can instead
+  use "USE_DL_PREFIX" to relabel the global malloc.)
+
+  You can similarly create thread-local allocators by storing
+  mspaces as thread-locals. For example:
+    static __thread mspace tlms = 0;
+    void*  tlmalloc(size_t bytes) {
+
+      if (tlms == 0) tlms = create_mspace(0, 0);
+      return mspace_malloc(tlms, bytes);
+
+    }
+
+    void  tlfree(void* mem) { mspace_free(tlms, mem); }
+
+  Unless FOOTERS is defined, each mspace is completely independent.
+  You cannot allocate from one and free to another (although
+  conformance is only weakly checked, so usage errors are not always
+  caught). If FOOTERS is defined, then each chunk carries around a tag
+  indicating its originating mspace, and frees are directed to their
+  originating spaces. Normally, this requires use of locks.
+
+ -------------------------  Compile-time options ---------------------------
+
+Be careful in setting #define values for numerical constants of type
+size_t. On some systems, literal values are not automatically extended
+to size_t precision unless they are explicitly cast. You can also
+use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below.
+
+WIN32                    default: defined if _WIN32 defined
+  Defining WIN32 sets up defaults for MS environment and compilers.
+  Otherwise defaults are for unix. Beware that there seem to be some
+  cases where this malloc might not be a pure drop-in replacement for
+  Win32 malloc: Random-looking failures from Win32 GDI APIs (e.g.,
+  SetDIBits()) may be due to bugs in some video driver implementations
+  when pixel buffers are malloc()ed, and the region spans more than
+  one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb)
+  default granularity, pixel buffers may straddle virtual allocation
+  regions more often than when using the Microsoft allocator.  You can
+  avoid this by using VirtualAlloc() and VirtualFree() for all pixel
+  buffers rather than using malloc().  If this is not possible,
+  recompile this malloc with a larger DEFAULT_GRANULARITY. Note:
+  in cases where MSC and gcc (cygwin) are known to differ on WIN32,
+  conditions use _MSC_VER to distinguish them.
+
+DLMALLOC_EXPORT       default: extern
+  Defines how public APIs are declared. If you want to export via a
+  Windows DLL, you might define this as
+    #define DLMALLOC_EXPORT extern  __declspec(dllexport)
+  If you want a POSIX ELF shared object, you might use
+    #define DLMALLOC_EXPORT extern __attribute__((visibility("default")))
+
+MALLOC_ALIGNMENT         default: (size_t)(2 * sizeof(void *))
+  Controls the minimum alignment for malloc'ed chunks.  It must be a
+  power of two and at least 8, even on machines for which smaller
+  alignments would suffice. It may be defined as larger than this
+  though. Note however that code and data structures are optimized for
+  the case of 8-byte alignment.
+
+MSPACES                  default: 0 (false)
+  If true, compile in support for independent allocation spaces.
+  This is only supported if HAVE_MMAP is true.
+
+ONLY_MSPACES             default: 0 (false)
+  If true, only compile in mspace versions, not regular versions.
+
+USE_LOCKS                default: 0 (false)
+  Causes each call to each public routine to be surrounded with
+  pthread or WIN32 mutex lock/unlock. (If set true, this can be
+  overridden on a per-mspace basis for mspace versions.) If set to a
+  non-zero value other than 1, locks are used, but their
+  implementation is left out, so lock functions must be supplied manually,
+  as described below.
+
+USE_SPIN_LOCKS           default: 1 iff USE_LOCKS and spin locks available
+  If true, uses custom spin locks for locking. This is currently
+  supported only for gcc >= 4.1, older gccs on x86 platforms, and recent
+  MS compilers.  Otherwise, posix locks or win32 critical sections are
+  used.
+
+USE_RECURSIVE_LOCKS      default: not defined
+  If defined nonzero, uses recursive (aka reentrant) locks, otherwise
+  uses plain mutexes. This is not required for malloc proper, but may
+  be needed for layered allocators such as nedmalloc.
+
+LOCK_AT_FORK            default: not defined
+  If defined nonzero, performs pthread_atfork upon initialization
+  to initialize child lock while holding parent lock. The implementation
+  assumes that pthread locks (not custom locks) are being used. In other
+  cases, you may need to customize the implementation.
+
+FOOTERS                  default: 0
+  If true, provide extra checking and dispatching by placing
+  information in the footers of allocated chunks. This adds
+  space and time overhead.
+
+INSECURE                 default: 0
+  If true, omit checks for usage errors and heap space overwrites.
+
+USE_DL_PREFIX            default: NOT defined
+  Causes compiler to prefix all public routines with the string 'dl'.
+  This can be useful when you only want to use this malloc in one part
+  of a program, using your regular system malloc elsewhere.
+
+MALLOC_INSPECT_ALL       default: NOT defined
+  If defined, compiles malloc_inspect_all and mspace_inspect_all, that
+  perform traversal of all heap space.  Unless access to these
+  functions is otherwise restricted, you probably do not want to
+  include them in secure implementations.
+
+ABORT                    default: defined as abort()
+  Defines how to abort on failed checks.  On most systems, a failed
+  check cannot die with an "assert" or even print an informative
+  message, because the underlying print routines in turn call malloc,
+  which will fail again.  Generally, the best policy is to simply call
+  abort(). It's not very useful to do more than this because many
+  errors due to overwriting will show up as address faults (null, odd
+  addresses etc) rather than malloc-triggered checks, so will also
+  abort.  Also, most compilers know that abort() does not return, so
+  can better optimize code conditionally calling it.
+
+PROCEED_ON_ERROR           default: defined as 0 (false)
+  Controls whether detected bad addresses are bypassed
+  rather than aborting. If set, detected bad arguments to free and
+  realloc are ignored. And all bookkeeping information is zeroed out
+  upon a detected overwrite of freed heap space, thus losing the
+  ability to ever return it from malloc again, but enabling the
+  application to proceed. If PROCEED_ON_ERROR is defined, the
+  static variable malloc_corruption_error_count is compiled in
+  and can be examined to see if errors have occurred. This option
+  generates slower code than the default abort policy.
+
+DEBUG                    default: NOT defined
+  The DEBUG setting is mainly intended for people trying to modify
+  this code or diagnose problems when porting to new platforms.
+  However, it may also be able to better isolate user errors than just
+  using runtime checks.  The assertions in the check routines spell
+  out in more detail the assumptions and invariants underlying the
+  algorithms.  The checking is fairly extensive, and will slow down
+  execution noticeably. Calling malloc_stats or mallinfo with DEBUG
+  set will attempt to check every non-mmapped allocated and free chunk
+  in the course of computing the summaries.
+
+ABORT_ON_ASSERT_FAILURE   default: defined as 1 (true)
+  Debugging assertion failures can be nearly impossible if your
+  version of the assert macro causes malloc to be called, which will
+  lead to a cascade of further failures, blowing the runtime stack.
+  ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(),
+  which will usually make debugging easier.
+
+MALLOC_FAILURE_ACTION     default: sets errno to ENOMEM, or no-op on win32
+  The action to take before "return 0" when malloc is unable to return
+  memory because there is none available.
+
+HAVE_MORECORE             default: 1 (true) unless win32 or ONLY_MSPACES
+  True if this system supports sbrk or an emulation of it.
+
+MORECORE                  default: sbrk
+  The name of the sbrk-style system routine to call to obtain more
+  memory.  See below for guidance on writing custom MORECORE
+  functions. The type of the argument to sbrk/MORECORE varies across
+  systems.  It cannot be size_t, because it supports negative
+  arguments, so it is normally the signed type of the same width as
+  size_t (sometimes declared as "intptr_t").  It doesn't much matter
+  though. Internally, we only call it with arguments less than half
+  the max value of a size_t, which should work across all reasonable
+  possibilities, although sometimes generating compiler warnings.
+
+MORECORE_CONTIGUOUS       default: 1 (true) if HAVE_MORECORE
+  If true, take advantage of fact that consecutive calls to MORECORE
+  with positive arguments always return contiguous increasing
+  addresses.  This is true of unix sbrk. It does not hurt too much to
+  set it true anyway, since malloc copes with non-contiguities.
+  Setting it false when definitely non-contiguous saves time
+  and possibly wasted space it would take to discover this though.
+
+MORECORE_CANNOT_TRIM      default: NOT defined
+  True if MORECORE cannot release space back to the system when given
+  negative arguments. This is generally necessary only if you are
+  using a hand-crafted MORECORE function that cannot handle negative
+  arguments.
+
+NO_SEGMENT_TRAVERSAL       default: 0
+  If non-zero, suppresses traversals of memory segments
+  returned by either MORECORE or CALL_MMAP. This disables
+  merging of segments that are contiguous, and selectively
+  releasing them to the OS if unused, but bounds execution times.
+
+HAVE_MMAP                 default: 1 (true)
+  True if this system supports mmap or an emulation of it.  If so, and
+  HAVE_MORECORE is not true, MMAP is used for all system
+  allocation. If set and HAVE_MORECORE is true as well, MMAP is
+  primarily used to directly allocate very large blocks. It is also
+  used as a backup strategy in cases where MORECORE fails to provide
+  space from system. Note: A single call to MUNMAP is assumed to be
+  able to unmap memory that may have been allocated using multiple calls
+  to MMAP, so long as they are adjacent.
+
+HAVE_MREMAP               default: 1 on linux, else 0
+  If true realloc() uses mremap() to re-allocate large blocks and
+  extend or shrink allocation spaces.
+
+MMAP_CLEARS               default: 1 except on WINCE.
+  True if mmap clears memory so calloc doesn't need to. This is true
+  for standard unix mmap using /dev/zero and on WIN32 except for WINCE.
+
+USE_BUILTIN_FFS            default: 0 (i.e., not used)
+  Causes malloc to use the builtin ffs() function to compute indices.
+  Some compilers may recognize and intrinsify ffs to be faster than the
+  supplied C version. Also, the case of x86 using gcc is special-cased
+  to an asm instruction, so is already as fast as it can be, and so
+  this setting has no effect. Similarly for Win32 under recent MS compilers.
+  (On most x86s, the asm version is only slightly faster than the C version.)
+
+malloc_getpagesize         default: derive from system includes, or 4096.
+  The system page size. To the extent possible, this malloc manages
+  memory from the system in page-size units.  This may be (and
+  usually is) a function rather than a constant. This is ignored
+  if WIN32, where page size is determined using getSystemInfo during
+  initialization.
+
+USE_DEV_RANDOM             default: 0 (i.e., not used)
+  Causes malloc to use /dev/random to initialize secure magic seed for
+  stamping footers. Otherwise, the current time is used.
+
+NO_MALLINFO                default: 0
+  If defined, don't compile "mallinfo". This can be a simple way
+  of dealing with mismatches between system declarations and
+  those in this file.
+
+MALLINFO_FIELD_TYPE        default: size_t
+  The type of the fields in the mallinfo struct. This was originally
+  defined as "int" in SVID etc, but is more usefully defined as
+  size_t. The value is used only if  HAVE_USR_INCLUDE_MALLOC_H is not set
+
+NO_MALLOC_STATS            default: 0
+  If defined, don't compile "malloc_stats". This avoids calls to
+  fprintf and bringing in stdio dependencies you might not want.
+
+REALLOC_ZERO_BYTES_FREES    default: not defined
+  This should be set if a call to realloc with zero bytes should
+  be the same as a call to free. Some people think it should. Otherwise,
+  since this malloc returns a unique pointer for malloc(0), so does
+  realloc(p, 0).
+
+LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
+LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H,  LACKS_ERRNO_H
+LACKS_STDLIB_H LACKS_SCHED_H LACKS_TIME_H  default: NOT defined unless on WIN32
+  Define these if your system does not have these header files.
+  You might need to manually insert some of the declarations they provide.
+
+DEFAULT_GRANULARITY        default: page size if MORECORE_CONTIGUOUS,
+                                system_info.dwAllocationGranularity in WIN32,
+                                otherwise 64K.
+      Also settable using mallopt(M_GRANULARITY, x)
+  The unit for allocating and deallocating memory from the system.  On
+  most systems with contiguous MORECORE, there is no reason to
+  make this more than a page. However, systems with MMAP tend to
+  either require or encourage larger granularities.  You can increase
+  this value to prevent system allocation functions from being called so
+  often, especially if they are slow.  The value must be at least one
+  page and must be a power of two.  Setting to 0 causes initialization
+  to either page size or win32 region size.  (Note: In previous
+  versions of malloc, the equivalent of this option was called
+  "TOP_PAD")
+
+DEFAULT_TRIM_THRESHOLD    default: 2MB
+      Also settable using mallopt(M_TRIM_THRESHOLD, x)
+  The maximum amount of unused top-most memory to keep before
+  releasing via malloc_trim in free().  Automatic trimming is mainly
+  useful in long-lived programs using contiguous MORECORE.  Because
+  trimming via sbrk can be slow on some systems, and can sometimes be
+  wasteful (in cases where programs immediately afterward allocate
+  more large chunks) the value should be high enough so that your
+  overall system performance would improve by releasing this much
+  memory.  As a rough guide, you might set to a value close to the
+  average size of a process (program) running on your system.
+  Releasing this much memory would allow such a process to run in
+  memory.  Generally, it is worth tuning trim thresholds when a
+  program undergoes phases where several large chunks are allocated
+  and released in ways that can reuse each other's storage, perhaps
+  mixed with phases where there are no such chunks at all. The trim
+  value must be greater than page size to have any useful effect.  To
+  disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
+  some people use of mallocing a huge space and then freeing it at
+  program startup, in an attempt to reserve system memory, doesn't
+  have the intended effect under automatic trimming, since that memory
+  will immediately be returned to the system.
+
+DEFAULT_MMAP_THRESHOLD       default: 256K
+      Also settable using mallopt(M_MMAP_THRESHOLD, x)
+  The request size threshold for using MMAP to directly service a
+  request. Requests of at least this size that cannot be allocated
+  using already-existing space will be serviced via mmap.  (If enough
+  normal freed space already exists it is used instead.)  Using mmap
+  segregates relatively large chunks of memory so that they can be
+  individually obtained and released from the host system. A request
+  serviced through mmap is never reused by any other request (at least
+  not directly; the system may just so happen to remap successive
+  requests to the same locations).  Segregating space in this way has
+  the benefits that: Mmapped space can always be individually released
+  back to the system, which helps keep the system level memory demands
+  of a long-lived program low.  Also, mapped memory doesn't become
+  `locked' between other chunks, as can happen with normally allocated
+  chunks, which means that even trimming via malloc_trim would not
+  release them.  However, it has the disadvantage that the space
+  cannot be reclaimed, consolidated, and then used to service later
+  requests, as happens with normal chunks.  The advantages of mmap
+  nearly always outweigh disadvantages for "large" chunks, but the
+  value of "large" may vary across systems.  The default is an
+  empirically derived value that works well in most systems. You can
+  disable mmap by setting to MAX_SIZE_T.
+
+MAX_RELEASE_CHECK_RATE   default: 4095 unless not HAVE_MMAP
+  The number of consolidated frees between checks to release
+  unused segments when freeing. When using non-contiguous segments,
+  especially with multiple mspaces, checking only for topmost space
+  doesn't always suffice to trigger trimming. To compensate for this,
+  free() will, with a period of MAX_RELEASE_CHECK_RATE (or the
+  current number of segments, if greater) try to release unused
+  segments to the OS when freeing chunks that result in
+  consolidation. The best value for this parameter is a compromise
+  between slowing down frees with relatively costly checks that
+  rarely trigger versus holding on to unused memory. To effectively
+  disable, set to MAX_SIZE_T. This may lead to a very slight speed
+  improvement at the expense of carrying around more memory.
+*/
+
+  #define USE_DL_PREFIX
+
+  /* Version identifier to allow people to support multiple versions */
+  #ifndef DLMALLOC_VERSION
+    #define DLMALLOC_VERSION 20806
+  #endif                                                /* DLMALLOC_VERSION */
+
+  #ifndef DLMALLOC_EXPORT
+    #define DLMALLOC_EXPORT extern
+  #endif
+
+  #ifndef WIN32
+    #ifdef _WIN32
+      #define WIN32 1
+    #endif                                                        /* _WIN32 */
+    #ifdef _WIN32_WCE
+      #define LACKS_FCNTL_H
+      #define WIN32 1
+    #endif                                                    /* _WIN32_WCE */
+  #endif                                                           /* WIN32 */
+  #ifdef WIN32
+    #define WIN32_LEAN_AND_MEAN
+    #include <windows.h>
+    #include <tchar.h>
+    #define HAVE_MMAP 1
+    #define HAVE_MORECORE 0
+    #define LACKS_UNISTD_H
+    #define LACKS_SYS_PARAM_H
+    #define LACKS_SYS_MMAN_H
+    #define LACKS_STRING_H
+    #define LACKS_STRINGS_H
+    #define LACKS_SYS_TYPES_H
+    #define LACKS_ERRNO_H
+    #define LACKS_SCHED_H
+    #ifndef MALLOC_FAILURE_ACTION
+      #define MALLOC_FAILURE_ACTION
+    #endif                                         /* MALLOC_FAILURE_ACTION */
+    #ifndef MMAP_CLEARS
+      #ifdef _WIN32_WCE                  /* WINCE reportedly does not clear */
+        #define MMAP_CLEARS 0
+      #else
+        #define MMAP_CLEARS 1
+      #endif                                                  /* _WIN32_WCE */
+    #endif                                                    /*MMAP_CLEARS */
+  #endif                                                           /* WIN32 */
+
+  #if defined(DARWIN) || defined(_DARWIN)
+    /* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
+    #ifndef HAVE_MORECORE
+      #define HAVE_MORECORE 0
+      #define HAVE_MMAP 1
+      /* OSX allocators provide 16 byte alignment */
+      #ifndef MALLOC_ALIGNMENT
+        #define MALLOC_ALIGNMENT ((size_t)16U)
+      #endif
+    #endif                                                 /* HAVE_MORECORE */
+  #endif                                                          /* DARWIN */
+
+  #ifndef LACKS_SYS_TYPES_H
+    #include <sys/types.h>                                    /* For size_t */
+  #endif                                               /* LACKS_SYS_TYPES_H */
+
+  /* The maximum possible size_t value has all bits set */
+  #define MAX_SIZE_T (~(size_t)0)
+
+  #ifndef USE_LOCKS           /* ensure true if spin or recursive locks set */
+    #define USE_LOCKS                                      \
+      ((defined(USE_SPIN_LOCKS) && USE_SPIN_LOCKS != 0) || \
+       (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0))
+  #endif                                                       /* USE_LOCKS */
+
+  #if USE_LOCKS /* Spin locks for gcc >= 4.1, older gcc on x86, MSC >= 1310 */
+    #if ((defined(__GNUC__) &&                                         \
+          ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) || \
+           defined(__i386__) || defined(__x86_64__))) ||               \
+         (defined(_MSC_VER) && _MSC_VER >= 1310))
+      #ifndef USE_SPIN_LOCKS
+        #define USE_SPIN_LOCKS 1
+      #endif                                              /* USE_SPIN_LOCKS */
+    #elif USE_SPIN_LOCKS
+      #error "USE_SPIN_LOCKS defined without implementation"
+    #endif                                        /* ... locks available... */
+  #elif !defined(USE_SPIN_LOCKS)
+    #define USE_SPIN_LOCKS 0
+  #endif                                                       /* USE_LOCKS */
+
+  #ifndef ONLY_MSPACES
+    #define ONLY_MSPACES 0
+  #endif                                                    /* ONLY_MSPACES */
+  #ifndef MSPACES
+    #if ONLY_MSPACES
+      #define MSPACES 1
+    #else                                                   /* ONLY_MSPACES */
+      #define MSPACES 0
+    #endif                                                  /* ONLY_MSPACES */
+  #endif                                                         /* MSPACES */
+  #ifndef MALLOC_ALIGNMENT
+    #define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
+  #endif                                                /* MALLOC_ALIGNMENT */
+  #ifndef FOOTERS
+    #define FOOTERS 0
+  #endif                                                         /* FOOTERS */
+  #ifndef ABORT
+    #define ABORT abort()
+  #endif                                                           /* ABORT */
+  #ifndef ABORT_ON_ASSERT_FAILURE
+    #define ABORT_ON_ASSERT_FAILURE 1
+  #endif                                         /* ABORT_ON_ASSERT_FAILURE */
+  #ifndef PROCEED_ON_ERROR
+    #define PROCEED_ON_ERROR 0
+  #endif                                                /* PROCEED_ON_ERROR */
+
+  #ifndef INSECURE
+    #define INSECURE 0
+  #endif                                                        /* INSECURE */
+  #ifndef MALLOC_INSPECT_ALL
+    #define MALLOC_INSPECT_ALL 0
+  #endif                                              /* MALLOC_INSPECT_ALL */
+  #ifndef HAVE_MMAP
+    #define HAVE_MMAP 1
+  #endif                                                       /* HAVE_MMAP */
+  #ifndef MMAP_CLEARS
+    #define MMAP_CLEARS 1
+  #endif                                                     /* MMAP_CLEARS */
+  #ifndef HAVE_MREMAP
+    #ifdef linux
+      #define HAVE_MREMAP 1
+      #define _GNU_SOURCE                   /* Turns on mremap() definition */
+    #else                                                          /* linux */
+      #define HAVE_MREMAP 0
+    #endif                                                         /* linux */
+  #endif                                                     /* HAVE_MREMAP */
+  #ifndef MALLOC_FAILURE_ACTION
+    #define MALLOC_FAILURE_ACTION errno = ENOMEM;
+  #endif                                           /* MALLOC_FAILURE_ACTION */
+  #ifndef HAVE_MORECORE
+    #if ONLY_MSPACES
+      #define HAVE_MORECORE 0
+    #else                                                   /* ONLY_MSPACES */
+      #define HAVE_MORECORE 1
+    #endif                                                  /* ONLY_MSPACES */
+  #endif                                                   /* HAVE_MORECORE */
+  #if !HAVE_MORECORE
+    #define MORECORE_CONTIGUOUS 0
+  #else                                                   /* !HAVE_MORECORE */
+    #define MORECORE_DEFAULT sbrk
+    #ifndef MORECORE_CONTIGUOUS
+      #define MORECORE_CONTIGUOUS 1
+    #endif                                           /* MORECORE_CONTIGUOUS */
+  #endif                                                   /* HAVE_MORECORE */
+  #ifndef DEFAULT_GRANULARITY
+    #if (MORECORE_CONTIGUOUS || defined(WIN32))
+      #define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */
+    #else                                            /* MORECORE_CONTIGUOUS */
+      #define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
+    #endif                                           /* MORECORE_CONTIGUOUS */
+  #endif                                             /* DEFAULT_GRANULARITY */
+  #ifndef DEFAULT_TRIM_THRESHOLD
+    #ifndef MORECORE_CANNOT_TRIM
+      #define DEFAULT_TRIM_THRESHOLD \
+        ((size_t)2U * (size_t)1024U * (size_t)1024U)
+    #else                                           /* MORECORE_CANNOT_TRIM */
+      #define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
+    #endif                                          /* MORECORE_CANNOT_TRIM */
+  #endif                                          /* DEFAULT_TRIM_THRESHOLD */
+  #ifndef DEFAULT_MMAP_THRESHOLD
+    #if HAVE_MMAP
+      #define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
+    #else                                                      /* HAVE_MMAP */
+      #define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
+    #endif                                                     /* HAVE_MMAP */
+  #endif                                          /* DEFAULT_MMAP_THRESHOLD */
+  #ifndef MAX_RELEASE_CHECK_RATE
+    #if HAVE_MMAP
+      #define MAX_RELEASE_CHECK_RATE 4095
+    #else
+      #define MAX_RELEASE_CHECK_RATE MAX_SIZE_T
+    #endif                                                     /* HAVE_MMAP */
+  #endif                                          /* MAX_RELEASE_CHECK_RATE */
+  #ifndef USE_BUILTIN_FFS
+    #define USE_BUILTIN_FFS 0
+  #endif                                                 /* USE_BUILTIN_FFS */
+  #ifndef USE_DEV_RANDOM
+    #define USE_DEV_RANDOM 0
+  #endif                                                  /* USE_DEV_RANDOM */
+  #ifndef NO_MALLINFO
+    #define NO_MALLINFO 0
+  #endif                                                     /* NO_MALLINFO */
+  #ifndef MALLINFO_FIELD_TYPE
+    #define MALLINFO_FIELD_TYPE size_t
+  #endif                                             /* MALLINFO_FIELD_TYPE */
+  #ifndef NO_MALLOC_STATS
+    #define NO_MALLOC_STATS 0
+  #endif                                                 /* NO_MALLOC_STATS */
+  #ifndef NO_SEGMENT_TRAVERSAL
+    #define NO_SEGMENT_TRAVERSAL 0
+  #endif                                            /* NO_SEGMENT_TRAVERSAL */
+
+/*
+  mallopt tuning options.  SVID/XPG defines four standard parameter
+  numbers for mallopt, normally defined in malloc.h.  None of these
+  are used in this malloc, so setting them has no effect. But this
+  malloc does support the following options.
+*/
+
+  #undef M_TRIM_THRESHOLD
+  #undef M_GRANULARITY
+  #undef M_MMAP_THRESHOLD
+  #define M_TRIM_THRESHOLD (-1)
+  #define M_GRANULARITY (-2)
+  #define M_MMAP_THRESHOLD (-3)
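+
+/*
+  Usage sketch: with USE_DL_PREFIX defined above, these tunables are set
+  through the dl-prefixed entry point, e.g.
+
+    dlmallopt(M_TRIM_THRESHOLD, 1 << 20);  // trim when >1MB sits unused on top
+    dlmallopt(M_MMAP_THRESHOLD, 1 << 20);  // mmap requests >= 1MB directly
+*/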
+
+/* ------------------------ Mallinfo declarations ------------------------ */
+
+  #if !NO_MALLINFO
+  /*
+    This version of malloc supports the standard SVID/XPG mallinfo
+    routine that returns a struct containing usage properties and
+    statistics. It should work on any system that has a
+    /usr/include/malloc.h defining struct mallinfo.  The main
+    declaration needed is the mallinfo struct that is returned (by-copy)
+    by mallinfo().  The malloinfo struct contains a bunch of fields that
+    are not even meaningful in this version of malloc.  These fields are
+    are instead filled by mallinfo() with other numbers that might be of
+    interest.
+
+    HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+    /usr/include/malloc.h file that includes a declaration of struct
+    mallinfo.  If so, it is included; else a compliant version is
+    declared below.  These must be precisely the same for mallinfo() to
+    work.  The original SVID version of this struct, defined on most
+    systems with mallinfo, declares all fields as ints. But some others
+    define as unsigned long. If your system defines the fields using a
+    type of different width than listed here, you MUST #include your
+    system version and #define HAVE_USR_INCLUDE_MALLOC_H.
+  */
+
+  /* #define HAVE_USR_INCLUDE_MALLOC_H */
+
+    #ifdef HAVE_USR_INCLUDE_MALLOC_H
+      #include "/usr/include/malloc.h"
+    #else                                      /* HAVE_USR_INCLUDE_MALLOC_H */
+      #ifndef STRUCT_MALLINFO_DECLARED
+        /* HP-UX (and others?) redefines mallinfo unless _STRUCT_MALLINFO is
+         * defined */
+        #define _STRUCT_MALLINFO
+        #define STRUCT_MALLINFO_DECLARED 1
+struct mallinfo {
+
+  MALLINFO_FIELD_TYPE arena;     /* non-mmapped space allocated from system */
+  MALLINFO_FIELD_TYPE ordblks;                     /* number of free chunks */
+  MALLINFO_FIELD_TYPE smblks;                                   /* always 0 */
+  MALLINFO_FIELD_TYPE hblks;                                    /* always 0 */
+  MALLINFO_FIELD_TYPE hblkhd;                   /* space in mmapped regions */
+  MALLINFO_FIELD_TYPE usmblks;             /* maximum total allocated space */
+  MALLINFO_FIELD_TYPE fsmblks;                                  /* always 0 */
+  MALLINFO_FIELD_TYPE uordblks;                    /* total allocated space */
+  MALLINFO_FIELD_TYPE fordblks;                         /* total free space */
+  MALLINFO_FIELD_TYPE keepcost;       /* releasable (via malloc_trim) space */
+
+};
+
+      #endif                                    /* STRUCT_MALLINFO_DECLARED */
+    #endif                                     /* HAVE_USR_INCLUDE_MALLOC_H */
+  #endif                                                     /* NO_MALLINFO */
+
+/*
+  Try to persuade compilers to inline. The most critical functions for
+  inlining are defined as macros, so these aren't used for them.
+*/
+
+  #ifndef FORCEINLINE
+    #if defined(__GNUC__)
+      #define FORCEINLINE __inline __attribute__((always_inline))
+    #elif defined(_MSC_VER)
+      #define FORCEINLINE __forceinline
+    #endif
+  #endif
+  #ifndef NOINLINE
+    #if defined(__GNUC__)
+      #define NOINLINE __attribute__((noinline))
+    #elif defined(_MSC_VER)
+      #define NOINLINE __declspec(noinline)
+    #else
+      #define NOINLINE
+    #endif
+  #endif
+
+  #ifdef __cplusplus
+extern "C" {
+
+    #ifndef FORCEINLINE
+      #define FORCEINLINE inline
+    #endif
+  #endif                                                     /* __cplusplus */
+  #ifndef FORCEINLINE
+    #define FORCEINLINE
+  #endif
+
+  #if !ONLY_MSPACES
+
+  /* ------------------- Declarations of public routines ------------------- */
+
+    #ifndef USE_DL_PREFIX
+      #define dlcalloc calloc
+      #define dlfree free
+      #define dlmalloc malloc
+      #define dlmemalign memalign
+      #define dlposix_memalign posix_memalign
+      #define dlrealloc realloc
+      #define dlrealloc_in_place realloc_in_place
+      #define dlvalloc valloc
+      #define dlpvalloc pvalloc
+      #define dlmallinfo mallinfo
+      #define dlmallopt mallopt
+      #define dlmalloc_trim malloc_trim
+      #define dlmalloc_stats malloc_stats
+      #define dlmalloc_usable_size malloc_usable_size
+      #define dlmalloc_footprint malloc_footprint
+      #define dlmalloc_max_footprint malloc_max_footprint
+      #define dlmalloc_footprint_limit malloc_footprint_limit
+      #define dlmalloc_set_footprint_limit malloc_set_footprint_limit
+      #define dlmalloc_inspect_all malloc_inspect_all
+      #define dlindependent_calloc independent_calloc
+      #define dlindependent_comalloc independent_comalloc
+      #define dlbulk_free bulk_free
+    #endif                                                 /* USE_DL_PREFIX */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or
+  null if no space is available, in which case errno is set to ENOMEM
+  on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
+  systems.)  Note that size_t is an unsigned type, so calls with
+  arguments that would be negative if signed are interpreted as
+  requests for huge amounts of space, which will often fail. The
+  maximum supported value of n differs across systems, but is in all
+  cases less than the maximum representable value of a size_t.
+*/
+DLMALLOC_EXPORT void *dlmalloc(size_t);
+
+/*
+  free(void* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. If p was not malloced or already
+  freed, free(p) will by default cause the current program to abort.
+*/
+DLMALLOC_EXPORT void dlfree(void *);
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+DLMALLOC_EXPORT void *dlcalloc(size_t, size_t);
+
+/*
+  realloc(void* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available.
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p in most cases when possible, otherwise it
+  employs the equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  realloc with a size
+  argument of zero (re)allocates a minimum-sized chunk.
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+DLMALLOC_EXPORT void *dlrealloc(void *, size_t);
+
+/*
+  realloc_in_place(void* p, size_t n)
+  Resizes the space allocated for p to size n, only if this can be
+  done without moving p (i.e., only if there is adjacent space
+  available if n is greater than p's current allocated size, or n is
+  less than or equal to p's size). This may be used instead of plain
+  realloc if an alternative allocation strategy is needed upon failure
+  to expand space; for example, reallocation of a buffer that must be
+  memory-aligned or cleared. You can use realloc_in_place to trigger
+  these alternatives only when needed.
+
+  Returns p if successful; otherwise null.
+*/
+DLMALLOC_EXPORT void *dlrealloc_in_place(void *, size_t);
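+
+/*
+  A sketch of the fallback pattern described above (buf and new_len
+  are illustrative names, not part of this API):
+
+    void *p = dlrealloc_in_place(buf, new_len);
+    if (p == 0) {
+      // could not resize without moving: fall back to e.g.
+      // dlmemalign + memcpy + dlfree, restoring any alignment
+      // or clearing requirements by hand
+    }
+*/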
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+DLMALLOC_EXPORT void *dlmemalign(size_t, size_t);
+
+/*
+  int posix_memalign(void** pp, size_t alignment, size_t n);
+  Allocates a chunk of n bytes, aligned in accord with the alignment
+  argument. Differs from memalign only in that it (1) assigns the
+  allocated memory to *pp rather than returning it, (2) fails and
+  returns EINVAL if the alignment is not a power of two, and (3) fails and
+  returns ENOMEM if memory cannot be allocated.
+*/
+DLMALLOC_EXPORT int dlposix_memalign(void **, size_t, size_t);
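+
+/*
+  Usage sketch (sizes are illustrative): obtaining a 64-byte-aligned
+  1 KiB buffer through the POSIX-style interface:
+
+    void *p = 0;
+    int   err = dlposix_memalign(&p, 64, 1024);
+    if (err != 0) {
+      // err is EINVAL (bad alignment) or ENOMEM (out of memory)
+    }
+*/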
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+DLMALLOC_EXPORT void *dlvalloc(size_t);
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters. The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  To work around the fact that mallopt is specified to use int,
+  not size_t parameters, the value -1 is specially treated as the
+  maximum unsigned size_t value.
+
+  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  None of these are used in this malloc,
+  so setting them has no effect. But this malloc also supports other
+  options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #  default    allowed param values
+  M_TRIM_THRESHOLD     -1   2*1024*1024   any   (-1 disables)
+  M_GRANULARITY        -2     page size   any power of 2 >= page size
+  M_MMAP_THRESHOLD     -3      256*1024   any   (or 0 if no MMAP support)
+*/
+DLMALLOC_EXPORT int dlmallopt(int, int);
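+
+/*
+  Tuning sketch using the parameters tabled above (values chosen here
+  are illustrative, not recommendations; the failure handler is
+  hypothetical):
+
+    if (!dlmallopt(M_TRIM_THRESHOLD, -1))       // -1 disables trimming
+      handle_mallopt_failure();
+    if (!dlmallopt(M_MMAP_THRESHOLD, 1 << 20))  // mmap requests >= 1 MiB
+      handle_mallopt_failure();
+*/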
+
+/*
+  malloc_footprint();
+  Returns the number of bytes obtained from the system.  The total
+  number of bytes allocated by malloc, realloc etc., is less than this
+  value. Unlike mallinfo, this function returns only a precomputed
+  result, so can be called frequently to monitor memory consumption.
+  Even if locks are otherwise defined, this function does not use them,
+  so results might not be up to date.
+*/
+DLMALLOC_EXPORT size_t dlmalloc_footprint(void);
+
+/*
+  malloc_max_footprint();
+  Returns the maximum number of bytes obtained from the system. This
+  value will be greater than current footprint if deallocated space
+  has been reclaimed by the system. The peak number of bytes allocated
+  by malloc, realloc etc., is less than this value. Unlike mallinfo,
+  this function returns only a precomputed result, so can be called
+  frequently to monitor memory consumption.  Even if locks are
+  otherwise defined, this function does not use them, so results might
+  not be up to date.
+*/
+DLMALLOC_EXPORT size_t dlmalloc_max_footprint(void);
+
+/*
+  malloc_footprint_limit();
+  Returns the number of bytes that the heap is allowed to obtain from
+  the system, returning the last value returned by
+  malloc_set_footprint_limit, or the maximum size_t value if
+  never set. The returned value reflects a permission. There is no
+  guarantee that this number of bytes can actually be obtained from
+  the system.
+*/
+DLMALLOC_EXPORT size_t dlmalloc_footprint_limit(void);
+
+/*
+  malloc_set_footprint_limit();
+  Sets the maximum number of bytes to obtain from the system, causing
+  failure returns from malloc and related functions upon attempts to
+  exceed this value. The argument value may be subject to page
+  rounding to an enforceable limit; this actual value is returned.
+  Using an argument of the maximum possible size_t effectively
+  disables checks. If the argument is less than or equal to the
+  current malloc_footprint, then all future allocations that require
+  additional system memory will fail. However, invocation cannot
+  retroactively deallocate existing used memory.
+*/
+DLMALLOC_EXPORT size_t dlmalloc_set_footprint_limit(size_t bytes);
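+
+/*
+  Usage sketch: capping the heap at roughly 64 MiB; the enforced limit
+  may be page-rounded, so keep the returned value:
+
+    size_t enforced = dlmalloc_set_footprint_limit((size_t)64 << 20);
+    assert(dlmalloc_footprint_limit() == enforced);
+*/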
+
+    #if MALLOC_INSPECT_ALL
+/*
+  malloc_inspect_all(void(*handler)(void *start,
+                                    void *end,
+                                    size_t used_bytes,
+                                    void* callback_arg),
+                      void* arg);
+  Traverses the heap and calls the given handler for each managed
+  region, skipping all bytes that are (or may be) used for bookkeeping
+  purposes.  Traversal does not include chunks that have been
+  directly memory mapped. Each reported region begins at the start
+  address, and continues up to but not including the end address.  The
+  first used_bytes of the region contain allocated data. If
+  used_bytes is zero, the region is unallocated. The handler is
+  invoked with the given callback argument. If locks are defined, they
+  are held during the entire traversal. It is a bad idea to invoke
+  other malloc functions from within the handler.
+
+  For example, to count the number of in-use chunks with at least
+  1000 used payload bytes, you could write:
+  static int count = 0;
+  void count_chunks(void* start, void* end, size_t used, void* arg) {
+
+    if (used >= 1000) ++count;
+
+  }
+
+  then:
+    malloc_inspect_all(count_chunks, NULL);
+
+  malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined.
+*/
+DLMALLOC_EXPORT void dlmalloc_inspect_all(void (*handler)(void *, void *,
+                                                          size_t, void *),
+                                          void *arg);
+
+    #endif                                            /* MALLOC_INSPECT_ALL */
+
+    #if !NO_MALLINFO
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system
+  ordblks:   the number of free chunks
+  smblks:    always zero.
+  hblks:     current number of mmapped regions
+  hblkhd:    total bytes held in mmapped regions
+  usmblks:   the maximum total allocated space. This will be greater
+                than current total if trimming has occurred.
+  fsmblks:   always zero
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space
+  keepcost:  the maximum number of bytes that could ideally be released
+               back to system via malloc_trim. ("ideally" means that
+               it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and
+  thus be inaccurate.
+*/
+DLMALLOC_EXPORT struct mallinfo dlmallinfo(void);
+    #endif                                                   /* NO_MALLINFO */
+
+/*
+  independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
+
+  independent_calloc is similar to calloc, but instead of returning a
+  single cleared space, it returns an array of pointers to n_elements
+  independent elements that can hold contents of size elem_size, each
+  of which starts out cleared, and can be independently freed,
+  realloc'ed etc. The elements are guaranteed to be adjacently
+  allocated (this is not guaranteed to occur with multiple callocs or
+  mallocs), which may also improve cache locality in some
+  applications.
+
+  The "chunks" argument is optional (i.e., may be null, which is
+  probably the most typical usage). If it is null, the returned array
+  is itself dynamically allocated and should also be freed when it is
+  no longer needed. Otherwise, the chunks array must be of at least
+  n_elements in length. It is filled in with the pointers to the
+  chunks.
+
+  In either case, independent_calloc returns this pointer array, or
+  null if the allocation failed.  If n_elements is zero and "chunks"
+  is null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be freed when it is no longer needed. This can be
+  done all at once using bulk_free.
+
+  independent_calloc simplifies and speeds up implementations of many
+  kinds of pools.  It may also be useful when constructing large data
+  structures that initially have a fixed number of fixed-sized nodes,
+  but the number is not known at compile time, and some of the nodes
+  may later need to be freed. For example:
+
+  struct Node { int item; struct Node* next; };
+
+  struct Node* build_list() {
+
+    struct Node** pool;
+    int i, n = read_number_of_nodes_needed();
+    if (n <= 0) return 0;
+    pool = (struct Node**)independent_calloc(n, sizeof(struct Node), 0);
+    if (pool == 0) die();
+    // organize into a linked list...
+    struct Node* first = pool[0];
+    for (i = 0; i < n-1; ++i)
+      pool[i]->next = pool[i+1];
+    free(pool);     // Can now free the array (or not, if it is needed later)
+    return first;
+
+  }
+
+*/
+DLMALLOC_EXPORT void **dlindependent_calloc(size_t, size_t, void **);
+
+/*
+  independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
+
+  independent_comalloc allocates, all at once, a set of n_elements
+  chunks with sizes indicated in the "sizes" array.  It returns
+  an array of pointers to these elements, each of which can be
+  independently freed, realloc'ed etc. The elements are guaranteed to
+  be adjacently allocated (this is not guaranteed to occur with
+  multiple callocs or mallocs), which may also improve cache locality
+  in some applications.
+
+  The "chunks" argument is optional (i.e., may be null). If it is null
+  the returned array is itself dynamically allocated and should also
+  be freed when it is no longer needed. Otherwise, the chunks array
+  must be of at least n_elements in length. It is filled in with the
+  pointers to the chunks.
+
+  In either case, independent_comalloc returns this pointer array, or
+  null if the allocation failed.  If n_elements is zero and chunks is
+  null, it returns a chunk representing an array with zero elements
+  (which should be freed if not wanted).
+
+  Each element must be freed when it is no longer needed. This can be
+  done all at once using bulk_free.
+
+  independent_comalloc differs from independent_calloc in that each
+  element may have a different size, and also that it does not
+  automatically clear elements.
+
+  independent_comalloc can be used to speed up allocation in cases
+  where several structs or objects must always be allocated at the
+  same time.  For example:
+
+  struct Head { ... };
+  struct Foot { ... };
+
+  void send_message(char* msg) {
+
+    int msglen = strlen(msg);
+    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
+    void* chunks[3];
+    if (independent_comalloc(3, sizes, chunks) == 0)
+      die();
+    struct Head* head = (struct Head*)(chunks[0]);
+    char*        body = (char*)(chunks[1]);
+    struct Foot* foot = (struct Foot*)(chunks[2]);
+    // ...
+
+  }
+
+  In general though, independent_comalloc is worth using only for
+  larger values of n_elements. For small values, you probably won't
+  detect enough difference from series of malloc calls to bother.
+
+  Overuse of independent_comalloc can increase overall memory usage,
+  since it cannot reuse existing noncontiguous small chunks that
+  might be available for some of the elements.
+*/
+DLMALLOC_EXPORT void **dlindependent_comalloc(size_t, size_t *, void **);
+
+/*
+  bulk_free(void* array[], size_t n_elements)
+  Frees and clears (sets to null) each non-null pointer in the given
+  array.  This is likely to be faster than freeing them one-by-one.
+  If footers are used, pointers that have been allocated in different
+  mspaces are not freed or cleared, and the count of all such pointers
+  is returned.  For large arrays of pointers with poor locality, it
+  may be worthwhile to sort this array before calling bulk_free.
+*/
+DLMALLOC_EXPORT size_t dlbulk_free(void **, size_t n_elements);
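+
+/*
+  A sketch tying this to the independent_calloc example above: given a
+  pointer array pool of n chunks obtained from dlindependent_calloc,
+  all of them can be released in one call:
+
+    size_t unfreed = dlbulk_free((void **)pool, n);
+    assert(unfreed == 0);  // 0 unless footers flag cross-mspace chunks
+    dlfree(pool);          // the array itself is a separate allocation
+*/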
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+DLMALLOC_EXPORT void *dlpvalloc(size_t);
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative arguments
+  to sbrk) if there is unused memory at the `high' end of the malloc
+  pool or in unused MMAP segments. You can call this after freeing
+  large blocks of memory to potentially reduce the system-level memory
+  requirements of a program. However, it cannot guarantee to reduce
+  memory. Under some allocation patterns, some large free blocks of
+  memory will be locked between two used chunks, so they cannot be
+  given back to the system.
+
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero, only
+  the minimum amount of memory to maintain internal data structures
+  will be left. Non-zero arguments can be supplied to maintain enough
+  trailing space to service future expected allocations without having
+  to re-obtain memory from the system.
+
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+*/
+DLMALLOC_EXPORT int dlmalloc_trim(size_t);
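+
+/*
+  Usage sketch: after freeing a large transient buffer, give memory
+  back while keeping ~1 MiB of headroom (the pad value is
+  illustrative):
+
+    dlfree(big_buffer);
+    if (dlmalloc_trim((size_t)1 << 20)) {
+      // some trailing memory was actually returned to the system
+    }
+*/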
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+*/
+DLMALLOC_EXPORT void dlmalloc_stats(void);
+
+/*
+  malloc_usable_size(void* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+*/
+size_t dlmalloc_usable_size(void *);
+
+  #endif                                                    /* ONLY_MSPACES */
+
+  #if MSPACES
+
+/*
+  mspace is an opaque type representing an independent
+  region of space that supports mspace_malloc, etc.
+*/
+typedef void *mspace;
+
+/*
+  create_mspace creates and returns a new independent space with the
+  given initial capacity, or, if 0, the default granularity size.  It
+  returns null if there is no system memory available to create the
+  space.  If argument locked is non-zero, the space uses a separate
+  lock to control access. The capacity of the space will grow
+  dynamically as needed to service mspace_malloc requests.  You can
+  control the sizes of incremental increases of this space by
+  compiling with a different DEFAULT_GRANULARITY or dynamically
+  setting with mallopt(M_GRANULARITY, value).
+*/
+DLMALLOC_EXPORT mspace create_mspace(size_t capacity, int locked);
+
+/*
+  destroy_mspace destroys the given space, and attempts to return all
+  of its memory back to the system, returning the total number of
+  bytes freed. After destruction, the results of access to all memory
+  used by the space become undefined.
+*/
+DLMALLOC_EXPORT size_t destroy_mspace(mspace msp);
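+
+/*
+  A minimal lifecycle sketch for the two calls above: a private locked
+  arena with default initial capacity, torn down in one call:
+
+    mspace arena = create_mspace(0, 1);  // 0 => default granularity
+    if (arena != 0) {
+      void *p = mspace_malloc(arena, 128);
+      mspace_free(arena, p);
+      size_t released = destroy_mspace(arena);  // bytes given back
+    }
+*/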
+
+/*
+  create_mspace_with_base uses the memory supplied as the initial base
+  of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
+  space is used for bookkeeping, so the capacity must be at least this
+  large. (Otherwise 0 is returned.) When this initial space is
+  exhausted, additional memory will be obtained from the system.
+  Destroying this space will deallocate all additionally allocated
+  space (if possible) but not the initial base.
+*/
+DLMALLOC_EXPORT mspace create_mspace_with_base(void *base, size_t capacity,
+                                               int locked);
+
+/*
+  mspace_track_large_chunks controls whether requests for large chunks
+  are allocated in their own untracked mmapped regions, separate from
+  others in this mspace. By default large chunks are not tracked,
+  which reduces fragmentation. However, such chunks are not
+  necessarily released to the system upon destroy_mspace.  Enabling
+  tracking by setting to true may increase fragmentation, but avoids
+  leakage when relying on destroy_mspace to release all memory
+  allocated using this space.  The function returns the previous
+  setting.
+*/
+DLMALLOC_EXPORT int mspace_track_large_chunks(mspace msp, int enable);
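+
+/*
+  Sketch: enable tracking right after creating a space whose memory
+  must be fully reclaimed by destroy_mspace (arena as in the example
+  above):
+
+    int was_tracking = mspace_track_large_chunks(arena, 1);
+*/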
+
+/*
+  mspace_malloc behaves as malloc, but operates within
+  the given space.
+*/
+DLMALLOC_EXPORT void *mspace_malloc(mspace msp, size_t bytes);
+
+/*
+  mspace_free behaves as free, but operates within
+  the given space.
+
+  If compiled with FOOTERS==1, mspace_free is not actually needed.
+  free may be called instead of mspace_free because freed chunks from
+  any space are handled by their originating spaces.
+*/
+DLMALLOC_EXPORT void mspace_free(mspace msp, void *mem);
+
+/*
+  mspace_realloc behaves as realloc, but operates within
+  the given space.
+
+  If compiled with FOOTERS==1, mspace_realloc is not actually
+  needed.  realloc may be called instead of mspace_realloc because
+  realloced chunks from any space are handled by their originating
+  spaces.
+*/
+DLMALLOC_EXPORT void *mspace_realloc(mspace msp, void *mem, size_t newsize);
+
+/*
+  mspace_calloc behaves as calloc, but operates within
+  the given space.
+*/
+DLMALLOC_EXPORT void *mspace_calloc(mspace msp, size_t n_elements,
+                                    size_t elem_size);
+
+/*
+  mspace_memalign behaves as memalign, but operates within
+  the given space.
+*/
+DLMALLOC_EXPORT void *mspace_memalign(mspace msp, size_t alignment,
+                                      size_t bytes);
+
+/*
+  mspace_independent_calloc behaves as independent_calloc, but
+  operates within the given space.
+*/
+DLMALLOC_EXPORT void **mspace_independent_calloc(mspace msp, size_t n_elements,
+                                                 size_t elem_size,
+                                                 void * chunks[]);
+
+/*
+  mspace_independent_comalloc behaves as independent_comalloc, but
+  operates within the given space.
+*/
+DLMALLOC_EXPORT void **mspace_independent_comalloc(mspace msp,
+                                                   size_t n_elements,
+                                                   size_t sizes[],
+                                                   void * chunks[]);
+
+/*
+  mspace_footprint() returns the number of bytes obtained from the
+  system for this space.
+*/
+DLMALLOC_EXPORT size_t mspace_footprint(mspace msp);
+
+/*
+  mspace_max_footprint() returns the peak number of bytes obtained from the
+  system for this space.
+*/
+DLMALLOC_EXPORT size_t mspace_max_footprint(mspace msp);
+
+    #if !NO_MALLINFO
+/*
+  mspace_mallinfo behaves as mallinfo, but reports properties of
+  the given space.
+*/
+DLMALLOC_EXPORT struct mallinfo mspace_mallinfo(mspace msp);
+    #endif                                                   /* NO_MALLINFO */
+
+/*
+  mspace_usable_size(const void* mem) behaves the same as
+  malloc_usable_size(void* p).
+*/
+DLMALLOC_EXPORT size_t mspace_usable_size(const void *mem);
+
+/*
+  mspace_malloc_stats behaves as malloc_stats, but reports
+  properties of the given space.
+*/
+DLMALLOC_EXPORT void mspace_malloc_stats(mspace msp);
+
+/*
+  mspace_trim behaves as malloc_trim, but
+  operates within the given space.
+*/
+DLMALLOC_EXPORT int mspace_trim(mspace msp, size_t pad);
+
+/*
+  An alias for mallopt.
+*/
+DLMALLOC_EXPORT int mspace_mallopt(int, int);
+
+  #endif                                                         /* MSPACES */
+
+  #ifdef __cplusplus
+
+}                                                      /* end of extern "C" */
+
+  #endif                                                     /* __cplusplus */
+
+/*
+  ========================================================================
+  To make a fully customizable malloc.h header file, cut everything
+  above this line, put into file malloc.h, edit to suit, and #include it
+  on the next line, as well as in programs that use this malloc.
+  ========================================================================
+*/
+
+/* #include "malloc.h" */
+
+/*------------------------------ internal #includes ---------------------- */
+
+  #ifdef _MSC_VER
+    #pragma warning(disable : 4146)               /* no "unsigned" warnings */
+  #endif                                                        /* _MSC_VER */
+  #if !NO_MALLOC_STATS
+    #include <stdio.h>                      /* for printing in malloc_stats */
+  #endif                                                 /* NO_MALLOC_STATS */
+  #ifndef LACKS_ERRNO_H
+    #include <errno.h>                         /* for MALLOC_FAILURE_ACTION */
+  #endif                                                   /* LACKS_ERRNO_H */
+  #ifdef DEBUG
+    #if ABORT_ON_ASSERT_FAILURE
+      #undef assert
+      #define assert(x) \
+        if (!(x)) ABORT
+    #else                                        /* ABORT_ON_ASSERT_FAILURE */
+      #include <assert.h>
+    #endif                                       /* ABORT_ON_ASSERT_FAILURE */
+  #else                                                            /* DEBUG */
+    #ifndef assert
+      #define assert(x)
+    #endif
+    #define DEBUG 0
+  #endif                                                           /* DEBUG */
+  #if !defined(WIN32) && !defined(LACKS_TIME_H)
+    #include <time.h>                           /* for magic initialization */
+  #endif                                                           /* WIN32 */
+  #ifndef LACKS_STDLIB_H
+    #include <stdlib.h>                                      /* for abort() */
+  #endif                                                  /* LACKS_STDLIB_H */
+  #ifndef LACKS_STRING_H
+    #include <string.h>                                   /* for memset etc */
+  #endif                                                  /* LACKS_STRING_H */
+  #if USE_BUILTIN_FFS
+    #ifndef LACKS_STRINGS_H
+      #include <strings.h>                                       /* for ffs */
+    #endif                                               /* LACKS_STRINGS_H */
+  #endif                                                 /* USE_BUILTIN_FFS */
+  #if HAVE_MMAP
+    #ifndef LACKS_SYS_MMAN_H
+      /* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */
+      #if (defined(linux) && !defined(__USE_GNU))
+        #define __USE_GNU 1
+        #include <sys/mman.h>                                   /* for mmap */
+        #undef __USE_GNU
+      #else
+        #include <sys/mman.h>                                   /* for mmap */
+      #endif                                                       /* linux */
+    #endif                                              /* LACKS_SYS_MMAN_H */
+    #ifndef LACKS_FCNTL_H
+      #include <fcntl.h>
+    #endif                                                 /* LACKS_FCNTL_H */
+  #endif                                                       /* HAVE_MMAP */
+  #ifndef LACKS_UNISTD_H
+    #include <unistd.h>                                /* for sbrk, sysconf */
+  #else                                                   /* LACKS_UNISTD_H */
+    #if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+extern void *sbrk(ptrdiff_t);
+    #endif                                                   /* FreeBSD etc */
+  #endif                                                  /* LACKS_UNISTD_H */
+
+  /* Declarations for locking */
+  #if USE_LOCKS
+    #ifndef WIN32
+      #if defined(__SVR4) && defined(__sun)                      /* solaris */
+        #include <thread.h>
+      #elif !defined(LACKS_SCHED_H)
+        #include <sched.h>
+      #endif                                    /* solaris or LACKS_SCHED_H */
+      #if (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0) || \
+          !USE_SPIN_LOCKS
+        #include <pthread.h>
+      #endif                                     /* USE_RECURSIVE_LOCKS ... */
+    #elif defined(_MSC_VER)
+      #ifndef _M_AMD64
+        /* These are already defined on AMD64 builds */
+        #ifdef __cplusplus
+extern "C" {
+
+        #endif                                               /* __cplusplus */
+LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange,
+                                         LONG Comp);
+LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value);
+        #ifdef __cplusplus
+
+}
+
+        #endif                                               /* __cplusplus */
+      #endif                                                    /* _M_AMD64 */
+      #pragma intrinsic(_InterlockedCompareExchange)
+      #pragma intrinsic(_InterlockedExchange)
+      #define interlockedcompareexchange _InterlockedCompareExchange
+      #define interlockedexchange _InterlockedExchange
+    #elif defined(WIN32) && defined(__GNUC__)
+      #define interlockedcompareexchange(a, b, c) \
+        __sync_val_compare_and_swap(a, c, b)
+      #define interlockedexchange __sync_lock_test_and_set
+    #endif                                                         /* Win32 */
+  #else                                                        /* USE_LOCKS */
+  #endif                                                       /* USE_LOCKS */
+
+  #ifndef LOCK_AT_FORK
+    #define LOCK_AT_FORK 0
+  #endif
+
+  /* Declarations for bit scanning on win32 */
+  #if defined(_MSC_VER) && _MSC_VER >= 1300
+    #ifndef BitScanForward               /* Try to avoid pulling in WinNT.h */
+      #ifdef __cplusplus
+extern "C" {
+
+      #endif                                                 /* __cplusplus */
+unsigned char _BitScanForward(unsigned long *index, unsigned long mask);
+unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
+      #ifdef __cplusplus
+
+}
+
+      #endif                                                 /* __cplusplus */
+
+      #define BitScanForward _BitScanForward
+      #define BitScanReverse _BitScanReverse
+      #pragma intrinsic(_BitScanForward)
+      #pragma intrinsic(_BitScanReverse)
+    #endif                                                /* BitScanForward */
+  #endif                             /* defined(_MSC_VER) && _MSC_VER>=1300 */
+
+  #ifndef WIN32
+    #ifndef malloc_getpagesize
+      #ifdef _SC_PAGESIZE           /* some SVR4 systems omit an underscore */
+        #ifndef _SC_PAGE_SIZE
+          #define _SC_PAGE_SIZE _SC_PAGESIZE
+        #endif
+      #endif
+      #ifdef _SC_PAGE_SIZE
+        #define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+      #else
+        #if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+extern size_t getpagesize();
+          #define malloc_getpagesize getpagesize()
+        #else
+          #ifdef WIN32             /* use supplied emulation of getpagesize */
+            #define malloc_getpagesize getpagesize()
+          #else
+            #ifndef LACKS_SYS_PARAM_H
+              #include <sys/param.h>
+            #endif
+            #ifdef EXEC_PAGESIZE
+              #define malloc_getpagesize EXEC_PAGESIZE
+            #else
+              #ifdef NBPG
+                #ifndef CLSIZE
+                  #define malloc_getpagesize NBPG
+                #else
+                  #define malloc_getpagesize (NBPG * CLSIZE)
+                #endif
+              #else
+                #ifdef NBPC
+                  #define malloc_getpagesize NBPC
+                #else
+                  #ifdef PAGESIZE
+                    #define malloc_getpagesize PAGESIZE
+                  #else                                       /* just guess */
+                    #define malloc_getpagesize ((size_t)4096U)
+                  #endif
+                #endif
+              #endif
+            #endif
+          #endif
+        #endif
+      #endif
+    #endif
+  #endif
+
+  /* ------------------- size_t and alignment properties -------------------- */
+
+  /* The byte and bit size of a size_t */
+  #define SIZE_T_SIZE (sizeof(size_t))
+  #define SIZE_T_BITSIZE (sizeof(size_t) << 3)
+
+  /* Some constants coerced to size_t */
+  /* Annoying but necessary to avoid errors on some platforms */
+  #define SIZE_T_ZERO ((size_t)0)
+  #define SIZE_T_ONE ((size_t)1)
+  #define SIZE_T_TWO ((size_t)2)
+  #define SIZE_T_FOUR ((size_t)4)
+  #define TWO_SIZE_T_SIZES (SIZE_T_SIZE << 1)
+  #define FOUR_SIZE_T_SIZES (SIZE_T_SIZE << 2)
+  #define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES + TWO_SIZE_T_SIZES)
+  #define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U)
+
+  /* The bit mask value corresponding to MALLOC_ALIGNMENT */
+  #define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE)
+
+  /* True if address a has acceptable alignment */
+  #define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
+
+  /* the number of bytes to offset an address to align it */
+  #define align_offset(A)                                         \
+    ((((size_t)(A)&CHUNK_ALIGN_MASK) == 0)                        \
+         ? 0                                                      \
+         : ((MALLOC_ALIGNMENT - ((size_t)(A)&CHUNK_ALIGN_MASK)) & \
+            CHUNK_ALIGN_MASK))
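+
+  /*
+     Worked example (assuming MALLOC_ALIGNMENT == 16, so
+     CHUNK_ALIGN_MASK == 15): an address ending in 0x08 needs
+     (16 - (0x08 & 15)) & 15 == 8 more bytes to align, while an
+     already-aligned address yields offset 0.
+  */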
+
+  /* -------------------------- MMAP preliminaries ------------------------- */
+
+  /*
+     If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
+     checks to fail so compiler optimizer can delete code rather than
+     using so many "#if"s.
+  */
+
+  /* MORECORE and MMAP must return MFAIL on failure */
+  #define MFAIL ((void *)(MAX_SIZE_T))
+  #define CMFAIL ((char *)(MFAIL))               /* defined for convenience */
+
+  #if HAVE_MMAP
+
+    #ifndef WIN32
+      #define MMAP_PROT (PROT_READ | PROT_WRITE)
+      #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+        #define MAP_ANONYMOUS MAP_ANON
+      #endif                                                    /* MAP_ANON */
+      #ifdef MAP_ANONYMOUS
+
+        #define MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
+
+static FORCEINLINE void *unixmmap(size_t size) {
+
+  void *result;
+
+  result = mmap(0, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
+  if (result == MFAIL) return MFAIL;
+
+  return result;
+
+}
+
+static FORCEINLINE int unixmunmap(void *ptr, size_t size) {
+
+  int result;
+
+  result = munmap(ptr, size);
+  if (result != 0) return result;
+
+  return result;
+
+}
+
+        #define MMAP_DEFAULT(s) unixmmap(s)
+        #define MUNMAP_DEFAULT(a, s) unixmunmap((a), (s))
+
+      #else                                                /* MAP_ANONYMOUS */
+        /*
+           Nearly all versions of mmap support MAP_ANONYMOUS, so the following
+           is unlikely to be needed, but is supplied just in case.
+        */
+        #define MMAP_FLAGS (MAP_PRIVATE)
+static int dev_zero_fd = -1;       /* Cached file descriptor for /dev/zero. */
+        #define MMAP_DEFAULT(s)                                        \
+          ((dev_zero_fd < 0)                                           \
+               ? (dev_zero_fd = open("/dev/zero", O_RDWR),             \
+                  mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) \
+               : mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
+        #define MUNMAP_DEFAULT(a, s) munmap((a), (s))
+      #endif                                               /* MAP_ANONYMOUS */
+
+      #define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s)
+
+    #else                                                          /* WIN32 */
+
+/* Win32 MMAP via VirtualAlloc */
+static FORCEINLINE void *win32mmap(size_t size) {
+
+  void *ptr;
+
+  ptr = VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+  if (ptr == 0) return MFAIL;
+
+  return ptr;
+
+}
+
+/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
+static FORCEINLINE void *win32direct_mmap(size_t size) {
+
+  void *ptr;
+
+  ptr = VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT | MEM_TOP_DOWN,
+                     PAGE_READWRITE);
+  if (ptr == 0) return MFAIL;
+
+  return ptr;
+
+}
+
+/* This function supports releasing coalesced segments */
+static FORCEINLINE int win32munmap(void *ptr, size_t size) {
+
+  MEMORY_BASIC_INFORMATION minfo;
+  char *cptr = (char *)ptr;
+
+  while (size) {
+
+    if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) return -1;
+    if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
+        minfo.State != MEM_COMMIT || minfo.RegionSize > size)
+      return -1;
+    if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) return -1;
+    cptr += minfo.RegionSize;
+    size -= minfo.RegionSize;
+
+  }
+
+  return 0;
+
+}
+
+      #define MMAP_DEFAULT(s) win32mmap(s)
+      #define MUNMAP_DEFAULT(a, s) win32munmap((a), (s))
+      #define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s)
+    #endif                                                         /* WIN32 */
+  #endif                                                       /* HAVE_MMAP */
+
+  #if HAVE_MREMAP
+    #ifndef WIN32
+
+static FORCEINLINE void *dlmremap(void *old_address, size_t old_size,
+                                  size_t new_size, int flags) {
+
+  void *result;
+
+  result = mremap(old_address, old_size, new_size, flags);
+  if (result == MFAIL) return MFAIL;
+
+  return result;
+
+}
+
+      #define MREMAP_DEFAULT(addr, osz, nsz, mv) \
+        dlmremap((addr), (osz), (nsz), (mv))
+    #endif                                                         /* WIN32 */
+  #endif                                                     /* HAVE_MREMAP */
+
+  /**
+   * Define CALL_MORECORE
+   */
+  #if HAVE_MORECORE
+    #ifdef MORECORE
+      #define CALL_MORECORE(S) MORECORE(S)
+    #else                                                       /* MORECORE */
+      #define CALL_MORECORE(S) MORECORE_DEFAULT(S)
+    #endif                                                      /* MORECORE */
+  #else                                                    /* HAVE_MORECORE */
+    #define CALL_MORECORE(S) MFAIL
+  #endif                                                   /* HAVE_MORECORE */
+
+  /**
+   * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP
+   */
+  #if HAVE_MMAP
+    #define USE_MMAP_BIT (SIZE_T_ONE)
+
+    #ifdef MMAP
+      #define CALL_MMAP(s) MMAP(s)
+    #else                                                           /* MMAP */
+      #define CALL_MMAP(s) MMAP_DEFAULT(s)
+    #endif                                                          /* MMAP */
+    #ifdef MUNMAP
+      #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
+    #else                                                         /* MUNMAP */
+      #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s))
+    #endif                                                        /* MUNMAP */
+    #ifdef DIRECT_MMAP
+      #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
+    #else                                                    /* DIRECT_MMAP */
+      #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s)
+    #endif                                                   /* DIRECT_MMAP */
+  #else                                                        /* HAVE_MMAP */
+    #define USE_MMAP_BIT (SIZE_T_ZERO)
+
+    #define MMAP(s) MFAIL
+    #define MUNMAP(a, s) (-1)
+    #define DIRECT_MMAP(s) MFAIL
+    #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
+    #define CALL_MMAP(s) MMAP(s)
+    #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
+  #endif                                                       /* HAVE_MMAP */
+
+  /**
+   * Define CALL_MREMAP
+   */
+  #if HAVE_MMAP && HAVE_MREMAP
+    #ifdef MREMAP
+      #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
+    #else                                                         /* MREMAP */
+      #define CALL_MREMAP(addr, osz, nsz, mv) \
+        MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
+    #endif                                                        /* MREMAP */
+  #else                                         /* HAVE_MMAP && HAVE_MREMAP */
+    #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL
+  #endif                                        /* HAVE_MMAP && HAVE_MREMAP */
+
+  /* mstate bit set if contiguous morecore disabled or failed */
+  #define USE_NONCONTIGUOUS_BIT (4U)
+
+  /* segment bit set in create_mspace_with_base */
+  #define EXTERN_BIT (8U)
+
+/* --------------------------- Lock preliminaries ------------------------ */
+
+/*
+  When locks are defined, there is one global lock, plus
+  one per-mspace lock.
+
+  The global lock ensures that mparams.magic and other unique
+  mparams values are initialized only once. It also protects
+  sequences of calls to MORECORE.  In many cases sys_alloc requires
+  two calls that should not be interleaved with calls by other
+  threads.  This does not protect against direct calls to MORECORE
+  by other threads not using this lock, so there is still code to
+  cope as best we can with interference.
+
+  Per-mspace locks surround calls to malloc, free, etc.
+  By default, locks are simple non-reentrant mutexes.
+
+  Because lock-protected regions generally have bounded times, it is
+  OK to use the supplied simple spinlocks. Spinlocks are likely to
+  improve performance for lightly contended applications, but worsen
+  performance under heavy contention.
+
+  If USE_LOCKS is > 1, the definitions of lock routines here are
+  bypassed, in which case you will need to define the type MLOCK_T,
+  and at least INITIAL_LOCK, DESTROY_LOCK, ACQUIRE_LOCK, RELEASE_LOCK
+  and TRY_LOCK.  You must also declare a
+    static MLOCK_T malloc_global_mutex = { initialization values };.
+
+*/
+
+  #if !USE_LOCKS
+    #define USE_LOCK_BIT (0U)
+    #define INITIAL_LOCK(l) (0)
+    #define DESTROY_LOCK(l) (0)
+    #define ACQUIRE_MALLOC_GLOBAL_LOCK()
+    #define RELEASE_MALLOC_GLOBAL_LOCK()
+
+  #else
+    #if USE_LOCKS > 1
+    /* -----------------------  User-defined locks ------------------------ */
+    /* Define your own lock implementation here */
+    /* #define INITIAL_LOCK(lk)  ... */
+    /* #define DESTROY_LOCK(lk)  ... */
+    /* #define ACQUIRE_LOCK(lk)  ... */
+    /* #define RELEASE_LOCK(lk)  ... */
+    /* #define TRY_LOCK(lk) ... */
+    /* static MLOCK_T malloc_global_mutex = ... */
+
+    #elif USE_SPIN_LOCKS
+
+    /* First, define CAS_LOCK and CLEAR_LOCK on ints */
+    /* Note CAS_LOCK defined to return 0 on success */
+
+      #if defined(__GNUC__) && \
+          (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
+        #define CAS_LOCK(sl) __sync_lock_test_and_set(sl, 1)
+        #define CLEAR_LOCK(sl) __sync_lock_release(sl)
+
+      #elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+/* Custom spin locks for older gcc on x86 */
+static FORCEINLINE int x86_cas_lock(int *sl) {
+
+  int ret;
+  int val = 1;
+  int cmp = 0;
+  __asm__ __volatile__("lock; cmpxchgl %1, %2"
+                       : "=a"(ret)
+                       : "r"(val), "m"(*(sl)), "0"(cmp)
+                       : "memory", "cc");
+  return ret;
+
+}
+
+static FORCEINLINE void x86_clear_lock(int *sl) {
+
+  assert(*sl != 0);
+  int prev = 0;
+  int ret;
+  __asm__ __volatile__("lock; xchgl %0, %1"
+                       : "=r"(ret)
+                       : "m"(*(sl)), "0"(prev)
+                       : "memory");
+
+}
+
+        #define CAS_LOCK(sl) x86_cas_lock(sl)
+        #define CLEAR_LOCK(sl) x86_clear_lock(sl)
+
+      #else                                                    /* Win32 MSC */
+        #define CAS_LOCK(sl) interlockedexchange((volatile LONG *)sl, (LONG)1)
+        #define CLEAR_LOCK(sl) interlockedexchange((volatile LONG *)sl, (LONG)0)
+
+      #endif                                     /* ... gcc spins locks ... */
+
+      /* How to yield for a spin lock */
+      #define SPINS_PER_YIELD 63
+      #if defined(_MSC_VER)
+        #define SLEEP_EX_DURATION 50               /* delay for yield/sleep */
+        #define SPIN_LOCK_YIELD SleepEx(SLEEP_EX_DURATION, FALSE)
+      #elif defined(__SVR4) && defined(__sun)                    /* solaris */
+        #define SPIN_LOCK_YIELD thr_yield();
+      #elif !defined(LACKS_SCHED_H)
+        #define SPIN_LOCK_YIELD sched_yield();
+      #else
+        #define SPIN_LOCK_YIELD
+      #endif                                               /* ... yield ... */
+
+      #if !defined(USE_RECURSIVE_LOCKS) || USE_RECURSIVE_LOCKS == 0
+/* Plain spin locks use single word (embedded in malloc_states) */
+static int spin_acquire_lock(int *sl) {
+
+  int spins = 0;
+  while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) {
+
+    if ((++spins & SPINS_PER_YIELD) == 0) { SPIN_LOCK_YIELD; }
+
+  }
+
+  return 0;
+
+}
+
+        #define MLOCK_T int
+        #define TRY_LOCK(sl) !CAS_LOCK(sl)
+        #define RELEASE_LOCK(sl) CLEAR_LOCK(sl)
+        #define ACQUIRE_LOCK(sl) (CAS_LOCK(sl) ? spin_acquire_lock(sl) : 0)
+        #define INITIAL_LOCK(sl) (*sl = 0)
+        #define DESTROY_LOCK(sl) (0)
+static MLOCK_T malloc_global_mutex = 0;
+
+      #else                                          /* USE_RECURSIVE_LOCKS */
+        /* types for lock owners */
+        #ifdef WIN32
+          #define THREAD_ID_T DWORD
+          #define CURRENT_THREAD GetCurrentThreadId()
+          #define EQ_OWNER(X, Y) ((X) == (Y))
+        #else
+          /*
+            Note: the following assume that pthread_t is a type that can be
+            initialized to (cast) zero. If this is not the case, you will need
+            to somehow redefine these or not use spin locks.
+          */
+          #define THREAD_ID_T pthread_t
+          #define CURRENT_THREAD pthread_self()
+          #define EQ_OWNER(X, Y) pthread_equal(X, Y)
+        #endif
+
+struct malloc_recursive_lock {
+
+  int          sl;
+  unsigned int c;
+  THREAD_ID_T  threadid;
+
+};
+
+        #define MLOCK_T struct malloc_recursive_lock
+static MLOCK_T malloc_global_mutex = {0, 0, (THREAD_ID_T)0};
+
+static FORCEINLINE void recursive_release_lock(MLOCK_T *lk) {
+
+  assert(lk->sl != 0);
+  if (--lk->c == 0) { CLEAR_LOCK(&lk->sl); }
+
+}
+
+static FORCEINLINE int recursive_acquire_lock(MLOCK_T *lk) {
+
+  THREAD_ID_T mythreadid = CURRENT_THREAD;
+  int         spins = 0;
+  for (;;) {
+
+    if (*((volatile int *)(&lk->sl)) == 0) {
+
+      if (!CAS_LOCK(&lk->sl)) {
+
+        lk->threadid = mythreadid;
+        lk->c = 1;
+        return 0;
+
+      }
+
+    } else if (EQ_OWNER(lk->threadid, mythreadid)) {
+
+      ++lk->c;
+      return 0;
+
+    }
+
+    if ((++spins & SPINS_PER_YIELD) == 0) { SPIN_LOCK_YIELD; }
+
+  }
+
+}
+
+static FORCEINLINE int recursive_try_lock(MLOCK_T *lk) {
+
+  THREAD_ID_T mythreadid = CURRENT_THREAD;
+  if (*((volatile int *)(&lk->sl)) == 0) {
+
+    if (!CAS_LOCK(&lk->sl)) {
+
+      lk->threadid = mythreadid;
+      lk->c = 1;
+      return 1;
+
+    }
+
+  } else if (EQ_OWNER(lk->threadid, mythreadid)) {
+
+    ++lk->c;
+    return 1;
+
+  }
+
+  return 0;
+
+}
+
+        #define RELEASE_LOCK(lk) recursive_release_lock(lk)
+        #define TRY_LOCK(lk) recursive_try_lock(lk)
+        #define ACQUIRE_LOCK(lk) recursive_acquire_lock(lk)
+        #define INITIAL_LOCK(lk) \
+          ((lk)->threadid = (THREAD_ID_T)0, (lk)->sl = 0, (lk)->c = 0)
+        #define DESTROY_LOCK(lk) (0)
+      #endif                                         /* USE_RECURSIVE_LOCKS */
+
+    #elif defined(WIN32)                         /* Win32 critical sections */
+      #define MLOCK_T CRITICAL_SECTION
+      #define ACQUIRE_LOCK(lk) (EnterCriticalSection(lk), 0)
+      #define RELEASE_LOCK(lk) LeaveCriticalSection(lk)
+      #define TRY_LOCK(lk) TryEnterCriticalSection(lk)
+      #define INITIAL_LOCK(lk) \
+        (!InitializeCriticalSectionAndSpinCount((lk), 0x80000000 | 4000))
+      #define DESTROY_LOCK(lk) (DeleteCriticalSection(lk), 0)
+      #define NEED_GLOBAL_LOCK_INIT
+
+static MLOCK_T       malloc_global_mutex;
+static volatile LONG malloc_global_mutex_status;
+
+/* Use spin loop to initialize global lock */
+static void init_malloc_global_mutex() {
+
+  for (;;) {
+
+    long stat = malloc_global_mutex_status;
+    if (stat > 0) return;
+    /* transition to < 0 while initializing, then to > 0 */
+    if (stat == 0 && interlockedcompareexchange(&malloc_global_mutex_status,
+                                                (LONG)-1, (LONG)0) == 0) {
+
+      InitializeCriticalSection(&malloc_global_mutex);
+      interlockedexchange(&malloc_global_mutex_status, (LONG)1);
+      return;
+
+    }
+
+    SleepEx(0, FALSE);
+
+  }
+
+}
+
+    #else                                           /* pthreads-based locks */
+      #define MLOCK_T pthread_mutex_t
+      #define ACQUIRE_LOCK(lk) pthread_mutex_lock(lk)
+      #define RELEASE_LOCK(lk) pthread_mutex_unlock(lk)
+      #define TRY_LOCK(lk) (!pthread_mutex_trylock(lk))
+      #define INITIAL_LOCK(lk) pthread_init_lock(lk)
+      #define DESTROY_LOCK(lk) pthread_mutex_destroy(lk)
+
+      #if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 && \
+          defined(linux) && !defined(PTHREAD_MUTEX_RECURSIVE)
+/* Cope with old-style linux recursive lock initialization by adding */
+/* skipped internal declaration from pthread.h */
+extern int pthread_mutexattr_setkind_np __P((pthread_mutexattr_t * __attr,
+                                             int __kind));
+        #define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
+        #define pthread_mutexattr_settype(x, y) \
+          pthread_mutexattr_setkind_np(x, y)
+      #endif                                     /* USE_RECURSIVE_LOCKS ... */
+
+static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static int pthread_init_lock(MLOCK_T *lk) {
+
+  pthread_mutexattr_t attr;
+  if (pthread_mutexattr_init(&attr)) return 1;
+      #if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0
+  if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1;
+      #endif
+  if (pthread_mutex_init(lk, &attr)) return 1;
+  if (pthread_mutexattr_destroy(&attr)) return 1;
+  return 0;
+
+}
+
+    #endif                                            /* ... lock types ... */
+
+    /* Common code for all lock types */
+    #define USE_LOCK_BIT (2U)
+
+    #ifndef ACQUIRE_MALLOC_GLOBAL_LOCK
+      #define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex);
+    #endif
+
+    #ifndef RELEASE_MALLOC_GLOBAL_LOCK
+      #define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex);
+    #endif
+
+  #endif                                                       /* USE_LOCKS */
+
+/* -----------------------  Chunk representations ------------------------ */
+
+/*
+  (The following includes lightly edited explanations by Colin Plumb.)
+
+  The malloc_chunk declaration below is misleading (but accurate and
+  necessary).  It declares a "view" into memory allowing access to
+  necessary fields at known offsets from a given base.
+
+  Chunks of memory are maintained using a `boundary tag' method as
+  originally described by Knuth.  (See the paper by Paul Wilson
+  ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
+  techniques.)  Sizes of free chunks are stored both in the front of
+  each chunk and at the end.  This makes consolidating fragmented
+  chunks into bigger chunks fast.  The head fields also hold bits
+  representing whether chunks are free or in use.
+
+  Here are some pictures to make it clearer.  They are "exploded" to
+  show that the state of a chunk can be thought of as extending from
+  the high 31 bits of the head field of its header through the
+  prev_foot field and PINUSE_BIT of the following chunk header.
+
+  A chunk that's in use looks like:
+
+   chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+           | Size of previous chunk (if P = 0)                             |
+           +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+         | Size of this chunk                                         1| +-+
+   mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         |                                                               |
+         +-                                                             -+
+         |                                                               |
+         +-                                                             -+
+         |                                                               :
+         +-      size - sizeof(size_t) available payload bytes          -+
+         :                                                               |
+ chunk-> +-                                                             -+
+         |                                                               |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1|
+       | Size of next chunk (may or may not be in use)               | +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+    And if it's free, it looks like this:
+
+   chunk-> +-                                                             -+
+           | User payload (must be in use, or we would have merged!)       |
+           +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
+         | Size of this chunk                                         0| +-+
+   mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         | Next pointer                                                  |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         | Prev pointer                                                  |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         |                                                               :
+         +-      size - sizeof(struct chunk) unused bytes               -+
+         :                                                               |
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+         | Size of this chunk                                            |
+         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0|
+       | Size of next chunk (must be in use, or we would have merged)| +-+
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                                                               :
+       +- User payload                                                -+
+       :                                                               |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+                                                                     |0|
+                                                                     +-+
+  Note that since we always merge adjacent free chunks, the chunks
+  adjacent to a free chunk must be in use.
+
+  Given a pointer to a chunk (which can be derived trivially from the
+  payload pointer) we can, in O(1) time, find out whether the adjacent
+  chunks are free, and if so, unlink them from the lists that they
+  are on and merge them with the current chunk.
+
+  Chunks always begin on even word boundaries, so the mem portion
+  (which is returned to the user) is also on an even word boundary, and
+  thus at least double-word aligned.
+
+  The P (PINUSE_BIT) bit, stored in the unused low-order bit of the
+  chunk size (which is always a multiple of two words), is an in-use
+  bit for the *previous* chunk.  If that bit is *clear*, then the
+  word before the current chunk size contains the previous chunk
+  size, and can be used to find the front of the previous chunk.
+  The very first chunk allocated always has this bit set, preventing
+  access to non-existent (or non-owned) memory. If pinuse is set for
+  any given chunk, then you CANNOT determine the size of the
+  previous chunk, and might even get a memory addressing fault when
+  trying to do so.
+
+  The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of
+  the chunk size redundantly records whether the current chunk is
+  inuse (unless the chunk is mmapped). This redundancy enables usage
+  checks within free and realloc, and reduces indirection when freeing
+  and consolidating chunks.
+
+  Each freshly allocated chunk must have both cinuse and pinuse set.
+  That is, each allocated chunk borders either a previously allocated
+  and still in-use chunk, or the base of its memory arena. This is
+  ensured by making all allocations from the `lowest' part of any
+  found chunk.  Further, no free chunk physically borders another one,
+  so each free chunk is known to be preceded and followed by either
+  inuse chunks or the ends of memory.
+
+  Note that the `foot' of the current chunk is actually represented
+  as the prev_foot of the NEXT chunk. This makes it easier to
+  deal with alignments etc but can be very confusing when trying
+  to extend or adapt this code.
+
+  The exceptions to all this are
+
+     1. The special chunk `top' is the top-most available chunk (i.e.,
+        the one bordering the end of available memory). It is treated
+        specially.  Top is never included in any bin, is used only if
+        no other chunk is available, and is released back to the
+        system if it is very large (see M_TRIM_THRESHOLD).  In effect,
+        the top chunk is treated as larger (and thus less well
+        fitting) than any other available chunk.  The top chunk
+        doesn't update its trailing size field since there is no next
+        contiguous chunk that would have to index off it. However,
+        space is still allocated for it (TOP_FOOT_SIZE) to enable
+        separation or merging when space is extended.
+
+     2. Chunks allocated via mmap have both cinuse and pinuse bits
+        cleared in their head fields.  Because they are allocated
+        one-by-one, each must carry its own prev_foot field, which is
+        also used to hold the offset this chunk has within its mmapped
+        region, which is needed to preserve alignment. Each mmapped
+        chunk is trailed by the first two fields of a fake next-chunk
+        for sake of usage checks.
+
+*/
+
+struct malloc_chunk {
+
+  size_t               prev_foot;     /* Size of previous chunk (if free).  */
+  size_t               head;                        /* Size and inuse bits. */
+  struct malloc_chunk *fd;            /* double links -- used only if free. */
+  struct malloc_chunk *bk;
+
+};
+
+typedef struct malloc_chunk  mchunk;
+typedef struct malloc_chunk *mchunkptr;
+typedef struct malloc_chunk *sbinptr;         /* The type of bins of chunks */
+typedef unsigned int         bindex_t;                   /* Described below */
+typedef unsigned int         binmap_t;                   /* Described below */
+typedef unsigned int         flag_t;   /* The type of various bit flag sets */
+
+/* ------------------- Chunks sizes and alignments ----------------------- */
+
+  #define MCHUNK_SIZE (sizeof(mchunk))
+
+  #if FOOTERS
+    #define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+  #else                                                          /* FOOTERS */
+    #define CHUNK_OVERHEAD (SIZE_T_SIZE)
+  #endif                                                         /* FOOTERS */
+
+  /* MMapped chunks need a second word of overhead ... */
+  #define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+  /* ... and additional padding for fake next-chunk at foot */
+  #define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES)
+
+  /* The smallest size we can malloc is an aligned minimal chunk */
+  #define MIN_CHUNK_SIZE ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+  /* conversion from malloc headers to user pointers, and back */
+  #define chunk2mem(p) ((void *)((char *)(p) + TWO_SIZE_T_SIZES))
+  #define mem2chunk(mem) ((mchunkptr)((char *)(mem)-TWO_SIZE_T_SIZES))
+  /* chunk associated with aligned address A */
+  #define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A)))
+
+  /* Bounds on request (not chunk) sizes. */
+  #define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2)
+  #define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
+
+  /* pad request bytes into a usable size */
+  #define pad_request(req) \
+    (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+  /* pad request, checking for minimum (but not maximum) */
+  #define request2size(req) \
+    (((req) < MIN_REQUEST) ? MIN_CHUNK_SIZE : pad_request(req))
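+
+  /*
+    Editor's illustration (a sketch, not part of the allocator): how the
+    conversion and padding macros compose on a typical 64-bit build where
+    SIZE_T_SIZE is 8, MALLOC_ALIGNMENT is 16 and FOOTERS is off:
+
+      request2size(1)   == MIN_CHUNK_SIZE       (32; 1 is below MIN_REQUEST)
+      request2size(100) == 112                  ((100 + 8 + 15) & ~15)
+
+    and for any chunk pointer p, mem2chunk(chunk2mem(p)) == p, since the
+    two macros offset by TWO_SIZE_T_SIZES in opposite directions.
+  */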
+
+/* ------------------ Operations on head and foot fields ----------------- */
+
+/*
+  The head field of a chunk is or'ed with PINUSE_BIT when previous
+  adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in
+  use, unless mmapped, in which case both bits are cleared.
+
+  FLAG4_BIT is not used by this malloc, but might be useful in extensions.
+*/
+
+  #define PINUSE_BIT (SIZE_T_ONE)
+  #define CINUSE_BIT (SIZE_T_TWO)
+  #define FLAG4_BIT (SIZE_T_FOUR)
+  #define INUSE_BITS (PINUSE_BIT | CINUSE_BIT)
+  #define FLAG_BITS (PINUSE_BIT | CINUSE_BIT | FLAG4_BIT)
+
+  /* Head value for fenceposts */
+  #define FENCEPOST_HEAD (INUSE_BITS | SIZE_T_SIZE)
+
+  /* extraction of fields from head words */
+  #define cinuse(p) ((p)->head & CINUSE_BIT)
+  #define pinuse(p) ((p)->head & PINUSE_BIT)
+  #define flag4inuse(p) ((p)->head & FLAG4_BIT)
+  #define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT)
+  #define is_mmapped(p) (((p)->head & INUSE_BITS) == 0)
+
+  #define chunksize(p) ((p)->head & ~(FLAG_BITS))
+
+  #define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT)
+  #define set_flag4(p) ((p)->head |= FLAG4_BIT)
+  #define clear_flag4(p) ((p)->head &= ~FLAG4_BIT)
+
+  /* Treat space at ptr +/- offset as a chunk */
+  #define chunk_plus_offset(p, s) ((mchunkptr)(((char *)(p)) + (s)))
+  #define chunk_minus_offset(p, s) ((mchunkptr)(((char *)(p)) - (s)))
+
+  /* Ptr to next or previous physical malloc_chunk. */
+  #define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->head & ~FLAG_BITS)))
+  #define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_foot)))
+
+  /* extract next chunk's pinuse bit */
+  #define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT)
+
+  /* Get/set size at footer */
+  #define get_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot)
+  #define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot = (s))
+
+  /* Set size, pinuse bit, and foot */
+  #define set_size_and_pinuse_of_free_chunk(p, s) \
+    ((p)->head = (s | PINUSE_BIT), set_foot(p, s))
+
+  /* Set size, pinuse bit, foot, and clear next pinuse */
+  #define set_free_with_pinuse(p, s, n) \
+    (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
+
+  /* Get the internal overhead associated with chunk p */
+  #define overhead_for(p) (is_mmapped(p) ? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
+
+  /* Return true if malloced space is not necessarily cleared */
+  #if MMAP_CLEARS
+    #define calloc_must_clear(p) (!is_mmapped(p))
+  #else                                                      /* MMAP_CLEARS */
+    #define calloc_must_clear(p) (1)
+  #endif                                                     /* MMAP_CLEARS */
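+
+  /*
+    Editor's illustration (sketch only) of how the head bits read out in
+    practice: for a 48-byte chunk that is in use and whose predecessor is
+    also in use, head == 48 | PINUSE_BIT | CINUSE_BIT, so
+
+      chunksize(p)   -> 48        (flag bits masked off)
+      cinuse(p)      -> nonzero
+      pinuse(p)      -> nonzero
+      is_mmapped(p)  -> 0         (mmapped chunks clear both inuse bits)
+  */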
+
+/* ---------------------- Overlaid data structures ----------------------- */
+
+/*
+  When chunks are not in use, they are treated as nodes of either
+  lists or trees.
+
+  "Small"  chunks are stored in circular doubly-linked lists, and look
+  like this:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of previous chunk                            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Forward pointer to next chunk in list             |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Back pointer to previous chunk in list            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Unused space (may be 0 bytes long)                .
+            .                                                               .
+            .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+  Larger chunks are kept in a form of bitwise digital trees (aka
+  tries) keyed on chunksizes.  Because malloc_tree_chunks are only for
+  free chunks greater than 256 bytes, their size doesn't impose any
+  constraints on user chunk sizes.  Each node looks like:
+
+    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Size of previous chunk                            |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `head:' |             Size of chunk, in bytes                         |P|
+      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Forward pointer to next chunk of same size        |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Back pointer to previous chunk of same size       |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Pointer to left child (child[0])                  |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Pointer to right child (child[1])                 |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Pointer to parent                                 |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             bin index of this chunk                           |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+            |             Unused space                                      .
+            .                                                               |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    `foot:' |             Size of chunk, in bytes                           |
+            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+  Each tree holding treenodes is a tree of unique chunk sizes.  Chunks
+  of the same size are arranged in a circularly-linked list, with only
+  the oldest chunk (the next to be used, in our FIFO ordering)
+  actually in the tree.  (Tree members are distinguished by a non-null
+  parent pointer.)  If a chunk with the same size as an existing node
+  is inserted, it is linked off the existing node using pointers that
+  work in the same way as fd/bk pointers of small chunks.
+
+  Each tree contains a power of 2 sized range of chunk sizes (the
+  smallest is 0x100 <= x < 0x180), which is divided in half at each
+  tree level, with the chunks in the smaller half of the range (0x100
+  <= x < 0x140 for the top node) in the left subtree and the larger
+  half (0x140 <= x < 0x180) in the right subtree.  This is, of course,
+  done by inspecting individual bits.
+
+  Using these rules, each node's left subtree contains all smaller
+  sizes than its right subtree.  However, the node at the root of each
+  subtree has no particular ordering relationship to either.  (The
+  dividing line between the subtree sizes is based on trie relation.)
+  If we remove the last chunk of a given size from the interior of the
+  tree, we need to replace it with a leaf node.  The tree ordering
+  rules permit a node to be replaced by any leaf below it.
+
+  The smallest chunk in a tree (a common operation in a best-fit
+  allocator) can be found by walking a path to the leftmost leaf in
+  the tree.  Unlike a usual binary tree, where we follow left child
+  pointers until we reach a null, here we follow the right child
+  pointer any time the left one is null, until we reach a leaf with
+  both child pointers null. The smallest chunk in the tree will be
+  somewhere along that path.
+
+  The worst case number of steps to add, find, or remove a node is
+  bounded by the number of bits differentiating chunks within
+  bins. Under current bin calculations, this ranges from 6 up to 21
+  (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
+  is of course much better.
+*/
+
+struct malloc_tree_chunk {
+
+  /* The first four fields must be compatible with malloc_chunk */
+  size_t                    prev_foot;
+  size_t                    head;
+  struct malloc_tree_chunk *fd;
+  struct malloc_tree_chunk *bk;
+
+  struct malloc_tree_chunk *child[2];
+  struct malloc_tree_chunk *parent;
+  bindex_t                  index;
+
+};
+
+typedef struct malloc_tree_chunk  tchunk;
+typedef struct malloc_tree_chunk *tchunkptr;
+typedef struct malloc_tree_chunk *tbinptr;     /* The type of bins of trees */
+
+  /* A little helper macro for trees */
+  #define leftmost_child(t) ((t)->child[0] != 0 ? (t)->child[0] : (t)->child[1])
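+
+  /*
+    Editor's sketch (not part of dlmalloc) of the leftmost-path rule
+    described above: follow child[0] when present, else child[1], until
+    both are null; the smallest chunk lies somewhere on that path.
+
+      static tchunkptr smallest_in_tree(tchunkptr t) {
+
+        tchunkptr best = t;
+        while ((t = leftmost_child(t)) != 0)
+          if (chunksize(t) < chunksize(best)) best = t;
+        return best;
+
+      }
+
+    The allocator itself fuses this walk into its large-bin search rather
+    than using a helper like this.
+  */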
+
+/* ----------------------------- Segments -------------------------------- */
+
+/*
+  Each malloc space may include non-contiguous segments, held in a
+  list headed by an embedded malloc_segment record representing the
+  top-most space. Segments also include flags holding properties of
+  the space. Large chunks that are directly allocated by mmap are not
+  included in this list. They are instead independently created and
+  destroyed without otherwise keeping track of them.
+
+  Segment management mainly comes into play for spaces allocated by
+  MMAP.  Any call to MMAP might or might not return memory that is
+  adjacent to an existing segment.  MORECORE normally contiguously
+  extends the current space, so this space is almost always adjacent,
+  which is simpler and faster to deal with. (This is why MORECORE is
+  used preferentially to MMAP when both are available -- see
+  sys_alloc.)  When allocating using MMAP, we don't use any of the
+  hinting mechanisms (inconsistently) supported in various
+  implementations of unix mmap, or distinguish reserving from
+  committing memory. Instead, we just ask for space, and exploit
+  contiguity when we get it.  It is probably possible to do
+  better than this on some systems, but no general scheme seems
+  to be significantly better.
+
+  Management entails a simpler variant of the consolidation scheme
+  used for chunks to reduce fragmentation -- new adjacent memory is
+  normally prepended or appended to an existing segment. However,
+  there are limitations compared to chunk consolidation that mostly
+  reflect the fact that segment processing is relatively infrequent
+  (occurring only when getting memory from system) and that we
+  don't expect to have huge numbers of segments:
+
+  * Segments are not indexed, so traversal requires linear scans.  (It
+    would be possible to index these, but is not worth the extra
+    overhead and complexity for most programs on most platforms.)
+  * New segments are only appended to old ones when holding top-most
+    memory; if they cannot be prepended to others, they are held in
+    different segments.
+
+  Except for the top-most segment of an mstate, each segment record
+  is kept at the tail of its segment. Segments are added by pushing
+  segment records onto the list headed by &mstate.seg for the
+  containing mstate.
+
+  Segment flags control allocation/merge/deallocation policies:
+  * If EXTERN_BIT set, then we did not allocate this segment,
+    and so should not try to deallocate or merge with others.
+    (This currently holds only for the initial segment passed
+    into create_mspace_with_base.)
+  * If USE_MMAP_BIT set, the segment may be merged with
+    other surrounding mmapped segments and trimmed/de-allocated
+    using munmap.
+  * If neither bit is set, then the segment was obtained using
+    MORECORE so can be merged with surrounding MORECORE'd segments
+    and deallocated/trimmed using MORECORE with negative arguments.
+*/
+
+struct malloc_segment {
+
+  char *                 base;                              /* base address */
+  size_t                 size;                            /* allocated size */
+  struct malloc_segment *next;                       /* ptr to next segment */
+  flag_t                 sflags;                    /* mmap and extern flag */
+
+};
+
+  #define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT)
+  #define is_extern_segment(S) ((S)->sflags & EXTERN_BIT)
+
+typedef struct malloc_segment  msegment;
+typedef struct malloc_segment *msegmentptr;
+
+  /* ---------------------------- malloc_state ----------------------------- */
+
+  /*
+     A malloc_state holds all of the bookkeeping for a space.
+     The main fields are:
+
+    Top
+      The topmost chunk of the currently active segment. Its size is
+      cached in topsize.  The actual size of topmost space is
+      topsize+TOP_FOOT_SIZE, which includes space reserved for adding
+      fenceposts and segment records if necessary when getting more
+      space from the system.  The size at which to autotrim top is
+      cached from mparams in trim_check, except that it is disabled if
+      an autotrim fails.
+
+    Designated victim (dv)
+      This is the preferred chunk for servicing small requests that
+      don't have exact fits.  It is normally the chunk split off most
+      recently to service another small request.  Its size is cached in
+      dvsize. The link fields of this chunk are not maintained since it
+      is not kept in a bin.
+
+    SmallBins
+      An array of bin headers for free chunks.  These bins hold chunks
+      with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
+      chunks of all the same size, spaced 8 bytes apart.  To simplify
+      use in double-linked lists, each bin header acts as a malloc_chunk
+      pointing to the real first node, if it exists (else pointing to
+      itself).  This avoids special-casing for headers.  But to avoid
+      waste, we allocate only the fd/bk pointers of bins, and then use
+      repositioning tricks to treat these as the fields of a chunk.
+
+    TreeBins
+      Treebins are pointers to the roots of trees holding a range of
+      sizes. There are 2 equally spaced treebins for each power of two
+      from TREEBIN_SHIFT to TREEBIN_SHIFT+16. The last bin holds anything
+      larger.
+
+    Bin maps
+      There is one bit map for small bins ("smallmap") and one for
+      treebins ("treemap).  Each bin sets its bit when non-empty, and
+      clears the bit when empty.  Bit operations are then used to avoid
+      bin-by-bin searching -- nearly all "search" is done without ever
+      looking at bins that won't be selected.  The bit maps
+      conservatively use 32 bits per map word, even on a 64-bit system.
+      For a good description of some of the bit-based techniques used
+      here, see Henry S. Warren Jr's book "Hacker's Delight" (and
+      supplement at http://hackersdelight.org/). Many of these are
+      intended to reduce the branchiness of paths through malloc etc, as
+      well as to reduce the number of memory locations read or written.
+
+    Segments
+      A list of segments headed by an embedded malloc_segment record
+      representing the initial space.
+
+    Address check support
+      The least_addr field is the least address ever obtained from
+      MORECORE or MMAP. Attempted frees and reallocs of any address less
+      than this are trapped (unless INSECURE is defined).
+
+    Magic tag
+      A cross-check field that should always hold same value as mparams.magic.
+
+    Max allowed footprint
+      The maximum allowed bytes to allocate from system (zero means no limit)
+
+    Flags
+      Bits recording whether to use MMAP, locks, or contiguous MORECORE
+
+    Statistics
+      Each space keeps track of current and maximum system memory
+      obtained via MORECORE or MMAP.
+
+    Trim support
+      Fields holding the amount of unused topmost memory that should trigger
+      trimming, and a counter to force periodic scanning to release unused
+      non-topmost segments.
+
+    Locking
+      If USE_LOCKS is defined, the "mutex" lock is acquired and released
+      around every public call using this mspace.
+
+    Extension support
+      A void* pointer and a size_t field that can be used to help implement
+      extensions to this malloc.
+  */
+
+  /* Bin types, widths and sizes */
+  #define NSMALLBINS (32U)
+  #define NTREEBINS (32U)
+  #define SMALLBIN_SHIFT (3U)
+  #define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT)
+  #define TREEBIN_SHIFT (8U)
+  #define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT)
+  #define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE)
+  #define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
+
+struct malloc_state {
+
+  binmap_t  smallmap;
+  binmap_t  treemap;
+  size_t    dvsize;
+  size_t    topsize;
+  char *    least_addr;
+  mchunkptr dv;
+  mchunkptr top;
+  size_t    trim_check;
+  size_t    release_checks;
+  size_t    magic;
+  mchunkptr smallbins[(NSMALLBINS + 1) * 2];
+  tbinptr   treebins[NTREEBINS];
+  size_t    footprint;
+  size_t    max_footprint;
+  size_t    footprint_limit;                         /* zero means no limit */
+  flag_t    mflags;
+  #if USE_LOCKS
+  MLOCK_T mutex;             /* locate lock among fields that rarely change */
+  #endif                                                       /* USE_LOCKS */
+  msegment seg;
+  void *   extp;                     /* Unused but available for extensions */
+  size_t   exts;
+
+};
+
+typedef struct malloc_state *mstate;
+
+/* ------------- Global malloc_state and malloc_params ------------------- */
+
+/*
+  malloc_params holds global properties, including those that can be
+  dynamically set using mallopt. There is a single instance, mparams,
+  initialized in init_mparams. Note that the non-zeroness of "magic"
+  also serves as an initialization flag.
+*/
+
+struct malloc_params {
+
+  size_t magic;
+  size_t page_size;
+  size_t granularity;
+  size_t mmap_threshold;
+  size_t trim_threshold;
+  flag_t default_mflags;
+
+};
+
+static struct malloc_params mparams;
+
+  /* Ensure mparams initialized */
+  #define ensure_initialization() (void)(mparams.magic != 0 || init_mparams())
+
+  #if !ONLY_MSPACES
+
+/* The global malloc_state used for all non-"mspace" calls */
+static struct malloc_state _gm_;
+    #define gm (&_gm_)
+    #define is_global(M) ((M) == &_gm_)
+
+  #endif                                                   /* !ONLY_MSPACES */
+
+  #define is_initialized(M) ((M)->top != 0)
+
+/* -------------------------- system alloc setup ------------------------- */
+
+/* Operations on mflags */
+
+  #define use_lock(M) ((M)->mflags & USE_LOCK_BIT)
+  #define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT)
+  #if USE_LOCKS
+    #define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT)
+  #else
+    #define disable_lock(M)
+  #endif
+
+  #define use_mmap(M) ((M)->mflags & USE_MMAP_BIT)
+  #define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT)
+  #if HAVE_MMAP
+    #define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT)
+  #else
+    #define disable_mmap(M)
+  #endif
+
+  #define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT)
+  #define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT)
+
+  #define set_lock(M, L) \
+    ((M)->mflags =       \
+         (L) ? ((M)->mflags | USE_LOCK_BIT) : ((M)->mflags & ~USE_LOCK_BIT))
+
+  /* page-align a size */
+  #define page_align(S)                         \
+    (((S) + (mparams.page_size - SIZE_T_ONE)) & \
+     ~(mparams.page_size - SIZE_T_ONE))
+
+  /* granularity-align a size */
+  #define granularity_align(S)                    \
+    (((S) + (mparams.granularity - SIZE_T_ONE)) & \
+     ~(mparams.granularity - SIZE_T_ONE))
+
+  /* For mmap, use granularity alignment on windows, else page-align */
+  #ifdef WIN32
+    #define mmap_align(S) granularity_align(S)
+  #else
+    #define mmap_align(S) page_align(S)
+  #endif
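+
+  /*
+    Worked example (editor's illustration): with mparams.page_size == 4096,
+    page_align(1) == 4096 and page_align(4096) == 4096 -- adding
+    page_size-1 and masking the low bits rounds up to the next page
+    boundary without overshooting exact multiples.
+  */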
+
+  /* For sys_alloc, enough padding to ensure can malloc request on success */
+  #define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT)
+
+  #define is_page_aligned(S) \
+    (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
+  #define is_granularity_aligned(S) \
+    (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
+
+  /*  True if segment S holds address A */
+  #define segment_holds(S, A) \
+    ((char *)(A) >= S->base && (char *)(A) < S->base + S->size)
+
+/* Return segment holding given address */
+static msegmentptr segment_holding(mstate m, char *addr) {
+
+  msegmentptr sp = &m->seg;
+  for (;;) {
+
+    if (addr >= sp->base && addr < sp->base + sp->size) return sp;
+    if ((sp = sp->next) == 0) return 0;
+
+  }
+
+}
+
+/* Return true if segment contains a segment link */
+static int has_segment_link(mstate m, msegmentptr ss) {
+
+  msegmentptr sp = &m->seg;
+  for (;;) {
+
+    if ((char *)sp >= ss->base && (char *)sp < ss->base + ss->size) return 1;
+    if ((sp = sp->next) == 0) return 0;
+
+  }
+
+}
+
+  #ifndef MORECORE_CANNOT_TRIM
+    #define should_trim(M, s) ((s) > (M)->trim_check)
+  #else                                             /* MORECORE_CANNOT_TRIM */
+    #define should_trim(M, s) (0)
+  #endif                                            /* MORECORE_CANNOT_TRIM */
+
+  /*
+    TOP_FOOT_SIZE is padding at the end of a segment, including space
+    that may be needed to place segment records and fenceposts when new
+    noncontiguous segments are added.
+  */
+  #define TOP_FOOT_SIZE                                                        \
+    (align_offset(chunk2mem(0)) + pad_request(sizeof(struct malloc_segment)) + \
+     MIN_CHUNK_SIZE)
+
+/* -------------------------------  Hooks -------------------------------- */
+
+/*
+  PREACTION should be defined to return 0 on success, and nonzero on
+  failure. If you are not using locking, you can redefine these to do
+  anything you like.
+*/
+
+  #if USE_LOCKS
+    #define PREACTION(M) ((use_lock(M)) ? ACQUIRE_LOCK(&(M)->mutex) : 0)
+    #define POSTACTION(M)                           \
+      {                                             \
+                                                    \
+        if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); \
+                                                    \
+      }
+  #else                                                        /* USE_LOCKS */
+
+    #ifndef PREACTION
+      #define PREACTION(M) (0)
+    #endif                                                     /* PREACTION */
+
+    #ifndef POSTACTION
+      #define POSTACTION(M)
+    #endif                                                    /* POSTACTION */
+
+  #endif                                                       /* USE_LOCKS */
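+
+/*
+  Editor's note on the usage pattern (sketch only): the public entry
+  points bracket their work with these hooks, e.g.
+
+    if (!PREACTION(gm)) {
+
+      ... operate on the malloc_state ...
+      POSTACTION(gm);
+
+    }
+
+  so when locking is compiled in, every public call acquires and releases
+  the state's mutex (see internal_mallinfo below for a concrete instance).
+*/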
+
+/*
+  CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
+  USAGE_ERROR_ACTION is triggered on detected bad frees and
+  reallocs. The argument p is an address that might have triggered the
+  fault. It is ignored by the two predefined actions, but might be
+  useful in custom actions that try to help diagnose errors.
+*/
+
+  #if PROCEED_ON_ERROR
+
+/* A count of the number of corruption errors causing resets */
+int malloc_corruption_error_count;
+
+/* default corruption action */
+static void reset_on_error(mstate m);
+
+    #define CORRUPTION_ERROR_ACTION(m) reset_on_error(m)
+    #define USAGE_ERROR_ACTION(m, p)
+
+  #else                                                 /* PROCEED_ON_ERROR */
+
+    #ifndef CORRUPTION_ERROR_ACTION
+      #define CORRUPTION_ERROR_ACTION(m) ABORT
+    #endif                                       /* CORRUPTION_ERROR_ACTION */
+
+    #ifndef USAGE_ERROR_ACTION
+      #define USAGE_ERROR_ACTION(m, p) ABORT
+    #endif                                            /* USAGE_ERROR_ACTION */
+
+  #endif                                                /* PROCEED_ON_ERROR */
+
+/* -------------------------- Debugging setup ---------------------------- */
+
+  #if !DEBUG
+
+    #define check_free_chunk(M, P)
+    #define check_inuse_chunk(M, P)
+    #define check_malloced_chunk(M, P, N)
+    #define check_mmapped_chunk(M, P)
+    #define check_malloc_state(M)
+    #define check_top_chunk(M, P)
+
+  #else                                                            /* DEBUG */
+    #define check_free_chunk(M, P) do_check_free_chunk(M, P)
+    #define check_inuse_chunk(M, P) do_check_inuse_chunk(M, P)
+    #define check_top_chunk(M, P) do_check_top_chunk(M, P)
+    #define check_malloced_chunk(M, P, N) do_check_malloced_chunk(M, P, N)
+    #define check_mmapped_chunk(M, P) do_check_mmapped_chunk(M, P)
+    #define check_malloc_state(M) do_check_malloc_state(M)
+
+static void   do_check_any_chunk(mstate m, mchunkptr p);
+static void   do_check_top_chunk(mstate m, mchunkptr p);
+static void   do_check_mmapped_chunk(mstate m, mchunkptr p);
+static void   do_check_inuse_chunk(mstate m, mchunkptr p);
+static void   do_check_free_chunk(mstate m, mchunkptr p);
+static void   do_check_malloced_chunk(mstate m, void *mem, size_t s);
+static void   do_check_tree(mstate m, tchunkptr t);
+static void   do_check_treebin(mstate m, bindex_t i);
+static void   do_check_smallbin(mstate m, bindex_t i);
+static void   do_check_malloc_state(mstate m);
+static int    bin_find(mstate m, mchunkptr x);
+static size_t traverse_and_check(mstate m);
+  #endif                                                           /* DEBUG */
+
+/* ---------------------------- Indexing Bins ---------------------------- */
+
+  #define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
+  #define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT)
+  #define small_index2size(i) ((i) << SMALLBIN_SHIFT)
+  #define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE))
+
+  /* addressing by index. See above about smallbin repositioning */
+  #define smallbin_at(M, i) ((sbinptr)((char *)&((M)->smallbins[(i) << 1])))
+  #define treebin_at(M, i) (&((M)->treebins[i]))
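+
+  /*
+    Editor's example (illustration only): with SMALLBIN_SHIFT == 3, a
+    32-byte chunk maps to small_index(32) == 4 and small_index2size(4)
+    recovers 32.  The (i) << 1 in smallbin_at reflects that only the
+    fd/bk pair of each header "chunk" is actually stored in the
+    smallbins array.
+  */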
+
+  /* assign tree index for size S to variable I. Use x86 asm if possible  */
+  #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+    #define compute_tree_index(S, I)                                         \
+      {                                                                      \
+                                                                             \
+        unsigned int X = S >> TREEBIN_SHIFT;                                 \
+        if (X == 0)                                                          \
+          I = 0;                                                             \
+        else if (X > 0xFFFF)                                                 \
+          I = NTREEBINS - 1;                                                 \
+        else {                                                               \
+                                                                             \
+          unsigned int K = (unsigned)sizeof(X) * __CHAR_BIT__ - 1 -          \
+                           (unsigned)__builtin_clz(X);                       \
+          I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); \
+                                                                             \
+        }                                                                    \
+                                                                             \
+      }
+
+  #elif defined(__INTEL_COMPILER)
+    #define compute_tree_index(S, I)                                         \
+      {                                                                      \
+                                                                             \
+        size_t X = S >> TREEBIN_SHIFT;                                       \
+        if (X == 0)                                                          \
+          I = 0;                                                             \
+        else if (X > 0xFFFF)                                                 \
+          I = NTREEBINS - 1;                                                 \
+        else {                                                               \
+                                                                             \
+          unsigned int K = _bit_scan_reverse(X);                             \
+          I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); \
+                                                                             \
+        }                                                                    \
+                                                                             \
+      }
+
+  #elif defined(_MSC_VER) && _MSC_VER >= 1300
+    #define compute_tree_index(S, I)                                         \
+      {                                                                      \
+                                                                             \
+        size_t X = S >> TREEBIN_SHIFT;                                       \
+        if (X == 0)                                                          \
+          I = 0;                                                             \
+        else if (X > 0xFFFF)                                                 \
+          I = NTREEBINS - 1;                                                 \
+        else {                                                               \
+                                                                             \
+          unsigned int K;                                                    \
+          _BitScanReverse((DWORD *)&K, (DWORD)X);                            \
+          I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); \
+                                                                             \
+        }                                                                    \
+                                                                             \
+      }
+
+  #else                                                             /* GNUC */
+    #define compute_tree_index(S, I)                             \
+      {                                                          \
+                                                                 \
+        size_t X = S >> TREEBIN_SHIFT;                           \
+        if (X == 0)                                              \
+          I = 0;                                                 \
+        else if (X > 0xFFFF)                                     \
+          I = NTREEBINS - 1;                                     \
+        else {                                                   \
+                                                                 \
+          unsigned int Y = (unsigned int)X;                      \
+          unsigned int N = ((Y - 0x100) >> 16) & 8;              \
+          unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;     \
+          N += K;                                                \
+          N += K = (((Y <<= K) - 0x4000) >> 16) & 2;             \
+          K = 14 - N + ((Y <<= K) >> 15);                        \
+          I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1)); \
+                                                                 \
+        }                                                        \
+                                                                 \
+      }
+  #endif                                                            /* GNUC */
+
+  /* Bit representing maximum resolved size in a treebin at i */
+  #define bit_for_tree_index(i)                 \
+    (i == NTREEBINS - 1) ? (SIZE_T_BITSIZE - 1) \
+                         : (((i) >> 1) + TREEBIN_SHIFT - 2)
+
+  /* Shift placing maximum resolved bit in a treebin at i as sign bit */
+  #define leftshift_for_tree_index(i) \
+    ((i == NTREEBINS - 1)             \
+         ? 0                          \
+         : ((SIZE_T_BITSIZE - SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
+
+  /* The size of the smallest chunk held in bin with index i */
+  #define minsize_for_tree_index(i)                 \
+    ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \
+     (((size_t)((i)&SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
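+
+  /*
+    Worked example (editor's illustration): for a 512-byte chunk,
+    X = 512 >> TREEBIN_SHIFT == 2, whose highest set bit is bit 1, so
+    compute_tree_index yields I = (1 << 1) + 0 == 2, and indeed
+    minsize_for_tree_index(2) == 512.  A 768-byte chunk lands in the odd
+    sibling bin: I == 3, minsize_for_tree_index(3) == 768.
+  */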
+
+  /* ------------------------ Operations on bin maps ----------------------- */
+
+  /* bit corresponding to given index */
+  #define idx2bit(i) ((binmap_t)(1) << (i))
+
+  /* Mark/Clear bits with given index */
+  #define mark_smallmap(M, i) ((M)->smallmap |= idx2bit(i))
+  #define clear_smallmap(M, i) ((M)->smallmap &= ~idx2bit(i))
+  #define smallmap_is_marked(M, i) ((M)->smallmap & idx2bit(i))
+
+  #define mark_treemap(M, i) ((M)->treemap |= idx2bit(i))
+  #define clear_treemap(M, i) ((M)->treemap &= ~idx2bit(i))
+  #define treemap_is_marked(M, i) ((M)->treemap & idx2bit(i))
+
+  /* isolate the least set bit of a bitmap */
+  #define least_bit(x) ((x) & -(x))
+
+  /* mask with all bits to left of least bit of x on */
+  #define left_bits(x) ((x << 1) | -(x << 1))
+
+  /* mask with all bits to left of or equal to least bit of x on */
+  #define same_or_left_bits(x) ((x) | -(x))
+
+/* index corresponding to given bit. Use x86 asm if possible */
+
+  #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+    #define compute_bit2idx(X, I) \
+      {                           \
+                                  \
+        unsigned int J;           \
+        J = __builtin_ctz(X);     \
+        I = (bindex_t)J;          \
+                                  \
+      }
+
+  #elif defined(__INTEL_COMPILER)
+    #define compute_bit2idx(X, I) \
+      {                           \
+                                  \
+        unsigned int J;           \
+        J = _bit_scan_forward(X); \
+        I = (bindex_t)J;          \
+                                  \
+      }
+
+  #elif defined(_MSC_VER) && _MSC_VER >= 1300
+    #define compute_bit2idx(X, I)        \
+      {                                  \
+                                         \
+        unsigned int J;                  \
+        _BitScanForward((DWORD *)&J, X); \
+        I = (bindex_t)J;                 \
+                                         \
+      }
+
+  #elif USE_BUILTIN_FFS
+    #define compute_bit2idx(X, I) I = ffs(X) - 1
+
+  #else
+    #define compute_bit2idx(X, I)            \
+      {                                      \
+                                             \
+        unsigned int Y = X - 1;              \
+        unsigned int K = Y >> (16 - 4) & 16; \
+        unsigned int N = K;                  \
+        Y >>= K;                             \
+        N += K = Y >> (8 - 3) & 8;           \
+        Y >>= K;                             \
+        N += K = Y >> (4 - 2) & 4;           \
+        Y >>= K;                             \
+        N += K = Y >> (2 - 1) & 2;           \
+        Y >>= K;                             \
+        N += K = Y >> (1 - 0) & 1;           \
+        Y >>= K;                             \
+        I = (bindex_t)(N + Y);               \
+                                             \
+      }
+  #endif                                                            /* GNUC */
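+
+/*
+  Editor's sketch (not part of dlmalloc) of how these bitmap macros
+  combine to find the first non-empty small bin above index i:
+
+    binmap_t candidates = left_bits(idx2bit(i)) & m->smallmap;
+    if (candidates != 0) {
+
+      bindex_t j;
+      compute_bit2idx(least_bit(candidates), j);
+      // j is now the smallest non-empty bin with index > i
+
+    }
+
+  left_bits keeps the bits strictly above bit i, least_bit isolates the
+  lowest survivor, and compute_bit2idx converts it back to an index.
+*/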
+
+/* ----------------------- Runtime Check Support ------------------------- */
+
+/*
+  For security, the main invariant is that malloc/free/etc never
+  writes to a static address other than malloc_state, unless static
+  malloc_state itself has been corrupted, which cannot occur via
+  malloc (because of these checks). In essence this means that we
+  believe all pointers, sizes, maps etc held in malloc_state, but
+  check all of those linked or offsetted from other embedded data
+  structures.  These checks are interspersed with main code in a way
+  that tends to minimize their run-time cost.
+
+  When FOOTERS is defined, in addition to range checking, we also
+  verify footer fields of inuse chunks, which can be used to guarantee
+  that the mstate controlling malloc/free is intact.  This is a
+  streamlined version of the approach described by William Robertson
+  et al in "Run-time Detection of Heap-based Overflows" LISA'03
+  http://www.usenix.org/events/lisa03/tech/robertson.html The footer
+  of an inuse chunk holds the xor of its mstate and a random seed,
+  that is checked upon calls to free() and realloc().  This is
+  (probabilistically) unguessable from outside the program, but can be
+  computed by any code successfully malloc'ing any chunk, so does not
+  itself provide protection against code that has already broken
+  security through some other means.  Unlike Robertson et al, we
+  always dynamically check addresses of all offset chunks (previous,
+  next, etc). This turns out to be cheaper than relying on hashes.
+*/
+
+  #if !INSECURE
+    /* Check if address a is at least as high as any from MORECORE or MMAP */
+    #define ok_address(M, a) ((char *)(a) >= (M)->least_addr)
+    /* Check if address of next chunk n is higher than base chunk p */
+    #define ok_next(p, n) ((char *)(p) < (char *)(n))
+    /* Check if p has inuse status */
+    #define ok_inuse(p) is_inuse(p)
+    /* Check if p has its pinuse bit on */
+    #define ok_pinuse(p) pinuse(p)
+
+  #else                                                        /* !INSECURE */
+    #define ok_address(M, a) (1)
+    #define ok_next(b, n) (1)
+    #define ok_inuse(p) (1)
+    #define ok_pinuse(p) (1)
+  #endif                                                       /* !INSECURE */
+
+  #if (FOOTERS && !INSECURE)
+    /* Check if (alleged) mstate m has expected magic field */
+    #define ok_magic(M) ((M)->magic == mparams.magic)
+  #else                                           /* (FOOTERS && !INSECURE) */
+    #define ok_magic(M) (1)
+  #endif                                          /* (FOOTERS && !INSECURE) */
+
+  /* In gcc, use __builtin_expect to minimize impact of checks */
+  #if !INSECURE
+    #if defined(__GNUC__) && __GNUC__ >= 3
+      #define RTCHECK(e) __builtin_expect(e, 1)
+    #else                                                           /* GNUC */
+      #define RTCHECK(e) (e)
+    #endif                                                          /* GNUC */
+  #else                                                        /* !INSECURE */
+    #define RTCHECK(e) (1)
+  #endif                                                       /* !INSECURE */
+
+/* macros to set up inuse chunks with or without footers */
+
+  #if !FOOTERS
+
+    #define mark_inuse_foot(M, p, s)
+
+    /* Macros for setting head/foot of non-mmapped chunks */
+
+    /* Set cinuse bit and pinuse bit of next chunk */
+    #define set_inuse(M, p, s)                                  \
+      ((p)->head = (((p)->head & PINUSE_BIT) | s | CINUSE_BIT), \
+       ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
+
+    /* Set cinuse and pinuse of this chunk and pinuse of next chunk */
+    #define set_inuse_and_pinuse(M, p, s)         \
+      ((p)->head = (s | PINUSE_BIT | CINUSE_BIT), \
+       ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
+
+    /* Set size, cinuse and pinuse bit of this chunk */
+    #define set_size_and_pinuse_of_inuse_chunk(M, p, s) \
+      ((p)->head = (s | PINUSE_BIT | CINUSE_BIT))
+
+  #else                                                          /* FOOTERS */
+
+    /* Set foot of inuse chunk to be xor of mstate and seed */
+    #define mark_inuse_foot(M, p, s)                 \
+      (((mchunkptr)((char *)(p) + (s)))->prev_foot = \
+           ((size_t)(M) ^ mparams.magic))
+
+    #define get_mstate_for(p)                                            \
+      ((mstate)(((mchunkptr)((char *)(p) + (chunksize(p))))->prev_foot ^ \
+                mparams.magic))
+
+    #define set_inuse(M, p, s)                                   \
+      ((p)->head = (((p)->head & PINUSE_BIT) | s | CINUSE_BIT),  \
+       (((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT), \
+       mark_inuse_foot(M, p, s))
+
+    #define set_inuse_and_pinuse(M, p, s)                        \
+      ((p)->head = (s | PINUSE_BIT | CINUSE_BIT),                \
+       (((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT), \
+       mark_inuse_foot(M, p, s))
+
+    #define set_size_and_pinuse_of_inuse_chunk(M, p, s) \
+      ((p)->head = (s | PINUSE_BIT | CINUSE_BIT), mark_inuse_foot(M, p, s))
+
+  #endif                                                        /* !FOOTERS */
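+
+/*
+  Editor's illustration of the FOOTERS scheme (sketch only): when FOOTERS
+  is on, the word just past an inuse chunk stores (mstate ^ mparams.magic),
+  so
+
+    mstate fm = get_mstate_for(p);
+
+  recovers the owning mstate, and free()/realloc() can reject a chunk
+  whose footer does not decode to a state with the expected magic
+  (ok_magic(fm) fails) -- a cheap integrity check on heap metadata.
+*/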
+
+/* ---------------------------- setting mparams -------------------------- */
+
+  #if LOCK_AT_FORK
+static void pre_fork(void) {
+
+  ACQUIRE_LOCK(&(gm)->mutex);
+
+}
+
+static void post_fork_parent(void) {
+
+  RELEASE_LOCK(&(gm)->mutex);
+
+}
+
+static void post_fork_child(void) {
+
+  INITIAL_LOCK(&(gm)->mutex);
+
+}
+
+  #endif                                                    /* LOCK_AT_FORK */
+
+/* Initialize mparams */
+static int init_mparams(void) {
+
+  #ifdef NEED_GLOBAL_LOCK_INIT
+  if (malloc_global_mutex_status <= 0) init_malloc_global_mutex();
+  #endif
+
+  ACQUIRE_MALLOC_GLOBAL_LOCK();
+  if (mparams.magic == 0) {
+
+    size_t magic;
+    size_t psize;
+    size_t gsize;
+
+  #ifndef WIN32
+    psize = malloc_getpagesize;
+    gsize = ((DEFAULT_GRANULARITY != 0) ? DEFAULT_GRANULARITY : psize);
+  #else                                                            /* WIN32 */
+    {
+
+      SYSTEM_INFO system_info;
+      GetSystemInfo(&system_info);
+      psize = system_info.dwPageSize;
+      gsize =
+          ((DEFAULT_GRANULARITY != 0) ? DEFAULT_GRANULARITY
+                                      : system_info.dwAllocationGranularity);
+
+    }
+
+  #endif                                                           /* WIN32 */
+
+    /* Sanity-check configuration:
+       size_t must be unsigned and as wide as pointer type.
+       ints must be at least 4 bytes.
+       alignment must be at least 8.
+       Alignment, min chunk size, and page size must all be powers of 2.
+    */
+    if ((sizeof(size_t) != sizeof(char *)) || (MAX_SIZE_T < MIN_CHUNK_SIZE) ||
+        (sizeof(int) < 4) || (MALLOC_ALIGNMENT < (size_t)8U) ||
+        ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT - SIZE_T_ONE)) != 0) ||
+        ((MCHUNK_SIZE & (MCHUNK_SIZE - SIZE_T_ONE)) != 0) ||
+        ((gsize & (gsize - SIZE_T_ONE)) != 0) ||
+        ((psize & (psize - SIZE_T_ONE)) != 0))
+      ABORT;
+    mparams.granularity = gsize;
+    mparams.page_size = psize;
+    mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
+    mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD;
+  #if MORECORE_CONTIGUOUS
+    mparams.default_mflags = USE_LOCK_BIT | USE_MMAP_BIT;
+  #else                                              /* MORECORE_CONTIGUOUS */
+    mparams.default_mflags =
+        USE_LOCK_BIT | USE_MMAP_BIT | USE_NONCONTIGUOUS_BIT;
+  #endif                                             /* MORECORE_CONTIGUOUS */
+
+  #if !ONLY_MSPACES
+    /* Set up lock for main malloc area */
+    gm->mflags = mparams.default_mflags;
+    (void)INITIAL_LOCK(&gm->mutex);
+  #endif
+  #if LOCK_AT_FORK
+    pthread_atfork(&pre_fork, &post_fork_parent, &post_fork_child);
+  #endif
+
+    {
+
+  #if USE_DEV_RANDOM
+      int           fd;
+      unsigned char buf[sizeof(size_t)];
+      /* Try to use /dev/urandom, else fall back on using time */
+      if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 &&
+          read(fd, buf, sizeof(buf)) == sizeof(buf)) {
+
+        magic = *((size_t *)buf);
+        close(fd);
+
+      } else
+
+  #endif                                                  /* USE_DEV_RANDOM */
+  #ifdef WIN32
+        magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U);
+  #elif defined(LACKS_TIME_H)
+      magic = (size_t)&magic ^ (size_t)0x55555555U;
+  #else
+      magic = (size_t)(time(0) ^ (size_t)0x55555555U);
+  #endif
+      magic |= (size_t)8U;                                /* ensure nonzero */
+      magic &= ~(size_t)7U;      /* improve chances of fault for bad values */
+      /* Until memory modes commonly available, use volatile-write */
+      (*(volatile size_t *)(&(mparams.magic))) = magic;
+
+    }
+
+  }
+
+  RELEASE_MALLOC_GLOBAL_LOCK();
+  return 1;
+
+}
+
+/* support for mallopt */
+static int change_mparam(int param_number, int value) {
+
+  size_t val;
+  ensure_initialization();
+  val = (value == -1) ? MAX_SIZE_T : (size_t)value;
+  switch (param_number) {
+
+    case M_TRIM_THRESHOLD:
+      mparams.trim_threshold = val;
+      return 1;
+    case M_GRANULARITY:
+      if (val >= mparams.page_size && ((val & (val - 1)) == 0)) {
+
+        mparams.granularity = val;
+        return 1;
+
+      } else
+
+        return 0;
+    case M_MMAP_THRESHOLD:
+      mparams.mmap_threshold = val;
+      return 1;
+    default:
+      return 0;
+
+  }
+
+}
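+
+/*
+  Editor's usage sketch (illustration only): the public mallopt entry
+  point routes here, so e.g.
+
+    mallopt(M_TRIM_THRESHOLD, -1);     // -1 maps to MAX_SIZE_T: never trim
+    mallopt(M_GRANULARITY, 64 * 1024); // accepted: power of two >= page size
+
+  each returns 1 on success; a granularity that is not a power of two at
+  least as large as the page size is rejected with 0.
+*/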
+
+  #if DEBUG
+/* ------------------------- Debugging Support --------------------------- */
+
+/* Check properties of any chunk, whether free, inuse, mmapped etc  */
+static void do_check_any_chunk(mstate m, mchunkptr p) {
+
+  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+  assert(ok_address(m, p));
+
+}
+
+/* Check properties of top chunk */
+static void do_check_top_chunk(mstate m, mchunkptr p) {
+
+  msegmentptr sp = segment_holding(m, (char *)p);
+  size_t      sz = p->head & ~INUSE_BITS;   /* third-lowest bit can be set! */
+  assert(sp != 0);
+  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+  assert(ok_address(m, p));
+  assert(sz == m->topsize);
+  assert(sz > 0);
+  assert(sz == ((sp->base + sp->size) - (char *)p) - TOP_FOOT_SIZE);
+  assert(pinuse(p));
+  assert(!pinuse(chunk_plus_offset(p, sz)));
+
+}
+
+/* Check properties of (inuse) mmapped chunks */
+static void do_check_mmapped_chunk(mstate m, mchunkptr p) {
+
+  size_t sz = chunksize(p);
+  size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD);
+  assert(is_mmapped(p));
+  assert(use_mmap(m));
+  assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
+  assert(ok_address(m, p));
+  assert(!is_small(sz));
+  assert((len & (mparams.page_size - SIZE_T_ONE)) == 0);
+  assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD);
+  assert(chunk_plus_offset(p, sz + SIZE_T_SIZE)->head == 0);
+
+}
+
+/* Check properties of inuse chunks */
+static void do_check_inuse_chunk(mstate m, mchunkptr p) {
+
+  do_check_any_chunk(m, p);
+  assert(is_inuse(p));
+  assert(next_pinuse(p));
+  /* If not pinuse and not mmapped, previous chunk has OK offset */
+  assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p);
+  if (is_mmapped(p)) do_check_mmapped_chunk(m, p);
+
+}
+
+/* Check properties of free chunks */
+static void do_check_free_chunk(mstate m, mchunkptr p) {
+
+  size_t    sz = chunksize(p);
+  mchunkptr next = chunk_plus_offset(p, sz);
+  do_check_any_chunk(m, p);
+  assert(!is_inuse(p));
+  assert(!next_pinuse(p));
+  assert(!is_mmapped(p));
+  if (p != m->dv && p != m->top) {
+
+    if (sz >= MIN_CHUNK_SIZE) {
+
+      assert((sz & CHUNK_ALIGN_MASK) == 0);
+      assert(is_aligned(chunk2mem(p)));
+      assert(next->prev_foot == sz);
+      assert(pinuse(p));
+      assert(next == m->top || is_inuse(next));
+      assert(p->fd->bk == p);
+      assert(p->bk->fd == p);
+
+    } else                        /* markers are always of size SIZE_T_SIZE */
+
+      assert(sz == SIZE_T_SIZE);
+
+  }
+
+}
+
+/* Check properties of malloced chunks at the point they are malloced */
+static void do_check_malloced_chunk(mstate m, void *mem, size_t s) {
+
+  if (mem != 0) {
+
+    mchunkptr p = mem2chunk(mem);
+    size_t    sz = p->head & ~INUSE_BITS;
+    do_check_inuse_chunk(m, p);
+    assert((sz & CHUNK_ALIGN_MASK) == 0);
+    assert(sz >= MIN_CHUNK_SIZE);
+    assert(sz >= s);
+    /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */
+    assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE));
+
+  }
+
+}
+
+/* Check a tree and its subtrees.  */
+static void do_check_tree(mstate m, tchunkptr t) {
+
+  tchunkptr head = 0;
+  tchunkptr u = t;
+  bindex_t  tindex = t->index;
+  size_t    tsize = chunksize(t);
+  bindex_t  idx;
+  compute_tree_index(tsize, idx);
+  assert(tindex == idx);
+  assert(tsize >= MIN_LARGE_SIZE);
+  assert(tsize >= minsize_for_tree_index(idx));
+  assert((idx == NTREEBINS - 1) || (tsize < minsize_for_tree_index((idx + 1))));
+
+  do {                        /* traverse through chain of same-sized nodes */
+    do_check_any_chunk(m, ((mchunkptr)u));
+    assert(u->index == tindex);
+    assert(chunksize(u) == tsize);
+    assert(!is_inuse(u));
+    assert(!next_pinuse(u));
+    assert(u->fd->bk == u);
+    assert(u->bk->fd == u);
+    if (u->parent == 0) {
+
+      assert(u->child[0] == 0);
+      assert(u->child[1] == 0);
+
+    } else {
+
+      assert(head == 0);               /* only one node on chain has parent */
+      head = u;
+      assert(u->parent != u);
+      assert(u->parent->child[0] == u || u->parent->child[1] == u ||
+             *((tbinptr *)(u->parent)) == u);
+      if (u->child[0] != 0) {
+
+        assert(u->child[0]->parent == u);
+        assert(u->child[0] != u);
+        do_check_tree(m, u->child[0]);
+
+      }
+
+      if (u->child[1] != 0) {
+
+        assert(u->child[1]->parent == u);
+        assert(u->child[1] != u);
+        do_check_tree(m, u->child[1]);
+
+      }
+
+      if (u->child[0] != 0 && u->child[1] != 0) {
+
+        assert(chunksize(u->child[0]) < chunksize(u->child[1]));
+
+      }
+
+    }
+
+    u = u->fd;
+
+  } while (u != t);
+
+  assert(head != 0);
+
+}
+
+/*  Check all the chunks in a treebin.  */
+static void do_check_treebin(mstate m, bindex_t i) {
+
+  tbinptr * tb = treebin_at(m, i);
+  tchunkptr t = *tb;
+  int       empty = (m->treemap & (1U << i)) == 0;
+  if (t == 0) assert(empty);
+  if (!empty) do_check_tree(m, t);
+
+}
+
+/*  Check all the chunks in a smallbin.  */
+static void do_check_smallbin(mstate m, bindex_t i) {
+
+  sbinptr      b = smallbin_at(m, i);
+  mchunkptr    p = b->bk;
+  unsigned int empty = (m->smallmap & (1U << i)) == 0;
+  if (p == b) assert(empty);
+  if (!empty) {
+
+    for (; p != b; p = p->bk) {
+
+      size_t    size = chunksize(p);
+      mchunkptr q;
+      /* each chunk claims to be free */
+      do_check_free_chunk(m, p);
+      /* chunk belongs in bin */
+      assert(small_index(size) == i);
+      assert(p->bk == b || chunksize(p->bk) == chunksize(p));
+      /* chunk is followed by an inuse chunk */
+      q = next_chunk(p);
+      if (q->head != FENCEPOST_HEAD) do_check_inuse_chunk(m, q);
+
+    }
+
+  }
+
+}
+
+/* Find x in a bin. Used in other check functions. */
+static int bin_find(mstate m, mchunkptr x) {
+
+  size_t size = chunksize(x);
+  if (is_small(size)) {
+
+    bindex_t sidx = small_index(size);
+    sbinptr  b = smallbin_at(m, sidx);
+    if (smallmap_is_marked(m, sidx)) {
+
+      mchunkptr p = b;
+      do {
+
+        if (p == x) return 1;
+
+      } while ((p = p->fd) != b);
+
+    }
+
+  } else {
+
+    bindex_t tidx;
+    compute_tree_index(size, tidx);
+    if (treemap_is_marked(m, tidx)) {
+
+      tchunkptr t = *treebin_at(m, tidx);
+      size_t    sizebits = size << leftshift_for_tree_index(tidx);
+      while (t != 0 && chunksize(t) != size) {
+
+        t = t->child[(sizebits >> (SIZE_T_BITSIZE - SIZE_T_ONE)) & 1];
+        sizebits <<= 1;
+
+      }
+
+      if (t != 0) {
+
+        tchunkptr u = t;
+        do {
+
+          if (u == (tchunkptr)x) return 1;
+
+        } while ((u = u->fd) != t);
+
+      }
+
+    }
+
+  }
+
+  return 0;
+
+}
+
+/* Traverse each chunk and check it; return total */
+static size_t traverse_and_check(mstate m) {
+
+  size_t sum = 0;
+  if (is_initialized(m)) {
+
+    msegmentptr s = &m->seg;
+    sum += m->topsize + TOP_FOOT_SIZE;
+    while (s != 0) {
+
+      mchunkptr q = align_as_chunk(s->base);
+      mchunkptr lastq = 0;
+      assert(pinuse(q));
+      while (segment_holds(s, q) && q != m->top && q->head != FENCEPOST_HEAD) {
+
+        sum += chunksize(q);
+        if (is_inuse(q)) {
+
+          assert(!bin_find(m, q));
+          do_check_inuse_chunk(m, q);
+
+        } else {
+
+          assert(q == m->dv || bin_find(m, q));
+          assert(lastq == 0 || is_inuse(lastq));  /* Not 2 consecutive free */
+          do_check_free_chunk(m, q);
+
+        }
+
+        lastq = q;
+        q = next_chunk(q);
+
+      }
+
+      s = s->next;
+
+    }
+
+  }
+
+  return sum;
+
+}
+
+/* Check all properties of malloc_state. */
+static void do_check_malloc_state(mstate m) {
+
+  bindex_t i;
+  size_t   total;
+  /* check bins */
+  for (i = 0; i < NSMALLBINS; ++i)
+    do_check_smallbin(m, i);
+  for (i = 0; i < NTREEBINS; ++i)
+    do_check_treebin(m, i);
+
+  if (m->dvsize != 0) {                                   /* check dv chunk */
+    do_check_any_chunk(m, m->dv);
+    assert(m->dvsize == chunksize(m->dv));
+    assert(m->dvsize >= MIN_CHUNK_SIZE);
+    assert(bin_find(m, m->dv) == 0);
+
+  }
+
+  if (m->top != 0) {                                     /* check top chunk */
+    do_check_top_chunk(m, m->top);
+    /*assert(m->topsize == chunksize(m->top)); redundant */
+    assert(m->topsize > 0);
+    assert(bin_find(m, m->top) == 0);
+
+  }
+
+  total = traverse_and_check(m);
+  assert(total <= m->footprint);
+  assert(m->footprint <= m->max_footprint);
+
+}
+
+  #endif                                                           /* DEBUG */
+
+/* ----------------------------- statistics ------------------------------ */
+
+  #if !NO_MALLINFO
+static struct mallinfo internal_mallinfo(mstate m) {
+
+  struct mallinfo nm = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  ensure_initialization();
+  if (!PREACTION(m)) {
+
+    check_malloc_state(m);
+    if (is_initialized(m)) {
+
+      size_t      nfree = SIZE_T_ONE;                    /* top always free */
+      size_t      mfree = m->topsize + TOP_FOOT_SIZE;
+      size_t      sum = mfree;
+      msegmentptr s = &m->seg;
+      while (s != 0) {
+
+        mchunkptr q = align_as_chunk(s->base);
+        while (segment_holds(s, q) && q != m->top &&
+               q->head != FENCEPOST_HEAD) {
+
+          size_t sz = chunksize(q);
+          sum += sz;
+          if (!is_inuse(q)) {
+
+            mfree += sz;
+            ++nfree;
+
+          }
+
+          q = next_chunk(q);
+
+        }
+
+        s = s->next;
+
+      }
+
+      nm.arena = sum;
+      nm.ordblks = nfree;
+      nm.hblkhd = m->footprint - sum;
+      nm.usmblks = m->max_footprint;
+      nm.uordblks = m->footprint - mfree;
+      nm.fordblks = mfree;
+      nm.keepcost = m->topsize;
+
+    }
+
+    POSTACTION(m);
+
+  }
+
+  return nm;
+
+}
+
+  #endif                                                    /* !NO_MALLINFO */
+
+  #if !NO_MALLOC_STATS
+static void internal_malloc_stats(mstate m) {
+
+  ensure_initialization();
+  if (!PREACTION(m)) {
+
+    size_t maxfp = 0;
+    size_t fp = 0;
+    size_t used = 0;
+    check_malloc_state(m);
+    if (is_initialized(m)) {
+
+      msegmentptr s = &m->seg;
+      maxfp = m->max_footprint;
+      fp = m->footprint;
+      used = fp - (m->topsize + TOP_FOOT_SIZE);
+
+      while (s != 0) {
+
+        mchunkptr q = align_as_chunk(s->base);
+        while (segment_holds(s, q) && q != m->top &&
+               q->head != FENCEPOST_HEAD) {
+
+          if (!is_inuse(q)) used -= chunksize(q);
+          q = next_chunk(q);
+
+        }
+
+        s = s->next;
+
+      }
+
+    }
+
+    POSTACTION(m);                                             /* drop lock */
+    fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp));
+    fprintf(stderr, "system bytes     = %10lu\n", (unsigned long)(fp));
+    fprintf(stderr, "in use bytes     = %10lu\n", (unsigned long)(used));
+
+  }
+
+}
+
+  #endif                                                /* !NO_MALLOC_STATS */
+
+  /* ----------------------- Operations on smallbins ----------------------- */
+
+  /*
+    Various forms of linking and unlinking are defined as macros.  Even
+    the ones for trees, which are very long but have very short typical
+    paths.  This is ugly but reduces reliance on inlining support of
+    compilers.
+  */
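+
+  /*
+    Usage sketch (illustrative): callers normally go through the
+    insert_chunk/unlink_chunk relays defined further below, which
+    expand to the small-bin forms for small sizes and to the treebin
+    forms otherwise, so call sites need not distinguish the two.
+  */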
+
+  /* Link a free chunk into a smallbin  */
+  #define insert_small_chunk(M, P, S)         \
+    {                                         \
+                                              \
+      bindex_t  I = small_index(S);           \
+      mchunkptr B = smallbin_at(M, I);        \
+      mchunkptr F = B;                        \
+      assert(S >= MIN_CHUNK_SIZE);            \
+      if (!smallmap_is_marked(M, I))          \
+        mark_smallmap(M, I);                  \
+      else if (RTCHECK(ok_address(M, B->fd))) \
+        F = B->fd;                            \
+      else {                                  \
+                                              \
+        CORRUPTION_ERROR_ACTION(M);           \
+                                              \
+      }                                       \
+      B->fd = P;                              \
+      F->bk = P;                              \
+      P->fd = F;                              \
+      P->bk = B;                              \
+                                              \
+    }
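+
+  /* RTCHECK/ok_address validate bin links before they are followed; on
+     failure, CORRUPTION_ERROR_ACTION (by default an abort) is taken
+     instead of dereferencing a corrupted pointer. This is lightweight
+     hardening, not a full heap-integrity check. */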
+
+  /* Unlink a chunk from a smallbin  */
+  #define unlink_small_chunk(M, P, S)                           \
+    {                                                           \
+                                                                \
+      mchunkptr F = P->fd;                                      \
+      mchunkptr B = P->bk;                                      \
+      bindex_t  I = small_index(S);                             \
+      assert(P != B);                                           \
+      assert(P != F);                                           \
+      assert(chunksize(P) == small_index2size(I));              \
+      if (RTCHECK(F == smallbin_at(M, I) ||                     \
+                  (ok_address(M, F) && F->bk == P))) {          \
+                                                                \
+        if (B == F) {                                           \
+                                                                \
+          clear_smallmap(M, I);                                 \
+                                                                \
+        } else if (RTCHECK(B == smallbin_at(M, I) ||            \
+                                                                \
+                                                                \
+                           (ok_address(M, B) && B->fd == P))) { \
+                                                                \
+          F->bk = B;                                            \
+          B->fd = F;                                            \
+                                                                \
+        } else {                                                \
+                                                                \
+          CORRUPTION_ERROR_ACTION(M);                           \
+                                                                \
+        }                                                       \
+                                                                \
+      } else {                                                  \
+                                                                \
+        CORRUPTION_ERROR_ACTION(M);                             \
+                                                                \
+      }                                                         \
+                                                                \
+    }
+
+  /* Unlink the first chunk from a smallbin */
+  #define unlink_first_small_chunk(M, B, P, I)              \
+    {                                                       \
+                                                            \
+      mchunkptr F = P->fd;                                  \
+      assert(P != B);                                       \
+      assert(P != F);                                       \
+      assert(chunksize(P) == small_index2size(I));          \
+      if (B == F) {                                         \
+                                                            \
+        clear_smallmap(M, I);                               \
+                                                            \
+      } else if (RTCHECK(ok_address(M, F) && F->bk == P)) { \
+                                                            \
+        F->bk = B;                                          \
+        B->fd = F;                                          \
+                                                            \
+      } else {                                              \
+                                                            \
+        CORRUPTION_ERROR_ACTION(M);                         \
+                                                            \
+      }                                                     \
+                                                            \
+    }
+
+  /* Replace dv node, binning the old one */
+  /* Used only when dvsize known to be small */
+  #define replace_dv(M, P, S)           \
+    {                                   \
+                                        \
+      size_t DVS = M->dvsize;           \
+      assert(is_small(DVS));            \
+      if (DVS != 0) {                   \
+                                        \
+        mchunkptr DV = M->dv;           \
+        insert_small_chunk(M, DV, DVS); \
+                                        \
+      }                                 \
+      M->dvsize = S;                    \
+      M->dv = P;                        \
+                                        \
+    }
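+
+  /* Note: replace_dv first bins the old designated-victim chunk (if
+     any) and then installs P as the new dv, so the remainder of the
+     most recent split is preferred for the next small request -- a
+     simple locality heuristic. */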
+
+  /* ------------------------- Operations on trees ------------------------- */
+
+  /* Insert chunk into tree */
+  #define insert_large_chunk(M, X, S)                                  \
+    {                                                                  \
+                                                                       \
+      tbinptr *H;                                                      \
+      bindex_t I;                                                      \
+      compute_tree_index(S, I);                                        \
+      H = treebin_at(M, I);                                            \
+      X->index = I;                                                    \
+      X->child[0] = X->child[1] = 0;                                   \
+      if (!treemap_is_marked(M, I)) {                                  \
+                                                                       \
+        mark_treemap(M, I);                                            \
+        *H = X;                                                        \
+        X->parent = (tchunkptr)H;                                      \
+        X->fd = X->bk = X;                                             \
+                                                                       \
+      } else {                                                         \
+                                                                       \
+        tchunkptr T = *H;                                              \
+        size_t    K = S << leftshift_for_tree_index(I);                \
+        for (;;) {                                                     \
+                                                                       \
+          if (chunksize(T) != S) {                                     \
+                                                                       \
+            tchunkptr *C =                                             \
+                &(T->child[(K >> (SIZE_T_BITSIZE - SIZE_T_ONE)) & 1]); \
+            K <<= 1;                                                   \
+            if (*C != 0)                                               \
+              T = *C;                                                  \
+            else if (RTCHECK(ok_address(M, C))) {                      \
+                                                                       \
+              *C = X;                                                  \
+              X->parent = T;                                           \
+              X->fd = X->bk = X;                                       \
+              break;                                                   \
+                                                                       \
+            } else {                                                   \
+                                                                       \
+              CORRUPTION_ERROR_ACTION(M);                              \
+              break;                                                   \
+                                                                       \
+            }                                                          \
+                                                                       \
+          } else {                                                     \
+                                                                       \
+            tchunkptr F = T->fd;                                       \
+            if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {       \
+                                                                       \
+              T->fd = F->bk = X;                                       \
+              X->fd = F;                                               \
+              X->bk = T;                                               \
+              X->parent = 0;                                           \
+              break;                                                   \
+                                                                       \
+            } else {                                                   \
+                                                                       \
+              CORRUPTION_ERROR_ACTION(M);                              \
+              break;                                                   \
+                                                                       \
+            }                                                          \
+                                                                       \
+          }                                                            \
+                                                                       \
+        }                                                              \
+                                                                       \
+      }                                                                \
+                                                                       \
+    }
+
+/*
+  Unlink steps:
+
+  1. If x is a chained node, unlink it from its same-sized fd/bk links
+     and choose its bk node as its replacement.
+  2. If x was the last node of its size, but not a leaf node, it must
+     be replaced with a leaf node (not merely one with an open left or
+     right), to make sure that lefts and rights of descendants
+     correspond properly to bit masks.  We use the rightmost descendant
+     of x.  We could use any other leaf, but this is easy to locate and
+     tends to counteract removal of leftmosts elsewhere, and so keeps
+     paths shorter than minimally guaranteed.  This doesn't loop much
+     because on average a node in a tree is near the bottom.
+  3. If x is the base of a chain (i.e., has parent links) relink
+     x's parent and children to x's replacement (or null if none).
+*/
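+
+/*
+  Example for step 2: when X has children but no same-sized chain, the
+  descent
+
+    while ((*(CP = &(R->child[1])) != 0) || (*(CP = &(R->child[0])) != 0))
+      R = *(RP = CP);
+
+  walks from X's right (else left) child down to a leaf R, clears R's
+  old parent link via *RP = 0, and R then inherits X's parent and
+  children in the code below.
+*/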
+
+  #define unlink_large_chunk(M, X)                                   \
+    {                                                                \
+                                                                     \
+      tchunkptr XP = X->parent;                                      \
+      tchunkptr R;                                                   \
+      if (X->bk != X) {                                              \
+                                                                     \
+        tchunkptr F = X->fd;                                         \
+        R = X->bk;                                                   \
+        if (RTCHECK(ok_address(M, F) && F->bk == X && R->fd == X)) { \
+                                                                     \
+          F->bk = R;                                                 \
+          R->fd = F;                                                 \
+                                                                     \
+        } else {                                                     \
+                                                                     \
+          CORRUPTION_ERROR_ACTION(M);                                \
+                                                                     \
+        }                                                            \
+                                                                     \
+      } else {                                                       \
+                                                                     \
+        tchunkptr *RP;                                               \
+        if (((R = *(RP = &(X->child[1]))) != 0) ||                   \
+            ((R = *(RP = &(X->child[0]))) != 0)) {                   \
+                                                                     \
+          tchunkptr *CP;                                             \
+          while ((*(CP = &(R->child[1])) != 0) ||                    \
+                 (*(CP = &(R->child[0])) != 0)) {                    \
+                                                                     \
+            R = *(RP = CP);                                          \
+                                                                     \
+          }                                                          \
+          if (RTCHECK(ok_address(M, RP)))                            \
+            *RP = 0;                                                 \
+          else {                                                     \
+                                                                     \
+            CORRUPTION_ERROR_ACTION(M);                              \
+                                                                     \
+          }                                                          \
+                                                                     \
+        }                                                            \
+                                                                     \
+      }                                                              \
+      if (XP != 0) {                                                 \
+                                                                     \
+        tbinptr *H = treebin_at(M, X->index);                        \
+        if (X == *H) {                                               \
+                                                                     \
+          if ((*H = R) == 0) clear_treemap(M, X->index);             \
+                                                                     \
+        } else if (RTCHECK(ok_address(M, XP))) {                     \
+                                                                     \
+          if (XP->child[0] == X)                                     \
+            XP->child[0] = R;                                        \
+          else                                                       \
+            XP->child[1] = R;                                        \
+                                                                     \
+        } else                                                       \
+                                                                     \
+                                                                     \
+          CORRUPTION_ERROR_ACTION(M);                                \
+        if (R != 0) {                                                \
+                                                                     \
+          if (RTCHECK(ok_address(M, R))) {                           \
+                                                                     \
+            tchunkptr C0, C1;                                        \
+            R->parent = XP;                                          \
+            if ((C0 = X->child[0]) != 0) {                           \
+                                                                     \
+              if (RTCHECK(ok_address(M, C0))) {                      \
+                                                                     \
+                R->child[0] = C0;                                    \
+                C0->parent = R;                                      \
+                                                                     \
+              } else                                                 \
+                                                                     \
+                                                                     \
+                CORRUPTION_ERROR_ACTION(M);                          \
+                                                                     \
+            }                                                        \
+            if ((C1 = X->child[1]) != 0) {                           \
+                                                                     \
+              if (RTCHECK(ok_address(M, C1))) {                      \
+                                                                     \
+                R->child[1] = C1;                                    \
+                C1->parent = R;                                      \
+                                                                     \
+              } else                                                 \
+                                                                     \
+                                                                     \
+                CORRUPTION_ERROR_ACTION(M);                          \
+                                                                     \
+            }                                                        \
+                                                                     \
+          } else                                                     \
+                                                                     \
+                                                                     \
+            CORRUPTION_ERROR_ACTION(M);                              \
+                                                                     \
+        }                                                            \
+                                                                     \
+      }                                                              \
+                                                                     \
+    }
+
+/* Relays to large vs small bin operations */
+
+  #define insert_chunk(M, P, S)                         \
+    if (is_small(S)) insert_small_chunk(M, P, S) else { \
+                                                        \
+        tchunkptr TP = (tchunkptr)(P);                  \
+        insert_large_chunk(M, TP, S);                   \
+                                                        \
+      }
+
+  #define unlink_chunk(M, P, S)                         \
+    if (is_small(S)) unlink_small_chunk(M, P, S) else { \
+                                                        \
+        tchunkptr TP = (tchunkptr)(P);                  \
+        unlink_large_chunk(M, TP);                      \
+                                                        \
+      }
+
+/* Relays to internal calls to malloc/free from realloc, memalign, etc. */
+
+  #if ONLY_MSPACES
+    #define internal_malloc(m, b) mspace_malloc(m, b)
+    #define internal_free(m, mem) mspace_free(m, mem);
+  #else                                                     /* ONLY_MSPACES */
+    #if MSPACES
+      #define internal_malloc(m, b) \
+        ((m == gm) ? dlmalloc(b) : mspace_malloc(m, b))
+      #define internal_free(m, mem) \
+        if (m == gm)                \
+          dlfree(mem);              \
+        else                        \
+          mspace_free(m, mem);
+    #else                                                        /* MSPACES */
+      #define internal_malloc(m, b) dlmalloc(b)
+      #define internal_free(m, mem) dlfree(mem)
+    #endif                                                       /* MSPACES */
+  #endif                                                    /* ONLY_MSPACES */
+
+/* -----------------------  Direct-mmapping chunks ----------------------- */
+
+/*
+  Directly mmapped chunks are set up with an offset to the start of
+  the mmapped region stored in the prev_foot field of the chunk. This
+  allows reconstruction of the required argument to MUNMAP when freed,
+  and also allows adjustment of the returned chunk to meet alignment
+  requirements (especially in memalign).
+*/
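+
+/*
+  Layout produced by mmap_alloc below (sketch):
+
+    mm .. mm+offset-1        alignment pad; offset is saved in prev_foot
+    p = mm+offset            chunk header, head = psize
+    chunk2mem(p) ..          user payload
+    p+psize, +SIZE_T_SIZE    trailing FENCEPOST_HEAD word, then a 0 head
+*/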
+
+/* Malloc using mmap */
+static void *mmap_alloc(mstate m, size_t nb) {
+
+  size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+  if (m->footprint_limit != 0) {
+
+    size_t fp = m->footprint + mmsize;
+    if (fp <= m->footprint || fp > m->footprint_limit) return 0;
+
+  }
+
+  if (mmsize > nb) {                             /* Check for wrap around 0 */
+    char *mm = (char *)(CALL_DIRECT_MMAP(mmsize));
+    if (mm != CMFAIL) {
+
+      size_t    offset = align_offset(chunk2mem(mm));
+      size_t    psize = mmsize - offset - MMAP_FOOT_PAD;
+      mchunkptr p = (mchunkptr)(mm + offset);
+      p->prev_foot = offset;
+      p->head = psize;
+      mark_inuse_foot(m, p, psize);
+      chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
+      chunk_plus_offset(p, psize + SIZE_T_SIZE)->head = 0;
+
+      if (m->least_addr == 0 || mm < m->least_addr) m->least_addr = mm;
+      if ((m->footprint += mmsize) > m->max_footprint)
+        m->max_footprint = m->footprint;
+      assert(is_aligned(chunk2mem(p)));
+      check_mmapped_chunk(m, p);
+      return chunk2mem(p);
+
+    }
+
+  }
+
+  return 0;
+
+}
+
+/* Realloc using mmap */
+static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb, int flags) {
+
+  size_t oldsize = chunksize(oldp);
+  (void)flags;                         /* placate people compiling -Wunused */
+  if (is_small(nb))           /* Can't shrink mmap regions below small size */
+    return 0;
+  /* Keep old chunk if big enough but not too big */
+  if (oldsize >= nb + SIZE_T_SIZE &&
+      (oldsize - nb) <= (mparams.granularity << 1))
+    return oldp;
+  else {
+
+    size_t offset = oldp->prev_foot;
+    size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD;
+    size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+    char * cp =
+        (char *)CALL_MREMAP((char *)oldp - offset, oldmmsize, newmmsize, flags);
+    if (cp != CMFAIL) {
+
+      mchunkptr newp = (mchunkptr)(cp + offset);
+      size_t    psize = newmmsize - offset - MMAP_FOOT_PAD;
+      newp->head = psize;
+      mark_inuse_foot(m, newp, psize);
+      chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
+      chunk_plus_offset(newp, psize + SIZE_T_SIZE)->head = 0;
+
+      if (cp < m->least_addr) m->least_addr = cp;
+      if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint)
+        m->max_footprint = m->footprint;
+      check_mmapped_chunk(m, newp);
+      return newp;
+
+    }
+
+  }
+
+  return 0;
+
+}
+
+/* -------------------------- mspace management -------------------------- */
+
+/* Initialize top chunk and its size */
+static void init_top(mstate m, mchunkptr p, size_t psize) {
+
+  /* Ensure alignment */
+  size_t offset = align_offset(chunk2mem(p));
+  p = (mchunkptr)((char *)p + offset);
+  psize -= offset;
+
+  m->top = p;
+  m->topsize = psize;
+  p->head = psize | PINUSE_BIT;
+  /* set size of fake trailing chunk holding overhead space only once */
+  chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
+  m->trim_check = mparams.trim_threshold;           /* reset on each update */
+
+}
+
+/* Initialize bins for a new mstate that is otherwise zeroed out */
+static void init_bins(mstate m) {
+
+  /* Establish circular links for smallbins */
+  bindex_t i;
+  for (i = 0; i < NSMALLBINS; ++i) {
+
+    sbinptr bin = smallbin_at(m, i);
+    bin->fd = bin->bk = bin;
+
+  }
+
+}
+
+  #if PROCEED_ON_ERROR
+
+/* default corruption action */
+static void reset_on_error(mstate m) {
+
+  int i;
+  ++malloc_corruption_error_count;
+  /* Reinitialize fields to forget about all memory */
+  m->smallmap = m->treemap = 0;
+  m->dvsize = m->topsize = 0;
+  m->seg.base = 0;
+  m->seg.size = 0;
+  m->seg.next = 0;
+  m->top = m->dv = 0;
+  for (i = 0; i < NTREEBINS; ++i)
+    *treebin_at(m, i) = 0;
+  init_bins(m);
+
+}
+
+  #endif                                                /* PROCEED_ON_ERROR */
+
+/* Allocate a chunk in newbase and merge the remainder with the first
+   chunk of the old (successor) base. */
+static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb) {
+
+  mchunkptr p = align_as_chunk(newbase);
+  mchunkptr oldfirst = align_as_chunk(oldbase);
+  size_t    psize = (char *)oldfirst - (char *)p;
+  mchunkptr q = chunk_plus_offset(p, nb);
+  size_t    qsize = psize - nb;
+  set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+
+  assert((char *)oldfirst > (char *)q);
+  assert(pinuse(oldfirst));
+  assert(qsize >= MIN_CHUNK_SIZE);
+
+  /* consolidate remainder with first chunk of old base */
+  if (oldfirst == m->top) {
+
+    size_t tsize = m->topsize += qsize;
+    m->top = q;
+    q->head = tsize | PINUSE_BIT;
+    check_top_chunk(m, q);
+
+  } else if (oldfirst == m->dv) {
+
+    size_t dsize = m->dvsize += qsize;
+    m->dv = q;
+    set_size_and_pinuse_of_free_chunk(q, dsize);
+
+  } else {
+
+    if (!is_inuse(oldfirst)) {
+
+      size_t nsize = chunksize(oldfirst);
+      unlink_chunk(m, oldfirst, nsize);
+      oldfirst = chunk_plus_offset(oldfirst, nsize);
+      qsize += nsize;
+
+    }
+
+    set_free_with_pinuse(q, qsize, oldfirst);
+    insert_chunk(m, q, qsize);
+    check_free_chunk(m, q);
+
+  }
+
+  check_malloced_chunk(m, chunk2mem(p), nb);
+  return chunk2mem(p);
+
+}
+
+/* Add a segment to hold a new noncontiguous region */
+static void add_segment(mstate m, char *tbase, size_t tsize, flag_t mmapped) {
+
+  /* Determine locations and sizes of segment, fenceposts, old top */
+  char *      old_top = (char *)m->top;
+  msegmentptr oldsp = segment_holding(m, old_top);
+  char *      old_end = oldsp->base + oldsp->size;
+  size_t      ssize = pad_request(sizeof(struct malloc_segment));
+  char *      rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+  size_t      offset = align_offset(chunk2mem(rawsp));
+  char *      asp = rawsp + offset;
+  char *      csp = (asp < (old_top + MIN_CHUNK_SIZE)) ? old_top : asp;
+  mchunkptr   sp = (mchunkptr)csp;
+  msegmentptr ss = (msegmentptr)(chunk2mem(sp));
+  mchunkptr   tnext = chunk_plus_offset(sp, ssize);
+  mchunkptr   p = tnext;
+  int         nfences = 0;
+
+  /* reset top to new space */
+  init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+
+  /* Set up segment record */
+  assert(is_aligned(ss));
+  set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
+  *ss = m->seg;                                      /* Push current record */
+  m->seg.base = tbase;
+  m->seg.size = tsize;
+  m->seg.sflags = mmapped;
+  m->seg.next = ss;
+
+  /* Insert trailing fenceposts */
+  for (;;) {
+
+    mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
+    p->head = FENCEPOST_HEAD;
+    ++nfences;
+    if ((char *)(&(nextp->head)) < old_end)
+      p = nextp;
+    else
+      break;
+
+  }
+
+  assert(nfences >= 2);
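+
+  /* The fenceposts make the tail of the old segment look permanently
+     in use, so consolidation can never run past the end of the old
+     address range into the unrelated new segment. */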
+
+  /* Insert the rest of old top into a bin as an ordinary free chunk */
+  if (csp != old_top) {
+
+    mchunkptr q = (mchunkptr)old_top;
+    size_t    psize = csp - old_top;
+    mchunkptr tn = chunk_plus_offset(q, psize);
+    set_free_with_pinuse(q, psize, tn);
+    insert_chunk(m, q, psize);
+
+  }
+
+  check_top_chunk(m, m->top);
+
+}
+
+/* -------------------------- System allocation -------------------------- */
+
+/* Get memory from system using MORECORE or MMAP */
+static void *sys_alloc(mstate m, size_t nb) {
+
+  char * tbase = CMFAIL;
+  size_t tsize = 0;
+  flag_t mmap_flag = 0;
+  size_t asize;                                          /* allocation size */
+
+  ensure_initialization();
+
+  /* Directly map large chunks, but only if already initialized */
+  if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) {
+
+    void *mem = mmap_alloc(m, nb);
+    if (mem != 0) return mem;
+
+  }
+
+  asize = granularity_align(nb + SYS_ALLOC_PADDING);
+  if (asize <= nb) return 0;                                  /* wraparound */
+  if (m->footprint_limit != 0) {
+
+    size_t fp = m->footprint + asize;
+    if (fp <= m->footprint || fp > m->footprint_limit) return 0;
+
+  }
+
+  /*
+    Try getting memory in any of three ways (in most-preferred to
+    least-preferred order):
+    1. A call to MORECORE that can normally contiguously extend memory.
+       (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or
+       main space is mmapped or a previous contiguous call failed)
+    2. A call to MMAP new space (disabled if not HAVE_MMAP).
+       Note that under the default settings, if MORECORE is unable to
+       fulfill a request, and HAVE_MMAP is true, then mmap is
+       used as a noncontiguous system allocator. This is a useful backup
+       strategy for systems with holes in address spaces -- in this case
+       sbrk cannot contiguously expand the heap, but mmap may be able to
+       find space.
+    3. A call to MORECORE that cannot usually contiguously extend memory.
+       (disabled if not HAVE_MORECORE)
+
+   In all cases, we need to request enough bytes from the system to
+   ensure we can malloc nb bytes upon success, so pad with enough space
+   for top_foot, plus alignment-pad to make sure we don't lose bytes if
+   not on a boundary, and round this up to a granularity unit.
+  */
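+
+  /*
+    Worked example (illustrative; assumes a 64 KiB granularity): for
+    nb + SYS_ALLOC_PADDING == 70000, granularity_align() rounds asize
+    up to 131072; the earlier "asize <= nb" test has already rejected
+    any value that wrapped around zero.
+  */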
+
+  if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) {
+
+    char *      br = CMFAIL;
+    size_t      ssize = asize;                            /* sbrk call size */
+    msegmentptr ss = (m->top == 0) ? 0 : segment_holding(m, (char *)m->top);
+    ACQUIRE_MALLOC_GLOBAL_LOCK();
+
+    if (ss == 0) {                        /* First time through or recovery */
+      char *base = (char *)CALL_MORECORE(0);
+      if (base != CMFAIL) {
+
+        size_t fp;
+        /* Adjust to end on a page boundary */
+        if (!is_page_aligned(base))
+          ssize += (page_align((size_t)base) - (size_t)base);
+        fp = m->footprint + ssize;                        /* recheck limits */
+        if (ssize > nb && ssize < HALF_MAX_SIZE_T &&
+            (m->footprint_limit == 0 ||
+             (fp > m->footprint && fp <= m->footprint_limit)) &&
+            (br = (char *)(CALL_MORECORE(ssize))) == base) {
+
+          tbase = base;
+          tsize = ssize;
+
+        }
+
+      }
+
+    } else {
+
+      /* Subtract out existing available top space from MORECORE request. */
+      ssize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING);
+      /* Use mem here only if it did contiguously extend old space */
+      if (ssize < HALF_MAX_SIZE_T &&
+          (br = (char *)(CALL_MORECORE(ssize))) == ss->base + ss->size) {
+
+        tbase = br;
+        tsize = ssize;
+
+      }
+
+    }
+
+    if (tbase == CMFAIL) {                     /* Cope with partial failure */
+      if (br != CMFAIL) {         /* Try to use/extend the space we did get */
+        if (ssize < HALF_MAX_SIZE_T && ssize < nb + SYS_ALLOC_PADDING) {
+
+          size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - ssize);
+          if (esize < HALF_MAX_SIZE_T) {
+
+            char *end = (char *)CALL_MORECORE(esize);
+            if (end != CMFAIL)
+              ssize += esize;
+            else {                             /* Can't use; try to release */
+              (void)CALL_MORECORE(-ssize);
+              br = CMFAIL;
+
+            }
+
+          }
+
+        }
+
+      }
+
+      if (br != CMFAIL) {                       /* Use the space we did get */
+        tbase = br;
+        tsize = ssize;
+
+      } else
+
+        disable_contiguous(m);   /* Don't try contiguous path in the future */
+
+    }
+
+    RELEASE_MALLOC_GLOBAL_LOCK();
+
+  }
+
+  if (HAVE_MMAP && tbase == CMFAIL) {                           /* Try MMAP */
+    char *mp = (char *)(CALL_MMAP(asize));
+    if (mp != CMFAIL) {
+
+      tbase = mp;
+      tsize = asize;
+      mmap_flag = USE_MMAP_BIT;
+
+    }
+
+  }
+
+  if (HAVE_MORECORE && tbase == CMFAIL) {     /* Try noncontiguous MORECORE */
+    if (asize < HALF_MAX_SIZE_T) {
+
+      char *br = CMFAIL;
+      char *end = CMFAIL;
+      ACQUIRE_MALLOC_GLOBAL_LOCK();
+      br = (char *)(CALL_MORECORE(asize));
+      end = (char *)(CALL_MORECORE(0));
+      RELEASE_MALLOC_GLOBAL_LOCK();
+      if (br != CMFAIL && end != CMFAIL && br < end) {
+
+        size_t ssize = end - br;
+        if (ssize > nb + TOP_FOOT_SIZE) {
+
+          tbase = br;
+          tsize = ssize;
+
+        }
+
+      }
+
+    }
+
+  }
+
+  if (tbase != CMFAIL) {
+
+    if ((m->footprint += tsize) > m->max_footprint)
+      m->max_footprint = m->footprint;
+
+    if (!is_initialized(m)) {                  /* first-time initialization */
+      if (m->least_addr == 0 || tbase < m->least_addr) m->least_addr = tbase;
+      m->seg.base = tbase;
+      m->seg.size = tsize;
+      m->seg.sflags = mmap_flag;
+      m->magic = mparams.magic;
+      m->release_checks = MAX_RELEASE_CHECK_RATE;
+      init_bins(m);
+  #if !ONLY_MSPACES
+      if (is_global(m))
+        init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+      else
+  #endif
+      {
+
+        /* Offset top by embedded malloc_state */
+        mchunkptr mn = next_chunk(mem2chunk(m));
+        init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE);
+
+      }
+
+    }
+
+    else {
+
+      /* Try to merge with an existing segment */
+      msegmentptr sp = &m->seg;
+      /* Only consider most recent segment if traversal suppressed */
+      while (sp != 0 && tbase != sp->base + sp->size)
+        sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+      if (sp != 0 && !is_extern_segment(sp) &&
+          (sp->sflags & USE_MMAP_BIT) == mmap_flag &&
+          segment_holds(sp, m->top)) {                            /* append */
+        sp->size += tsize;
+        init_top(m, m->top, m->topsize + tsize);
+
+      } else {
+
+        if (tbase < m->least_addr) m->least_addr = tbase;
+        sp = &m->seg;
+        while (sp != 0 && sp->base != tbase + tsize)
+          sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+        if (sp != 0 && !is_extern_segment(sp) &&
+            (sp->sflags & USE_MMAP_BIT) == mmap_flag) {
+
+          char *oldbase = sp->base;
+          sp->base = tbase;
+          sp->size += tsize;
+          return prepend_alloc(m, tbase, oldbase, nb);
+
+        } else
+
+          add_segment(m, tbase, tsize, mmap_flag);
+
+      }
+
+    }
+
+    if (nb < m->topsize) {       /* Allocate from new or extended top space */
+      size_t    rsize = m->topsize -= nb;
+      mchunkptr p = m->top;
+      mchunkptr r = m->top = chunk_plus_offset(p, nb);
+      r->head = rsize | PINUSE_BIT;
+      set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+      check_top_chunk(m, m->top);
+      check_malloced_chunk(m, chunk2mem(p), nb);
+      return chunk2mem(p);
+
+    }
+
+  }
+
+  MALLOC_FAILURE_ACTION;
+  return 0;
+
+}
+
+/* -----------------------  system deallocation -------------------------- */
+
+/* Unmap and unlink any mmapped segments that don't contain used chunks */
+static size_t release_unused_segments(mstate m) {
+
+  size_t      released = 0;
+  int         nsegs = 0;
+  msegmentptr pred = &m->seg;
+  msegmentptr sp = pred->next;
+  while (sp != 0) {
+
+    char *      base = sp->base;
+    size_t      size = sp->size;
+    msegmentptr next = sp->next;
+    ++nsegs;
+    if (is_mmapped_segment(sp) && !is_extern_segment(sp)) {
+
+      mchunkptr p = align_as_chunk(base);
+      size_t    psize = chunksize(p);
+      /* Can unmap if first chunk holds entire segment and not pinned */
+      if (!is_inuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) {
+
+        tchunkptr tp = (tchunkptr)p;
+        assert(segment_holds(sp, (char *)sp));
+        if (p == m->dv) {
+
+          m->dv = 0;
+          m->dvsize = 0;
+
+        } else {
+
+          unlink_large_chunk(m, tp);
+
+        }
+
+        if (CALL_MUNMAP(base, size) == 0) {
+
+          released += size;
+          m->footprint -= size;
+          /* unlink obsoleted record */
+          sp = pred;
+          sp->next = next;
+
+        } else {                                /* back out if cannot unmap */
+
+          insert_large_chunk(m, tp, psize);
+
+        }
+
+      }
+
+    }
+
+    if (NO_SEGMENT_TRAVERSAL)                    /* scan only first segment */
+      break;
+    pred = sp;
+    sp = next;
+
+  }
+
+  /* Reset check counter */
+  m->release_checks = (((size_t)nsegs > (size_t)MAX_RELEASE_CHECK_RATE)
+                           ? (size_t)nsegs
+                           : (size_t)MAX_RELEASE_CHECK_RATE);
+  return released;
+
+}
+
+static int sys_trim(mstate m, size_t pad) {
+
+  size_t released = 0;
+  ensure_initialization();
+  if (pad < MAX_REQUEST && is_initialized(m)) {
+
+    pad += TOP_FOOT_SIZE;        /* ensure enough room for segment overhead */
+
+    if (m->topsize > pad) {
+
+      /* Shrink top space in granularity-size units, keeping at least one */
+      size_t unit = mparams.granularity;
+      size_t extra =
+          ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - SIZE_T_ONE) * unit;
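+      /* e.g. with unit == 65536 and topsize - pad == 262144 (4 units):
+         extra == ((262144 + 65535) / 65536 - 1) * 65536 == 196608,
+         i.e. three units are released and one full unit is kept. */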
+      msegmentptr sp = segment_holding(m, (char *)m->top);
+
+      if (!is_extern_segment(sp)) {
+
+        if (is_mmapped_segment(sp)) {
+
+          if (HAVE_MMAP && sp->size >= extra &&
+              !has_segment_link(m, sp)) {         /* can't shrink if pinned */
+            size_t newsize = sp->size - extra;
+            (void)newsize;    /* placate people compiling -Wunused-variable */
+            /* Prefer mremap, fall back to munmap */
+            if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
+                (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
+
+              released = extra;
+
+            }
+
+          }
+
+        } else if (HAVE_MORECORE) {
+
+          if (extra >= HALF_MAX_SIZE_T)          /* Avoid wrapping negative */
+            extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit;
+          ACQUIRE_MALLOC_GLOBAL_LOCK();
+          {
+
+            /* Make sure end of memory is where we last set it. */
+            char *old_br = (char *)(CALL_MORECORE(0));
+            if (old_br == sp->base + sp->size) {
+
+              char *rel_br = (char *)(CALL_MORECORE(-extra));
+              char *new_br = (char *)(CALL_MORECORE(0));
+              if (rel_br != CMFAIL && new_br < old_br)
+                released = old_br - new_br;
+
+            }
+
+          }
+
+          RELEASE_MALLOC_GLOBAL_LOCK();
+
+        }
+
+      }
+
+      if (released != 0) {
+
+        sp->size -= released;
+        m->footprint -= released;
+        init_top(m, m->top, m->topsize - released);
+        check_top_chunk(m, m->top);
+
+      }
+
+    }
+
+    /* Unmap any unused mmapped segments */
+    if (HAVE_MMAP) released += release_unused_segments(m);
+
+    /* On failure, disable autotrim to avoid repeated failed future calls */
+    if (released == 0 && m->topsize > m->trim_check) m->trim_check = MAX_SIZE_T;
+
+  }
+
+  return (released != 0) ? 1 : 0;
+
+}
+
+/* Consolidate and bin a chunk. Differs from exported versions
+   of free mainly in that the chunk need not be marked as inuse.
+*/
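+/* (Called from the realloc/memalign support below to consolidate and
+   re-bin trimmed-off remainders.) */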
+static void dispose_chunk(mstate m, mchunkptr p, size_t psize) {
+
+  mchunkptr next = chunk_plus_offset(p, psize);
+  if (!pinuse(p)) {
+
+    mchunkptr prev;
+    size_t    prevsize = p->prev_foot;
+    if (is_mmapped(p)) {
+
+      psize += prevsize + MMAP_FOOT_PAD;
+      if (CALL_MUNMAP((char *)p - prevsize, psize) == 0) m->footprint -= psize;
+      return;
+
+    }
+
+    prev = chunk_minus_offset(p, prevsize);
+    psize += prevsize;
+    p = prev;
+    if (RTCHECK(ok_address(m, prev))) {             /* consolidate backward */
+      if (p != m->dv) {
+
+        unlink_chunk(m, p, prevsize);
+
+      } else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+
+        m->dvsize = psize;
+        set_free_with_pinuse(p, psize, next);
+        return;
+
+      }
+
+    } else {
+
+      CORRUPTION_ERROR_ACTION(m);
+      return;
+
+    }
+
+  }
+
+  if (RTCHECK(ok_address(m, next))) {
+
+    if (!cinuse(next)) {                             /* consolidate forward */
+      if (next == m->top) {
+
+        size_t tsize = m->topsize += psize;
+        m->top = p;
+        p->head = tsize | PINUSE_BIT;
+        if (p == m->dv) {
+
+          m->dv = 0;
+          m->dvsize = 0;
+
+        }
+
+        return;
+
+      } else if (next == m->dv) {
+
+        size_t dsize = m->dvsize += psize;
+        m->dv = p;
+        set_size_and_pinuse_of_free_chunk(p, dsize);
+        return;
+
+      } else {
+
+        size_t nsize = chunksize(next);
+        psize += nsize;
+        unlink_chunk(m, next, nsize);
+        set_size_and_pinuse_of_free_chunk(p, psize);
+        if (p == m->dv) {
+
+          m->dvsize = psize;
+          return;
+
+        }
+
+      }
+
+    } else {
+
+      set_free_with_pinuse(p, psize, next);
+
+    }
+
+    insert_chunk(m, p, psize);
+
+  } else {
+
+    CORRUPTION_ERROR_ACTION(m);
+
+  }
+
+}
+
+/* ---------------------------- malloc --------------------------- */
+
+/* allocate a large request from the best fitting chunk in a treebin */
+static void *tmalloc_large(mstate m, size_t nb) {
+
+  tchunkptr v = 0;
+  size_t    rsize = -nb;                               /* Unsigned negation */
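+  /* -nb wraps to MAX_SIZE_T - nb + 1, an unreachable best-fit sentinel:
+     any real candidate has trem == chunksize(t) - nb < rsize, so the
+     first fit found always replaces it. */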
+  tchunkptr t;
+  bindex_t  idx;
+  compute_tree_index(nb, idx);
+  if ((t = *treebin_at(m, idx)) != 0) {
+
+    /* Traverse tree for this bin looking for node with size == nb */
+    size_t    sizebits = nb << leftshift_for_tree_index(idx);
+    tchunkptr rst = 0;                 /* The deepest untaken right subtree */
+    for (;;) {
+
+      tchunkptr rt;
+      size_t    trem = chunksize(t) - nb;
+      if (trem < rsize) {
+
+        v = t;
+        if ((rsize = trem) == 0) break;
+
+      }
+
+      rt = t->child[1];
+      t = t->child[(sizebits >> (SIZE_T_BITSIZE - SIZE_T_ONE)) & 1];
+      if (rt != 0 && rt != t) rst = rt;
+      if (t == 0) {
+
+        t = rst;               /* set t to least subtree holding sizes > nb */
+        break;
+
+      }
+
+      sizebits <<= 1;
+
+    }
+
+  }
+
+  if (t == 0 && v == 0) {        /* set t to root of next non-empty treebin */
+    binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
+    if (leftbits != 0) {
+
+      bindex_t i;
+      binmap_t leastbit = least_bit(leftbits);
+      compute_bit2idx(leastbit, i);
+      t = *treebin_at(m, i);
+
+    }
+
+  }
+
+  while (t != 0) {                      /* find smallest of tree or subtree */
+    size_t trem = chunksize(t) - nb;
+    if (trem < rsize) {
+
+      rsize = trem;
+      v = t;
+
+    }
+
+    t = leftmost_child(t);
+
+  }
+
+  /* If dv is a better fit, return 0 so malloc will use it */
+  if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
+
+    if (RTCHECK(ok_address(m, v))) {                               /* split */
+      mchunkptr r = chunk_plus_offset(v, nb);
+      assert(chunksize(v) == rsize + nb);
+      if (RTCHECK(ok_next(v, r))) {
+
+        unlink_large_chunk(m, v);
+        if (rsize < MIN_CHUNK_SIZE)
+          set_inuse_and_pinuse(m, v, (rsize + nb));
+        else {
+
+          set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+          set_size_and_pinuse_of_free_chunk(r, rsize);
+          insert_chunk(m, r, rsize);
+
+        }
+
+        return chunk2mem(v);
+
+      }
+
+    }
+
+    CORRUPTION_ERROR_ACTION(m);
+
+  }
+
+  return 0;
+
+}
+
+/* allocate a small request from the best fitting chunk in a treebin */
+static void *tmalloc_small(mstate m, size_t nb) {
+
+  tchunkptr t, v;
+  size_t    rsize;
+  bindex_t  i;
+  binmap_t  leastbit = least_bit(m->treemap);
+  compute_bit2idx(leastbit, i);
+  v = t = *treebin_at(m, i);
+  rsize = chunksize(t) - nb;
+
+  while ((t = leftmost_child(t)) != 0) {
+
+    size_t trem = chunksize(t) - nb;
+    if (trem < rsize) {
+
+      rsize = trem;
+      v = t;
+
+    }
+
+  }
+
+  if (RTCHECK(ok_address(m, v))) {
+
+    mchunkptr r = chunk_plus_offset(v, nb);
+    assert(chunksize(v) == rsize + nb);
+    if (RTCHECK(ok_next(v, r))) {
+
+      unlink_large_chunk(m, v);
+      if (rsize < MIN_CHUNK_SIZE)
+        set_inuse_and_pinuse(m, v, (rsize + nb));
+      else {
+
+        set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+        set_size_and_pinuse_of_free_chunk(r, rsize);
+        replace_dv(m, r, rsize);
+
+      }
+
+      return chunk2mem(v);
+
+    }
+
+  }
+
+  CORRUPTION_ERROR_ACTION(m);
+  return 0;
+
+}
+
+  #if !ONLY_MSPACES
+
+void *dlmalloc(size_t bytes) {
+
+    /*
+       Basic algorithm:
+       If a small request (< 256 bytes minus per-chunk overhead):
+         1. If one exists, use a remainderless chunk in associated smallbin.
+            (Remainderless means that there are too few excess bytes to
+            represent as a chunk.)
+         2. If it is big enough, use the dv chunk, which is normally the
+            chunk adjacent to the one used for the most recent small request.
+         3. If one exists, split the smallest available chunk in a bin,
+            saving remainder in dv.
+         4. If it is big enough, use the top chunk.
+         5. If available, get memory from system and use it
+       Otherwise, for a large request:
+         1. Find the smallest available binned chunk that fits, and use it
+            if it is better fitting than dv chunk, splitting if necessary.
+         2. If better fitting than any binned chunk, use the dv chunk.
+         3. If it is big enough, use the top chunk.
+         4. If request size >= mmap threshold, try to directly mmap this chunk.
+         5. If available, get memory from system and use it
+
+       The ugly gotos here ensure that postaction occurs along all paths.
+    */
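+
+    /*
+       Illustrative sizing (assumes 64-bit size_t, 16-byte alignment,
+       and no FOOTERS): dlmalloc(20) is padded by pad_request() to a
+       32-byte chunk, which is well under MAX_SMALL_REQUEST, so the
+       small-request path below applies before top or sys_alloc.
+    */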
+
+    #if USE_LOCKS
+  ensure_initialization();    /* initialize in sys_alloc if not using locks */
+    #endif
+
+  if (!PREACTION(gm)) {
+
+    void * mem;
+    size_t nb;
+    if (bytes <= MAX_SMALL_REQUEST) {
+
+      bindex_t idx;
+      binmap_t smallbits;
+      nb = (bytes < MIN_REQUEST) ? MIN_CHUNK_SIZE : pad_request(bytes);
+      idx = small_index(nb);
+      smallbits = gm->smallmap >> idx;
+
+      if ((smallbits & 0x3U) != 0) {    /* Remainderless fit to a smallbin. */
+        mchunkptr b, p;
+        idx += ~smallbits & 1;                /* Uses next bin if idx empty */
+        b = smallbin_at(gm, idx);
+        p = b->fd;
+        assert(chunksize(p) == small_index2size(idx));
+        unlink_first_small_chunk(gm, b, p, idx);
+        set_inuse_and_pinuse(gm, p, small_index2size(idx));
+        mem = chunk2mem(p);
+        check_malloced_chunk(gm, mem, nb);
+        goto postaction;
+
+      }
+
+      else if (nb > gm->dvsize) {
+
+        if (smallbits != 0) {        /* Use chunk in next nonempty smallbin */
+          mchunkptr b, p, r;
+          size_t    rsize;
+          bindex_t  i;
+          binmap_t  leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+          binmap_t  leastbit = least_bit(leftbits);
+          compute_bit2idx(leastbit, i);
+          b = smallbin_at(gm, i);
+          p = b->fd;
+          assert(chunksize(p) == small_index2size(i));
+          unlink_first_small_chunk(gm, b, p, i);
+          rsize = small_index2size(i) - nb;
+          /* Fit here cannot be remainderless if 4-byte sizes */
+          if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+            set_inuse_and_pinuse(gm, p, small_index2size(i));
+          else {
+
+            set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+            r = chunk_plus_offset(p, nb);
+            set_size_and_pinuse_of_free_chunk(r, rsize);
+            replace_dv(gm, r, rsize);
+
+          }
+
+          mem = chunk2mem(p);
+          check_malloced_chunk(gm, mem, nb);
+          goto postaction;
+
+        }
+
+        else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) {
+
+          check_malloced_chunk(gm, mem, nb);
+          goto postaction;
+
+        }
+
+      }
+
+    } else if (bytes >= MAX_REQUEST)
+
+      nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys_alloc) */
+    else {
+
+      nb = pad_request(bytes);
+      if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) {
+
+        check_malloced_chunk(gm, mem, nb);
+        goto postaction;
+
+      }
+
+    }
+
+    if (nb <= gm->dvsize) {
+
+      size_t    rsize = gm->dvsize - nb;
+      mchunkptr p = gm->dv;
+      if (rsize >= MIN_CHUNK_SIZE) {                            /* split dv */
+        mchunkptr r = gm->dv = chunk_plus_offset(p, nb);
+        gm->dvsize = rsize;
+        set_size_and_pinuse_of_free_chunk(r, rsize);
+        set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+
+      } else {                                                /* exhaust dv */
+
+        size_t dvs = gm->dvsize;
+        gm->dvsize = 0;
+        gm->dv = 0;
+        set_inuse_and_pinuse(gm, p, dvs);
+
+      }
+
+      mem = chunk2mem(p);
+      check_malloced_chunk(gm, mem, nb);
+      goto postaction;
+
+    }
+
+    else if (nb < gm->topsize) {                               /* Split top */
+      size_t    rsize = gm->topsize -= nb;
+      mchunkptr p = gm->top;
+      mchunkptr r = gm->top = chunk_plus_offset(p, nb);
+      r->head = rsize | PINUSE_BIT;
+      set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
+      mem = chunk2mem(p);
+      check_top_chunk(gm, gm->top);
+      check_malloced_chunk(gm, mem, nb);
+      goto postaction;
+
+    }
+
+    mem = sys_alloc(gm, nb);
+
+  postaction:
+    POSTACTION(gm);
+    return mem;
+
+  }
+
+  return 0;
+
+}
+
+/* ---------------------------- free --------------------------- */
+
+void dlfree(void *mem) {
+
+  /*
+     Consolidate freed chunks with preceding or succeeding bordering
+     free chunks, if they exist, and then place in a bin.  Intermixed
+     with special cases for top, dv, mmapped chunks, and usage errors.
+  */
+
+  if (mem != 0) {
+
+    mchunkptr p = mem2chunk(mem);
+    #if FOOTERS
+    mstate fm = get_mstate_for(p);
+    if (!ok_magic(fm)) {
+
+      USAGE_ERROR_ACTION(fm, p);
+      return;
+
+    }
+
+    #else                                                        /* FOOTERS */
+      #define fm gm
+    #endif                                                       /* FOOTERS */
+    if (!PREACTION(fm)) {
+
+      check_inuse_chunk(fm, p);
+      if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
+
+        size_t    psize = chunksize(p);
+        mchunkptr next = chunk_plus_offset(p, psize);
+        if (!pinuse(p)) {
+
+          size_t prevsize = p->prev_foot;
+          if (is_mmapped(p)) {
+
+            psize += prevsize + MMAP_FOOT_PAD;
+            if (CALL_MUNMAP((char *)p - prevsize, psize) == 0)
+              fm->footprint -= psize;
+            goto postaction;
+
+          } else {
+
+            mchunkptr prev = chunk_minus_offset(p, prevsize);
+            psize += prevsize;
+            p = prev;
+            if (RTCHECK(ok_address(fm, prev))) {    /* consolidate backward */
+              if (p != fm->dv) {
+
+                unlink_chunk(fm, p, prevsize);
+
+              } else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+
+                fm->dvsize = psize;
+                set_free_with_pinuse(p, psize, next);
+                goto postaction;
+
+              }
+
+            } else
+
+              goto erroraction;
+
+          }
+
+        }
+
+        if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+
+          if (!cinuse(next)) {                       /* consolidate forward */
+            if (next == fm->top) {
+
+              size_t tsize = fm->topsize += psize;
+              fm->top = p;
+              p->head = tsize | PINUSE_BIT;
+              if (p == fm->dv) {
+
+                fm->dv = 0;
+                fm->dvsize = 0;
+
+              }
+
+              if (should_trim(fm, tsize)) sys_trim(fm, 0);
+              goto postaction;
+
+            } else if (next == fm->dv) {
+
+              size_t dsize = fm->dvsize += psize;
+              fm->dv = p;
+              set_size_and_pinuse_of_free_chunk(p, dsize);
+              goto postaction;
+
+            } else {
+
+              size_t nsize = chunksize(next);
+              psize += nsize;
+              unlink_chunk(fm, next, nsize);
+              set_size_and_pinuse_of_free_chunk(p, psize);
+              if (p == fm->dv) {
+
+                fm->dvsize = psize;
+                goto postaction;
+
+              }
+
+            }
+
+          } else
+
+            set_free_with_pinuse(p, psize, next);
+
+          if (is_small(psize)) {
+
+            insert_small_chunk(fm, p, psize);
+            check_free_chunk(fm, p);
+
+          } else {
+
+            tchunkptr tp = (tchunkptr)p;
+            insert_large_chunk(fm, tp, psize);
+            check_free_chunk(fm, p);
+            if (--fm->release_checks == 0) release_unused_segments(fm);
+
+          }
+
+          goto postaction;
+
+        }
+
+      }
+
+    erroraction:
+      USAGE_ERROR_ACTION(fm, p);
+    postaction:
+      POSTACTION(fm);
+
+    }
+
+  }
+
+    #if !FOOTERS
+      #undef fm
+    #endif                                                       /* FOOTERS */
+
+}
+
+void *dlcalloc(size_t n_elements, size_t elem_size) {
+
+  void * mem;
+  size_t req = 0;
+  if (n_elements != 0) {
+
+    req = n_elements * elem_size;
+    if (((n_elements | elem_size) & ~(size_t)0xffff) &&
+        (req / n_elements != elem_size))
+      req = MAX_SIZE_T;             /* force downstream failure on overflow */
+
+  }
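+
+  /* The (n_elements | elem_size) & ~0xffff test above skips the costly
+     division when both operands fit in 16 bits, since the product then
+     fits in 32 bits and cannot wrap a 32-bit or wider size_t. */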
+
+  mem = dlmalloc(req);
+  if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
+    __builtin_memset(mem, 0, req);
+  return mem;
+
+}
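+
+/*
+  A sketch of the failure path that the overflow guard in dlcalloc
+  protects against (hypothetical sizes, 64-bit size_t assumed):
+
+    // (2^32 + 1) * 2^32 wraps to 2^32, so req / n_elements differs from
+    // elem_size and req is forced to MAX_SIZE_T, making the allocation
+    // fail instead of returning an undersized block.
+    void *p = dlcalloc((size_t)0x100000001ULL, (size_t)0x100000000ULL);
+    // here p == 0
+
+  The (n_elements | elem_size) & ~(size_t)0xffff test skips the division
+  whenever both operands fit in 16 bits: their product then fits in 32
+  bits and cannot wrap size_t.
+*/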
+
+  #endif                                                   /* !ONLY_MSPACES */
+
+/* ------------ Internal support for realloc, memalign, etc -------------- */
+
+/* Try to realloc; only in-place unless can_move true */
+static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb,
+                                   int can_move) {
+
+  mchunkptr newp = 0;
+  size_t    oldsize = chunksize(p);
+  mchunkptr next = chunk_plus_offset(p, oldsize);
+  if (RTCHECK(ok_address(m, p) && ok_inuse(p) && ok_next(p, next) &&
+              ok_pinuse(next))) {
+
+    if (is_mmapped(p)) {
+
+      newp = mmap_resize(m, p, nb, can_move);
+
+    } else if (oldsize >= nb) {                       /* already big enough */
+
+      size_t rsize = oldsize - nb;
+      if (rsize >= MIN_CHUNK_SIZE) {                 /* split off remainder */
+        mchunkptr r = chunk_plus_offset(p, nb);
+        set_inuse(m, p, nb);
+        set_inuse(m, r, rsize);
+        dispose_chunk(m, r, rsize);
+
+      }
+
+      newp = p;
+
+    } else if (next == m->top) {                         /* extend into top */
+
+      if (oldsize + m->topsize > nb) {
+
+        size_t    newsize = oldsize + m->topsize;
+        size_t    newtopsize = newsize - nb;
+        mchunkptr newtop = chunk_plus_offset(p, nb);
+        set_inuse(m, p, nb);
+        newtop->head = newtopsize | PINUSE_BIT;
+        m->top = newtop;
+        m->topsize = newtopsize;
+        newp = p;
+
+      }
+
+    } else if (next == m->dv) {                           /* extend into dv */
+
+      size_t dvs = m->dvsize;
+      if (oldsize + dvs >= nb) {
+
+        size_t dsize = oldsize + dvs - nb;
+        if (dsize >= MIN_CHUNK_SIZE) {
+
+          mchunkptr r = chunk_plus_offset(p, nb);
+          mchunkptr n = chunk_plus_offset(r, dsize);
+          set_inuse(m, p, nb);
+          set_size_and_pinuse_of_free_chunk(r, dsize);
+          clear_pinuse(n);
+          m->dvsize = dsize;
+          m->dv = r;
+
+        } else {                                              /* exhaust dv */
+
+          size_t newsize = oldsize + dvs;
+          set_inuse(m, p, newsize);
+          m->dvsize = 0;
+          m->dv = 0;
+
+        }
+
+        newp = p;
+
+      }
+
+    } else if (!cinuse(next)) {              /* extend into next free chunk */
+
+      size_t nextsize = chunksize(next);
+      if (oldsize + nextsize >= nb) {
+
+        size_t rsize = oldsize + nextsize - nb;
+        unlink_chunk(m, next, nextsize);
+        if (rsize < MIN_CHUNK_SIZE) {
+
+          size_t newsize = oldsize + nextsize;
+          set_inuse(m, p, newsize);
+
+        } else {
+
+          mchunkptr r = chunk_plus_offset(p, nb);
+          set_inuse(m, p, nb);
+          set_inuse(m, r, rsize);
+          dispose_chunk(m, r, rsize);
+
+        }
+
+        newp = p;
+
+      }
+
+    }
+
+  } else {
+
+    USAGE_ERROR_ACTION(m, chunk2mem(p));
+
+  }
+
+  return newp;
+
+}
+
+static void *internal_memalign(mstate m, size_t alignment, size_t bytes) {
+
+  void *mem = 0;
+  if (alignment < MIN_CHUNK_SIZE)  /* must be at least a minimum chunk size */
+    alignment = MIN_CHUNK_SIZE;
+  if ((alignment & (alignment - SIZE_T_ONE)) != 0) { /* Ensure a power of 2 */
+    size_t a = MALLOC_ALIGNMENT << 1;
+    while (a < alignment)
+      a <<= 1;
+    alignment = a;
+
+  }
+
+  if (bytes >= MAX_REQUEST - alignment) {
+
+    if (m != 0) {          /* Test isn't needed but avoids compiler warning */
+      MALLOC_FAILURE_ACTION;
+
+    }
+
+  } else {
+
+    size_t nb = request2size(bytes);
+    size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD;
+    mem = internal_malloc(m, req);
+    if (mem != 0) {
+
+      mchunkptr p = mem2chunk(mem);
+      if (PREACTION(m)) return 0;
+      if ((((size_t)(mem)) & (alignment - 1)) != 0) {         /* misaligned */
+        /*
+          Find an aligned spot inside chunk.  Since we need to give
+          back leading space in a chunk of at least MIN_CHUNK_SIZE, if
+          the first calculation places us at a spot with less than
+          MIN_CHUNK_SIZE leader, we can move to the next aligned spot.
+          We've allocated enough total room so that this is always
+          possible.
+        */
+        char *    br = (char *)mem2chunk((size_t)(
+            ((size_t)((char *)mem + alignment - SIZE_T_ONE)) & -alignment));
+        char *    pos = ((size_t)(br - (char *)(p)) >= MIN_CHUNK_SIZE)
+                            ? br
+                            : br + alignment;
+        mchunkptr newp = (mchunkptr)pos;
+        size_t    leadsize = pos - (char *)(p);
+        size_t    newsize = chunksize(p) - leadsize;
+
+        if (is_mmapped(p)) {      /* For mmapped chunks, just adjust offset */
+          newp->prev_foot = p->prev_foot + leadsize;
+          newp->head = newsize;
+
+        } else {               /* Otherwise, give back leader, use the rest */
+
+          set_inuse(m, newp, newsize);
+          set_inuse(m, p, leadsize);
+          dispose_chunk(m, p, leadsize);
+
+        }
+
+        p = newp;
+
+      }
+
+      /* Give back spare room at the end */
+      if (!is_mmapped(p)) {
+
+        size_t size = chunksize(p);
+        if (size > nb + MIN_CHUNK_SIZE) {
+
+          size_t    remainder_size = size - nb;
+          mchunkptr remainder = chunk_plus_offset(p, nb);
+          set_inuse(m, p, nb);
+          set_inuse(m, remainder, remainder_size);
+          dispose_chunk(m, remainder, remainder_size);
+
+        }
+
+      }
+
+      mem = chunk2mem(p);
+      assert(chunksize(p) >= nb);
+      assert(((size_t)mem & (alignment - 1)) == 0);
+      check_inuse_chunk(m, p);
+      POSTACTION(m);
+
+    }
+
+  }
+
+  return mem;
+
+}
+
+/*
+  Common support for independent_X routines, handling
+    all of the combinations that can result.
+  The opts arg has:
+    bit 0 set if all elements are same size (using sizes[0])
+    bit 1 set if elements should be zeroed
+*/
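+/*
+  For reference, the exported wrappers later in this file encode opts as:
+    dlindependent_calloc   -> opts = 3  (bit 0: same size; bit 1: zeroed)
+    dlindependent_comalloc -> opts = 0  (per-element sizes, not cleared)
+*/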
+static void **ialloc(mstate m, size_t n_elements, size_t *sizes, int opts,
+                     void *chunks[]) {
+
+  size_t    element_size;         /* chunksize of each element, if all same */
+  size_t    contents_size;                        /* total size of elements */
+  size_t    array_size;                    /* request size of pointer array */
+  void *    mem;                                /* malloced aggregate space */
+  mchunkptr p;                                       /* corresponding chunk */
+  size_t    remainder_size;              /* remaining bytes while splitting */
+  void **   marray;                /* either "chunks" or malloced ptr array */
+  mchunkptr array_chunk;                    /* chunk for malloced ptr array */
+  flag_t    was_enabled;                                 /* to disable mmap */
+  size_t    size;
+  size_t    i;
+
+  ensure_initialization();
+  /* compute array length, if needed */
+  if (chunks != 0) {
+
+    if (n_elements == 0) return chunks;                    /* nothing to do */
+    marray = chunks;
+    array_size = 0;
+
+  } else {
+
+    /* if empty req, must still return chunk representing empty array */
+    if (n_elements == 0) return (void **)internal_malloc(m, 0);
+    marray = 0;
+    array_size = request2size(n_elements * (sizeof(void *)));
+
+  }
+
+  /* compute total element size */
+  if (opts & 0x1) {                                        /* all-same-size */
+    element_size = request2size(*sizes);
+    contents_size = n_elements * element_size;
+
+  } else {                                          /* add up all the sizes */
+
+    element_size = 0;
+    contents_size = 0;
+    for (i = 0; i != n_elements; ++i)
+      contents_size += request2size(sizes[i]);
+
+  }
+
+  size = contents_size + array_size;
+
+  /*
+     Allocate the aggregate chunk.  First disable direct-mmapping so
+     malloc won't use it, since we would not be able to later
+     free/realloc space internal to a segregated mmap region.
+  */
+  was_enabled = use_mmap(m);
+  disable_mmap(m);
+  mem = internal_malloc(m, size - CHUNK_OVERHEAD);
+  if (was_enabled) enable_mmap(m);
+  if (mem == 0) return 0;
+
+  if (PREACTION(m)) return 0;
+  p = mem2chunk(mem);
+  remainder_size = chunksize(p);
+
+  assert(!is_mmapped(p));
+
+  if (opts & 0x2) {                        /* optionally clear the elements */
+    __builtin_memset((size_t *)mem, 0,
+                     remainder_size - SIZE_T_SIZE - array_size);
+
+  }
+
+  /* If not provided, allocate the pointer array as final part of chunk */
+  if (marray == 0) {
+
+    size_t array_chunk_size;
+    array_chunk = chunk_plus_offset(p, contents_size);
+    array_chunk_size = remainder_size - contents_size;
+    marray = (void **)(chunk2mem(array_chunk));
+    set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size);
+    remainder_size = contents_size;
+
+  }
+
+  /* split out elements */
+  for (i = 0;; ++i) {
+
+    marray[i] = chunk2mem(p);
+    if (i != n_elements - 1) {
+
+      if (element_size != 0)
+        size = element_size;
+      else
+        size = request2size(sizes[i]);
+      remainder_size -= size;
+      set_size_and_pinuse_of_inuse_chunk(m, p, size);
+      p = chunk_plus_offset(p, size);
+
+    } else {           /* the final element absorbs any overallocation slop */
+
+      set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size);
+      break;
+
+    }
+
+  }
+
+  #if DEBUG
+  if (marray != chunks) {
+
+    /* final element must have exactly exhausted chunk */
+    if (element_size != 0) {
+
+      assert(remainder_size == element_size);
+
+    } else {
+
+      assert(remainder_size == request2size(sizes[i]));
+
+    }
+
+    check_inuse_chunk(m, mem2chunk(marray));
+
+  }
+
+  for (i = 0; i != n_elements; ++i)
+    check_inuse_chunk(m, mem2chunk(marray[i]));
+
+  #endif                                                           /* DEBUG */
+
+  POSTACTION(m);
+  return marray;
+
+}
+
+/* Try to free all pointers in the given array.
+   Note: this could be made faster by delaying consolidation,
+   at the price of disabling some user integrity checks. We
+   still optimize some consolidations by combining adjacent
+   chunks before freeing, which will occur often if allocated
+   with ialloc or if the array is sorted.
+*/
+static size_t internal_bulk_free(mstate m, void *array[], size_t nelem) {
+
+  size_t unfreed = 0;
+  if (!PREACTION(m)) {
+
+    void **a;
+    void **fence = &(array[nelem]);
+    for (a = array; a != fence; ++a) {
+
+      void *mem = *a;
+      if (mem != 0) {
+
+        mchunkptr p = mem2chunk(mem);
+        size_t    psize = chunksize(p);
+  #if FOOTERS
+        if (get_mstate_for(p) != m) {
+
+          ++unfreed;
+          continue;
+
+        }
+
+  #endif
+        check_inuse_chunk(m, p);
+        *a = 0;
+        if (RTCHECK(ok_address(m, p) && ok_inuse(p))) {
+
+          void **   b = a + 1;              /* try to merge with next chunk */
+          mchunkptr next = next_chunk(p);
+          if (b != fence && *b == chunk2mem(next)) {
+
+            size_t newsize = chunksize(next) + psize;
+            set_inuse(m, p, newsize);
+            *b = chunk2mem(p);
+
+          } else
+
+            dispose_chunk(m, p, psize);
+
+        } else {
+
+          CORRUPTION_ERROR_ACTION(m);
+          break;
+
+        }
+
+      }
+
+    }
+
+    if (should_trim(m, m->topsize)) sys_trim(m, 0);
+    POSTACTION(m);
+
+  }
+
+  return unfreed;
+
+}
+
+  /* Traversal */
+  #if MALLOC_INSPECT_ALL
+static void internal_inspect_all(mstate m,
+                                 void (*handler)(void *start, void *end,
+                                                 size_t used_bytes,
+                                                 void * callback_arg),
+                                 void *arg) {
+
+  if (is_initialized(m)) {
+
+    mchunkptr   top = m->top;
+    msegmentptr s;
+    for (s = &m->seg; s != 0; s = s->next) {
+
+      mchunkptr q = align_as_chunk(s->base);
+      while (segment_holds(s, q) && q->head != FENCEPOST_HEAD) {
+
+        mchunkptr next = next_chunk(q);
+        size_t    sz = chunksize(q);
+        size_t    used;
+        void *    start;
+        if (is_inuse(q)) {
+
+          used = sz - CHUNK_OVERHEAD;                /* must not be mmapped */
+          start = chunk2mem(q);
+
+        } else {
+
+          used = 0;
+          if (is_small(sz)) {             /* offset by possible bookkeeping */
+            start = (void *)((char *)q + sizeof(struct malloc_chunk));
+
+          } else {
+
+            start = (void *)((char *)q + sizeof(struct malloc_tree_chunk));
+
+          }
+
+        }
+
+        if (start < (void *)next)       /* skip if all space is bookkeeping */
+          handler(start, next, used, arg);
+        if (q == top) break;
+        q = next;
+
+      }
+
+    }
+
+  }
+
+}
+
+  #endif                                              /* MALLOC_INSPECT_ALL */
+
+/* ------------------ Exported realloc, memalign, etc -------------------- */
+
+  #if !ONLY_MSPACES
+
+void *dlrealloc(void *oldmem, size_t bytes) {
+
+  void *mem = 0;
+  if (oldmem == 0) {
+
+    mem = dlmalloc(bytes);
+
+  } else if (bytes >= MAX_REQUEST) {
+
+    MALLOC_FAILURE_ACTION;
+
+  }
+
+    #ifdef REALLOC_ZERO_BYTES_FREES
+  else if (bytes == 0) {
+
+    dlfree(oldmem);
+
+  }
+
+    #endif                                      /* REALLOC_ZERO_BYTES_FREES */
+  else {
+
+    size_t    nb = request2size(bytes);
+    mchunkptr oldp = mem2chunk(oldmem);
+    #if !FOOTERS
+    mstate m = gm;
+    #else                                                        /* FOOTERS */
+    mstate m = get_mstate_for(oldp);
+    if (!ok_magic(m)) {
+
+      USAGE_ERROR_ACTION(m, oldmem);
+      return 0;
+
+    }
+
+    #endif                                                       /* FOOTERS */
+    if (!PREACTION(m)) {
+
+      mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1);
+      POSTACTION(m);
+      if (newp != 0) {
+
+        check_inuse_chunk(m, newp);
+        mem = chunk2mem(newp);
+
+      } else {
+
+        mem = internal_malloc(m, bytes);
+        if (mem != 0) {
+
+          size_t oc = chunksize(oldp) - overhead_for(oldp);
+          __builtin_memcpy(mem, oldmem, (oc < bytes) ? oc : bytes);
+          internal_free(m, oldmem);
+
+        }
+
+      }
+
+    }
+
+  }
+
+  return mem;
+
+}
+
+void *dlrealloc_in_place(void *oldmem, size_t bytes) {
+
+  void *mem = 0;
+  if (oldmem != 0) {
+
+    if (bytes >= MAX_REQUEST) {
+
+      MALLOC_FAILURE_ACTION;
+
+    } else {
+
+      size_t    nb = request2size(bytes);
+      mchunkptr oldp = mem2chunk(oldmem);
+    #if !FOOTERS
+      mstate m = gm;
+    #else                                                        /* FOOTERS */
+      mstate m = get_mstate_for(oldp);
+      if (!ok_magic(m)) {
+
+        USAGE_ERROR_ACTION(m, oldmem);
+        return 0;
+
+      }
+
+    #endif                                                       /* FOOTERS */
+      if (!PREACTION(m)) {
+
+        mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0);
+        POSTACTION(m);
+        if (newp == oldp) {
+
+          check_inuse_chunk(m, newp);
+          mem = oldmem;
+
+        }
+
+      }
+
+    }
+
+  }
+
+  return mem;
+
+}
+
+void *dlmemalign(size_t alignment, size_t bytes) {
+
+  if (alignment <= MALLOC_ALIGNMENT) { return dlmalloc(bytes); }
+  return internal_memalign(gm, alignment, bytes);
+
+}
+
+int dlposix_memalign(void **pp, size_t alignment, size_t bytes) {
+
+  void *mem = 0;
+  if (alignment == MALLOC_ALIGNMENT)
+    mem = dlmalloc(bytes);
+  else {
+
+    size_t d = alignment / sizeof(void *);
+    size_t r = alignment % sizeof(void *);
+    if (r != 0 || d == 0 || (d & (d - SIZE_T_ONE)) != 0)
+      return EINVAL;
+    else if (bytes <= MAX_REQUEST - alignment) {
+
+      if (alignment < MIN_CHUNK_SIZE) alignment = MIN_CHUNK_SIZE;
+      mem = internal_memalign(gm, alignment, bytes);
+
+    }
+
+  }
+
+  if (mem == 0)
+    return ENOMEM;
+  else {
+
+    *pp = mem;
+    return 0;
+
+  }
+
+}
+
+void *dlvalloc(size_t bytes) {
+
+  size_t pagesz;
+  ensure_initialization();
+  pagesz = mparams.page_size;
+  return dlmemalign(pagesz, bytes);
+
+}
+
+void *dlpvalloc(size_t bytes) {
+
+  size_t pagesz;
+  ensure_initialization();
+  pagesz = mparams.page_size;
+  return dlmemalign(pagesz,
+                    (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE));
+
+}
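+
+/*
+  Worked example of the rounding in dlpvalloc with a 4096-byte page:
+    dlpvalloc(1)    rounds the request up to 4096 bytes
+    dlpvalloc(4096) stays at 4096 bytes
+    dlpvalloc(4097) rounds up to 8192 bytes
+*/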
+
+void **dlindependent_calloc(size_t n_elements, size_t elem_size,
+                            void *chunks[]) {
+
+  size_t sz = elem_size;                       /* serves as 1-element array */
+  return ialloc(gm, n_elements, &sz, 3, chunks);
+
+}
+
+void **dlindependent_comalloc(size_t n_elements, size_t sizes[],
+                              void *chunks[]) {
+
+  return ialloc(gm, n_elements, sizes, 0, chunks);
+
+}
+
+size_t dlbulk_free(void *array[], size_t nelem) {
+
+  return internal_bulk_free(gm, array, nelem);
+
+}
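+
+/*
+  Usage sketch (hypothetical pointers): dlbulk_free zeroes every array
+  slot it manages to free and returns the number of entries it could not
+  free, so 0 means everything was released:
+
+    void * ptrs[2] = {dlmalloc(32), dlmalloc(64)};
+    size_t unfreed = dlbulk_free(ptrs, 2);   // now ptrs[0] == ptrs[1] == 0
+*/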
+
+    #if MALLOC_INSPECT_ALL
+void dlmalloc_inspect_all(void (*handler)(void *start, void *end,
+                                          size_t used_bytes,
+                                          void * callback_arg),
+                          void *arg) {
+
+  ensure_initialization();
+  if (!PREACTION(gm)) {
+
+    internal_inspect_all(gm, handler, arg);
+    POSTACTION(gm);
+
+  }
+
+}
+
+    #endif                                            /* MALLOC_INSPECT_ALL */
+
+int dlmalloc_trim(size_t pad) {
+
+  int result = 0;
+  ensure_initialization();
+  if (!PREACTION(gm)) {
+
+    result = sys_trim(gm, pad);
+    POSTACTION(gm);
+
+  }
+
+  return result;
+
+}
+
+size_t dlmalloc_footprint(void) {
+
+  return gm->footprint;
+
+}
+
+size_t dlmalloc_max_footprint(void) {
+
+  return gm->max_footprint;
+
+}
+
+size_t dlmalloc_footprint_limit(void) {
+
+  size_t maf = gm->footprint_limit;
+  return maf == 0 ? MAX_SIZE_T : maf;
+
+}
+
+size_t dlmalloc_set_footprint_limit(size_t bytes) {
+
+  size_t result;                                       /* invert sense of 0 */
+  if (bytes == 0)
+    result = granularity_align(1);                      /* Use minimal size */
+  else if (bytes == MAX_SIZE_T)
+    result = 0;                                                  /* disable */
+  else
+    result = granularity_align(bytes);
+  return gm->footprint_limit = result;
+
+}
+
+    #if !NO_MALLINFO
+struct mallinfo dlmallinfo(void) {
+
+  return internal_mallinfo(gm);
+
+}
+
+    #endif                                                   /* NO_MALLINFO */
+
+    #if !NO_MALLOC_STATS
+void dlmalloc_stats() {
+
+  internal_malloc_stats(gm);
+
+}
+
+    #endif                                               /* NO_MALLOC_STATS */
+
+int dlmallopt(int param_number, int value) {
+
+  return change_mparam(param_number, value);
+
+}
+
+size_t dlmalloc_usable_size(void *mem) {
+
+  if (mem != 0) {
+
+    mchunkptr p = mem2chunk(mem);
+    if (is_inuse(p)) return chunksize(p) - overhead_for(p);
+
+  }
+
+  return 0;
+
+}
+
+  #endif                                                   /* !ONLY_MSPACES */
+
+/* ----------------------------- user mspaces ---------------------------- */
+
+  #if MSPACES
+
+static mstate init_user_mstate(char *tbase, size_t tsize) {
+
+  size_t    msize = pad_request(sizeof(struct malloc_state));
+  mchunkptr mn;
+  mchunkptr msp = align_as_chunk(tbase);
+  mstate    m = (mstate)(chunk2mem(msp));
+  __builtin_memset(m, 0, msize);
+  (void)INITIAL_LOCK(&m->mutex);
+  msp->head = (msize | INUSE_BITS);
+  m->seg.base = m->least_addr = tbase;
+  m->seg.size = m->footprint = m->max_footprint = tsize;
+  m->magic = mparams.magic;
+  m->release_checks = MAX_RELEASE_CHECK_RATE;
+  m->mflags = mparams.default_mflags;
+  m->extp = 0;
+  m->exts = 0;
+  disable_contiguous(m);
+  init_bins(m);
+  mn = next_chunk(mem2chunk(m));
+  init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE);
+  check_top_chunk(m, m->top);
+  return m;
+
+}
+
+mspace create_mspace(size_t capacity, int locked) {
+
+  mstate m = 0;
+  size_t msize;
+  ensure_initialization();
+  msize = pad_request(sizeof(struct malloc_state));
+  if (capacity < (size_t) - (msize + TOP_FOOT_SIZE + mparams.page_size)) {
+
+    size_t rs = ((capacity == 0) ? mparams.granularity
+                                 : (capacity + TOP_FOOT_SIZE + msize));
+    size_t tsize = granularity_align(rs);
+    char * tbase = (char *)(CALL_MMAP(tsize));
+    if (tbase != CMFAIL) {
+
+      m = init_user_mstate(tbase, tsize);
+      m->seg.sflags = USE_MMAP_BIT;
+      set_lock(m, locked);
+
+    }
+
+  }
+
+  return (mspace)m;
+
+}
+
+mspace create_mspace_with_base(void *base, size_t capacity, int locked) {
+
+  mstate m = 0;
+  size_t msize;
+  ensure_initialization();
+  msize = pad_request(sizeof(struct malloc_state));
+  if (capacity > msize + TOP_FOOT_SIZE &&
+      capacity < (size_t) - (msize + TOP_FOOT_SIZE + mparams.page_size)) {
+
+    m = init_user_mstate((char *)base, capacity);
+    m->seg.sflags = EXTERN_BIT;
+    set_lock(m, locked);
+
+  }
+
+  return (mspace)m;
+
+}
+
+int mspace_track_large_chunks(mspace msp, int enable) {
+
+  int    ret = 0;
+  mstate ms = (mstate)msp;
+  if (!PREACTION(ms)) {
+
+    if (!use_mmap(ms)) { ret = 1; }
+    if (!enable) {
+
+      enable_mmap(ms);
+
+    } else {
+
+      disable_mmap(ms);
+
+    }
+
+    POSTACTION(ms);
+
+  }
+
+  return ret;
+
+}
+
+size_t destroy_mspace(mspace msp) {
+
+  size_t freed = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+
+    msegmentptr sp = &ms->seg;
+    (void)DESTROY_LOCK(&ms->mutex);              /* destroy before unmapped */
+    while (sp != 0) {
+
+      char * base = sp->base;
+      size_t size = sp->size;
+      flag_t flag = sp->sflags;
+      (void)base;             /* placate people compiling -Wunused-variable */
+      sp = sp->next;
+      if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) &&
+          CALL_MUNMAP(base, size) == 0)
+        freed += size;
+
+    }
+
+  } else {
+
+    USAGE_ERROR_ACTION(ms, ms);
+
+  }
+
+  return freed;
+
+}
+
+/*
+  mspace versions of routines are near-clones of the global
+  versions. This is not so nice but better than the alternatives.
+*/
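+
+/*
+  A minimal mspace round trip, as a sketch (requires MSPACES at build
+  time):
+
+    mspace ms = create_mspace(0, 0);       // default capacity, no locking
+    void * p = mspace_malloc(ms, 128);
+    mspace_free(ms, p);
+    size_t reclaimed = destroy_mspace(ms); // unmaps the whole space
+*/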
+
+void *mspace_malloc(mspace msp, size_t bytes) {
+
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+
+    USAGE_ERROR_ACTION(ms, ms);
+    return 0;
+
+  }
+
+  if (!PREACTION(ms)) {
+
+    void * mem;
+    size_t nb;
+    if (bytes <= MAX_SMALL_REQUEST) {
+
+      bindex_t idx;
+      binmap_t smallbits;
+      nb = (bytes < MIN_REQUEST) ? MIN_CHUNK_SIZE : pad_request(bytes);
+      idx = small_index(nb);
+      smallbits = ms->smallmap >> idx;
+
+      if ((smallbits & 0x3U) != 0) {    /* Remainderless fit to a smallbin. */
+        mchunkptr b, p;
+        idx += ~smallbits & 1;                /* Uses next bin if idx empty */
+        b = smallbin_at(ms, idx);
+        p = b->fd;
+        assert(chunksize(p) == small_index2size(idx));
+        unlink_first_small_chunk(ms, b, p, idx);
+        set_inuse_and_pinuse(ms, p, small_index2size(idx));
+        mem = chunk2mem(p);
+        check_malloced_chunk(ms, mem, nb);
+        goto postaction;
+
+      }
+
+      else if (nb > ms->dvsize) {
+
+        if (smallbits != 0) {        /* Use chunk in next nonempty smallbin */
+          mchunkptr b, p, r;
+          size_t    rsize;
+          bindex_t  i;
+          binmap_t  leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+          binmap_t  leastbit = least_bit(leftbits);
+          compute_bit2idx(leastbit, i);
+          b = smallbin_at(ms, i);
+          p = b->fd;
+          assert(chunksize(p) == small_index2size(i));
+          unlink_first_small_chunk(ms, b, p, i);
+          rsize = small_index2size(i) - nb;
+          /* Fit here cannot be remainderless if sizes are 4 bytes */
+          if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
+            set_inuse_and_pinuse(ms, p, small_index2size(i));
+          else {
+
+            set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+            r = chunk_plus_offset(p, nb);
+            set_size_and_pinuse_of_free_chunk(r, rsize);
+            replace_dv(ms, r, rsize);
+
+          }
+
+          mem = chunk2mem(p);
+          check_malloced_chunk(ms, mem, nb);
+          goto postaction;
+
+        }
+
+        else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
+
+          check_malloced_chunk(ms, mem, nb);
+          goto postaction;
+
+        }
+
+      }
+
+    } else if (bytes >= MAX_REQUEST)
+
+      nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+    else {
+
+      nb = pad_request(bytes);
+      if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
+
+        check_malloced_chunk(ms, mem, nb);
+        goto postaction;
+
+      }
+
+    }
+
+    if (nb <= ms->dvsize) {
+
+      size_t    rsize = ms->dvsize - nb;
+      mchunkptr p = ms->dv;
+      if (rsize >= MIN_CHUNK_SIZE) {                            /* split dv */
+        mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
+        ms->dvsize = rsize;
+        set_size_and_pinuse_of_free_chunk(r, rsize);
+        set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+
+      } else {                                                /* exhaust dv */
+
+        size_t dvs = ms->dvsize;
+        ms->dvsize = 0;
+        ms->dv = 0;
+        set_inuse_and_pinuse(ms, p, dvs);
+
+      }
+
+      mem = chunk2mem(p);
+      check_malloced_chunk(ms, mem, nb);
+      goto postaction;
+
+    }
+
+    else if (nb < ms->topsize) {                               /* Split top */
+      size_t    rsize = ms->topsize -= nb;
+      mchunkptr p = ms->top;
+      mchunkptr r = ms->top = chunk_plus_offset(p, nb);
+      r->head = rsize | PINUSE_BIT;
+      set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+      mem = chunk2mem(p);
+      check_top_chunk(ms, ms->top);
+      check_malloced_chunk(ms, mem, nb);
+      goto postaction;
+
+    }
+
+    mem = sys_alloc(ms, nb);
+
+  postaction:
+    POSTACTION(ms);
+    return mem;
+
+  }
+
+  return 0;
+
+}
+
+void mspace_free(mspace msp, void *mem) {
+
+  if (mem != 0) {
+
+    mchunkptr p = mem2chunk(mem);
+    #if FOOTERS
+    mstate fm = get_mstate_for(p);
+    (void)msp;                         /* placate people compiling -Wunused */
+    #else                                                        /* FOOTERS */
+    mstate fm = (mstate)msp;
+    #endif                                                       /* FOOTERS */
+    if (!ok_magic(fm)) {
+
+      USAGE_ERROR_ACTION(fm, p);
+      return;
+
+    }
+
+    if (!PREACTION(fm)) {
+
+      check_inuse_chunk(fm, p);
+      if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
+
+        size_t    psize = chunksize(p);
+        mchunkptr next = chunk_plus_offset(p, psize);
+        if (!pinuse(p)) {
+
+          size_t prevsize = p->prev_foot;
+          if (is_mmapped(p)) {
+
+            psize += prevsize + MMAP_FOOT_PAD;
+            if (CALL_MUNMAP((char *)p - prevsize, psize) == 0)
+              fm->footprint -= psize;
+            goto postaction;
+
+          } else {
+
+            mchunkptr prev = chunk_minus_offset(p, prevsize);
+            psize += prevsize;
+            p = prev;
+            if (RTCHECK(ok_address(fm, prev))) {    /* consolidate backward */
+              if (p != fm->dv) {
+
+                unlink_chunk(fm, p, prevsize);
+
+              } else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+
+                fm->dvsize = psize;
+                set_free_with_pinuse(p, psize, next);
+                goto postaction;
+
+              }
+
+            } else
+
+              goto erroraction;
+
+          }
+
+        }
+
+        if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
+
+          if (!cinuse(next)) {                       /* consolidate forward */
+            if (next == fm->top) {
+
+              size_t tsize = fm->topsize += psize;
+              fm->top = p;
+              p->head = tsize | PINUSE_BIT;
+              if (p == fm->dv) {
+
+                fm->dv = 0;
+                fm->dvsize = 0;
+
+              }
+
+              if (should_trim(fm, tsize)) sys_trim(fm, 0);
+              goto postaction;
+
+            } else if (next == fm->dv) {
+
+              size_t dsize = fm->dvsize += psize;
+              fm->dv = p;
+              set_size_and_pinuse_of_free_chunk(p, dsize);
+              goto postaction;
+
+            } else {
+
+              size_t nsize = chunksize(next);
+              psize += nsize;
+              unlink_chunk(fm, next, nsize);
+              set_size_and_pinuse_of_free_chunk(p, psize);
+              if (p == fm->dv) {
+
+                fm->dvsize = psize;
+                goto postaction;
+
+              }
+
+            }
+
+          } else
+
+            set_free_with_pinuse(p, psize, next);
+
+          if (is_small(psize)) {
+
+            insert_small_chunk(fm, p, psize);
+            check_free_chunk(fm, p);
+
+          } else {
+
+            tchunkptr tp = (tchunkptr)p;
+            insert_large_chunk(fm, tp, psize);
+            check_free_chunk(fm, p);
+            if (--fm->release_checks == 0) release_unused_segments(fm);
+
+          }
+
+          goto postaction;
+
+        }
+
+      }
+
+    erroraction:
+      USAGE_ERROR_ACTION(fm, p);
+    postaction:
+      POSTACTION(fm);
+
+    }
+
+  }
+
+}
+
+void *mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) {
+
+  void * mem;
+  size_t req = 0;
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+
+    USAGE_ERROR_ACTION(ms, ms);
+    return 0;
+
+  }
+
+  if (n_elements != 0) {
+
+    req = n_elements * elem_size;
+    if (((n_elements | elem_size) & ~(size_t)0xffff) &&
+        (req / n_elements != elem_size))
+      req = MAX_SIZE_T;             /* force downstream failure on overflow */
+
+  }
+
+  mem = internal_malloc(ms, req);
+  if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
+    __builtin_memset(mem, 0, req);
+  return mem;
+
+}
+
+void *mspace_realloc(mspace msp, void *oldmem, size_t bytes) {
+
+  void *mem = 0;
+  if (oldmem == 0) {
+
+    mem = mspace_malloc(msp, bytes);
+
+  } else if (bytes >= MAX_REQUEST) {
+
+    MALLOC_FAILURE_ACTION;
+
+  }
+
+    #ifdef REALLOC_ZERO_BYTES_FREES
+  else if (bytes == 0) {
+
+    mspace_free(msp, oldmem);
+
+  }
+
+    #endif                                      /* REALLOC_ZERO_BYTES_FREES */
+  else {
+
+    size_t    nb = request2size(bytes);
+    mchunkptr oldp = mem2chunk(oldmem);
+    #if !FOOTERS
+    mstate m = (mstate)msp;
+    #else                                                        /* FOOTERS */
+    mstate m = get_mstate_for(oldp);
+    if (!ok_magic(m)) {
+
+      USAGE_ERROR_ACTION(m, oldmem);
+      return 0;
+
+    }
+
+    #endif                                                       /* FOOTERS */
+    if (!PREACTION(m)) {
+
+      mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1);
+      POSTACTION(m);
+      if (newp != 0) {
+
+        check_inuse_chunk(m, newp);
+        mem = chunk2mem(newp);
+
+      } else {
+
+        mem = mspace_malloc(m, bytes);
+        if (mem != 0) {
+
+          size_t oc = chunksize(oldp) - overhead_for(oldp);
+          __builtin_memcpy(mem, oldmem, (oc < bytes) ? oc : bytes);
+          mspace_free(m, oldmem);
+
+        }
+
+      }
+
+    }
+
+  }
+
+  return mem;
+
+}
+
+void *mspace_realloc_in_place(mspace msp, void *oldmem, size_t bytes) {
+
+  void *mem = 0;
+  if (oldmem != 0) {
+
+    if (bytes >= MAX_REQUEST) {
+
+      MALLOC_FAILURE_ACTION;
+
+    } else {
+
+      size_t    nb = request2size(bytes);
+      mchunkptr oldp = mem2chunk(oldmem);
+    #if !FOOTERS
+      mstate m = (mstate)msp;
+    #else                                                        /* FOOTERS */
+      mstate m = get_mstate_for(oldp);
+      (void)msp;                       /* placate people compiling -Wunused */
+      if (!ok_magic(m)) {
+
+        USAGE_ERROR_ACTION(m, oldmem);
+        return 0;
+
+      }
+
+    #endif                                                       /* FOOTERS */
+      if (!PREACTION(m)) {
+
+        mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0);
+        POSTACTION(m);
+        if (newp == oldp) {
+
+          check_inuse_chunk(m, newp);
+          mem = oldmem;
+
+        }
+
+      }
+
+    }
+
+  }
+
+  return mem;
+
+}
+
+void *mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
+
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+
+    USAGE_ERROR_ACTION(ms, ms);
+    return 0;
+
+  }
+
+  if (alignment <= MALLOC_ALIGNMENT) return mspace_malloc(msp, bytes);
+  return internal_memalign(ms, alignment, bytes);
+
+}
+
+void **mspace_independent_calloc(mspace msp, size_t n_elements,
+                                 size_t elem_size, void *chunks[]) {
+
+  size_t sz = elem_size;                       /* serves as 1-element array */
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+
+    USAGE_ERROR_ACTION(ms, ms);
+    return 0;
+
+  }
+
+  return ialloc(ms, n_elements, &sz, 3, chunks);
+
+}
+
+void **mspace_independent_comalloc(mspace msp, size_t n_elements,
+                                   size_t sizes[], void *chunks[]) {
+
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) {
+
+    USAGE_ERROR_ACTION(ms, ms);
+    return 0;
+
+  }
+
+  return ialloc(ms, n_elements, sizes, 0, chunks);
+
+}
+
+size_t mspace_bulk_free(mspace msp, void *array[], size_t nelem) {
+
+  return internal_bulk_free((mstate)msp, array, nelem);
+
+}
+
+    #if MALLOC_INSPECT_ALL
+void mspace_inspect_all(mspace msp,
+                        void (*handler)(void *start, void *end,
+                                        size_t used_bytes, void *callback_arg),
+                        void *arg) {
+
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+
+    if (!PREACTION(ms)) {
+
+      internal_inspect_all(ms, handler, arg);
+      POSTACTION(ms);
+
+    }
+
+  } else {
+
+    USAGE_ERROR_ACTION(ms, ms);
+
+  }
+
+}
+
+    #endif                                            /* MALLOC_INSPECT_ALL */
+
+int mspace_trim(mspace msp, size_t pad) {
+
+  int    result = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+
+    if (!PREACTION(ms)) {
+
+      result = sys_trim(ms, pad);
+      POSTACTION(ms);
+
+    }
+
+  } else {
+
+    USAGE_ERROR_ACTION(ms, ms);
+
+  }
+
+  return result;
+
+}
+
+    #if !NO_MALLOC_STATS
+void mspace_malloc_stats(mspace msp) {
+
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+
+    internal_malloc_stats(ms);
+
+  } else {
+
+    USAGE_ERROR_ACTION(ms, ms);
+
+  }
+
+}
+
+    #endif                                               /* NO_MALLOC_STATS */
+
+size_t mspace_footprint(mspace msp) {
+
+  size_t result = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+
+    result = ms->footprint;
+
+  } else {
+
+    USAGE_ERROR_ACTION(ms, ms);
+
+  }
+
+  return result;
+
+}
+
+size_t mspace_max_footprint(mspace msp) {
+
+  size_t result = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+
+    result = ms->max_footprint;
+
+  } else {
+
+    USAGE_ERROR_ACTION(ms, ms);
+
+  }
+
+  return result;
+
+}
+
+size_t mspace_footprint_limit(mspace msp) {
+
+  size_t result = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+
+    size_t maf = ms->footprint_limit;
+    result = (maf == 0) ? MAX_SIZE_T : maf;
+
+  } else {
+
+    USAGE_ERROR_ACTION(ms, ms);
+
+  }
+
+  return result;
+
+}
+
+size_t mspace_set_footprint_limit(mspace msp, size_t bytes) {
+
+  size_t result = 0;
+  mstate ms = (mstate)msp;
+  if (ok_magic(ms)) {
+
+    if (bytes == 0)
+      result = granularity_align(1);                    /* Use minimal size */
+    else if (bytes == MAX_SIZE_T)
+      result = 0;                                                /* disable */
+    else
+      result = granularity_align(bytes);
+    ms->footprint_limit = result;
+
+  } else {
+
+    USAGE_ERROR_ACTION(ms, ms);
+
+  }
+
+  return result;
+
+}
+
+    #if !NO_MALLINFO
+struct mallinfo mspace_mallinfo(mspace msp) {
+
+  mstate ms = (mstate)msp;
+  if (!ok_magic(ms)) { USAGE_ERROR_ACTION(ms, ms); }
+  return internal_mallinfo(ms);
+
+}
+
+    #endif                                                   /* NO_MALLINFO */
+
+size_t mspace_usable_size(const void *mem) {
+
+  if (mem != 0) {
+
+    mchunkptr p = mem2chunk(mem);
+    if (is_inuse(p)) return chunksize(p) - overhead_for(p);
+
+  }
+
+  return 0;
+
+}
+
+int mspace_mallopt(int param_number, int value) {
+
+  return change_mparam(param_number, value);
+
+}
+
+  #endif                                                         /* MSPACES */
+
+/* -------------------- Alternative MORECORE functions ------------------- */
+
+/*
+  Guidelines for creating a custom version of MORECORE:
+
+  * For best performance, MORECORE should allocate in multiples of pagesize.
+  * MORECORE may allocate more memory than requested. (Or even less,
+      but this will usually result in a malloc failure.)
+  * MORECORE must not allocate memory when given argument zero, but
+      instead return one past the end address of memory from previous
+      nonzero call.
+  * For best performance, consecutive calls to MORECORE with positive
+      arguments should return increasing addresses, indicating that
+      space has been contiguously extended.
+  * Even though consecutive calls to MORECORE need not return contiguous
+      addresses, it must be OK for malloc'ed chunks to span multiple
+      regions in those cases where they do happen to be contiguous.
+  * MORECORE need not handle negative arguments -- it may instead
+      just return MFAIL when given negative arguments.
+      Negative arguments are always multiples of pagesize. MORECORE
+      must not misinterpret negative args as large positive unsigned
+      args. You can suppress such calls from occurring at all by defining
+      MORECORE_CANNOT_TRIM.
+
+  As an example alternative MORECORE, here is a custom allocator
+  kindly contributed for pre-OSX macOS.  It uses virtually but not
+  necessarily physically contiguous non-paged memory (locked in,
+  present and won't get swapped out).  You can use it by uncommenting
+  this section, adding some #includes, and setting up the appropriate
+  defines above:
+
+      #define MORECORE osMoreCore
+
+  There is also a shutdown routine that should somehow be called for
+  cleanup upon program exit.
+
+  #define MAX_POOL_ENTRIES 100
+  #define MINIMUM_MORECORE_SIZE  (64 * 1024U)
+  static int next_os_pool;
+  void *our_os_pools[MAX_POOL_ENTRIES];
+
+  void *osMoreCore(int size)
+  {
+
+    void *ptr = 0;
+    static void *sbrk_top = 0;
+
+    if (size > 0)
+    {
+
+      if (size < MINIMUM_MORECORE_SIZE)
+         size = MINIMUM_MORECORE_SIZE;
+      if (CurrentExecutionLevel() == kTaskLevel)
+         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
+      if (ptr == 0)
+      {
+
+        return (void *) MFAIL;
+
+      }
+
+      // save ptrs so they can be freed during cleanup
+      our_os_pools[next_os_pool] = ptr;
+      next_os_pool++;
+      ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
+      sbrk_top = (char *) ptr + size;
+      return ptr;
+
+    }
+
+    else if (size < 0)
+    {
+
+      // we don't currently support shrink behavior
+      return (void *) MFAIL;
+
+    }
+
+    else
+    {
+
+      return sbrk_top;
+
+    }
+
+  }
+
+  // cleanup any allocated memory pools
+  // called as last thing before shutting down driver
+
+  void osCleanupMem(void)
+  {
+
+    void **ptr;
+
+    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
+      if (*ptr)
+      {
+
+         PoolDeallocate(*ptr);
+         *ptr = 0;
+
+      }
+
+  }
+
+*/
+
+/* -----------------------------------------------------------------------
+History:
+    v2.8.6 Wed Aug 29 06:57:58 2012  Doug Lea
+      * fix bad comparison in dlposix_memalign
+      * don't reuse adjusted asize in sys_alloc
+      * add LOCK_AT_FORK -- thanks to Kirill Artamonov for the suggestion
+      * reduce compiler warnings -- thanks to all who reported/suggested these
+
+    v2.8.5 Sun May 22 10:26:02 2011  Doug Lea  (dl at gee)
+      * Always perform unlink checks unless INSECURE
+      * Add posix_memalign.
+      * Improve realloc to expand in more cases; expose realloc_in_place.
+        Thanks to Peter Buhr for the suggestion.
+      * Add footprint_limit, inspect_all, bulk_free. Thanks
+        to Barry Hayes and others for the suggestions.
+      * Internal refactorings to avoid calls while holding locks
+      * Use non-reentrant locks by default. Thanks to Roland McGrath
+        for the suggestion.
+      * Small fixes to mspace_destroy, reset_on_error.
+      * Various configuration extensions/changes. Thanks
+         to all who contributed these.
+
+    V2.8.4a Thu Apr 28 14:39:43 2011 (dl at gee.cs.oswego.edu)
+      * Update Creative Commons URL
+
+    V2.8.4 Wed May 27 09:56:23 2009  Doug Lea  (dl at gee)
+      * Use zeros instead of prev foot for is_mmapped
+      * Add mspace_track_large_chunks; thanks to Jean Brouwers
+      * Fix set_inuse in internal_realloc; thanks to Jean Brouwers
+      * Fix insufficient sys_alloc padding when using 16byte alignment
+      * Fix bad error check in mspace_footprint
+      * Adaptations for ptmalloc; thanks to Wolfram Gloger.
+      * Reentrant spin locks; thanks to Earl Chew and others
+      * Win32 improvements; thanks to Niall Douglas and Earl Chew
+      * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options
+      * Extension hook in malloc_state
+      * Various small adjustments to reduce warnings on some compilers
+      * Various configuration extensions/changes for more platforms. Thanks
+         to all who contributed these.
+
+    V2.8.3 Thu Sep 22 11:16:32 2005  Doug Lea  (dl at gee)
+      * Add max_footprint functions
+      * Ensure all appropriate literals are size_t
+      * Fix conditional compilation problem for some #define settings
+      * Avoid concatenating segments with the one provided
+        in create_mspace_with_base
+      * Rename some variables to avoid compiler shadowing warnings
+      * Use explicit lock initialization.
+      * Better handling of sbrk interference.
+      * Simplify and fix segment insertion, trimming and mspace_destroy
+      * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x
+      * Thanks especially to Dennis Flanagan for help on these.
+
+    V2.8.2 Sun Jun 12 16:01:10 2005  Doug Lea  (dl at gee)
+      * Fix memalign brace error.
+
+    V2.8.1 Wed Jun  8 16:11:46 2005  Doug Lea  (dl at gee)
+      * Fix improper #endif nesting in C++
+      * Add explicit casts needed for C++
+
+    V2.8.0 Mon May 30 14:09:02 2005  Doug Lea  (dl at gee)
+      * Use trees for large bins
+      * Support mspaces
+      * Use segments to unify sbrk-based and mmap-based system allocation,
+        removing need for emulation on most platforms without sbrk.
+      * Default safety checks
+      * Optional footer checks. Thanks to William Robertson for the idea.
+      * Internal code refactoring
+      * Incorporate suggestions and platform-specific changes.
+        Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas,
+        Aaron Bachmann,  Emery Berger, and others.
+      * Speed up non-fastbin processing enough to remove fastbins.
+      * Remove useless cfree() to avoid conflicts with other apps.
+      * Remove internal memcpy, memset. Compilers handle builtins better.
+      * Remove some options that no one ever used and rename others.
+
+    V2.7.2 Sat Aug 17 09:07:30 2002  Doug Lea  (dl at gee)
+      * Fix malloc_state bitmap array misdeclaration
+
+    V2.7.1 Thu Jul 25 10:58:03 2002  Doug Lea  (dl at gee)
+      * Allow tuning of FIRST_SORTED_BIN_SIZE
+      * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
+      * Better detection and support for non-contiguousness of MORECORE.
+        Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
+      * Bypass most of malloc if no frees. Thanks To Emery Berger.
+      * Fix freeing of old top non-contiguous chunk in sysmalloc.
+      * Raised default trim and map thresholds to 256K.
+      * Fix mmap-related #defines. Thanks to Lubos Lunak.
+      * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
+      * Branch-free bin calculation
+      * Default trim and mmap thresholds now 256K.
+
+    V2.7.0 Sun Mar 11 14:14:06 2001  Doug Lea  (dl at gee)
+      * Introduce independent_comalloc and independent_calloc.
+        Thanks to Michael Pachos for motivation and help.
+      * Make optional .h file available
+      * Allow > 2GB requests on 32bit systems.
+      * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>.
+        Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
+        and Anonymous.
+      * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
+        helping test this.)
+      * memalign: check alignment arg
+      * realloc: don't try to shift chunks backwards, since this
+        leads to  more fragmentation in some programs and doesn't
+        seem to help in any others.
+      * Collect all cases in malloc requiring system memory into sysmalloc
+      * Use mmap as backup to sbrk
+      * Place all internal state in malloc_state
+      * Introduce fastbins (although similar to 2.5.1)
+      * Many minor tunings and cosmetic improvements
+      * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
+      * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
+        Thanks to Tony E. Bennett <tbennett@nvidia.com> and others.
+      * Include errno.h to support default failure action.
+
+    V2.6.6 Sun Dec  5 07:42:19 1999  Doug Lea  (dl at gee)
+      * return null for negative arguments
+      * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
+         * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
+          (e.g. WIN32 platforms)
+         * Cleanup header file inclusion for WIN32 platforms
+         * Cleanup code to avoid Microsoft Visual C++ compiler complaints
+         * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
+           memory allocation routines
+         * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
+         * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
+           usage of 'assert' in non-WIN32 code
+         * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
+           avoid infinite loop
+      * Always call 'fREe()' rather than 'free()'
+
+    V2.6.5 Wed Jun 17 15:57:31 1998  Doug Lea  (dl at gee)
+      * Fixed ordering problem with boundary-stamping
+
+    V2.6.3 Sun May 19 08:17:58 1996  Doug Lea  (dl at gee)
+      * Added pvalloc, as recommended by H.J. Liu
+      * Added 64bit pointer support mainly from Wolfram Gloger
+      * Added anonymously donated WIN32 sbrk emulation
+      * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
+      * malloc_extend_top: fix mask error that caused wastage after
+        foreign sbrks
+      * Add linux mremap support code from HJ Liu
+
+    V2.6.2 Tue Dec  5 06:52:55 1995  Doug Lea  (dl at gee)
+      * Integrated most documentation with the code.
+      * Add support for mmap, with help from
+        Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+      * Use last_remainder in more cases.
+      * Pack bins using idea from  colin@nyx10.cs.du.edu
+      * Use ordered bins instead of best-fit threshold
+      * Eliminate block-local decls to simplify tracing and debugging.
+      * Support another case of realloc via move into top
+      * Fix error occurring when initial sbrk_base not word-aligned.
+      * Rely on page size for units instead of SBRK_UNIT to
+        avoid surprises about sbrk alignment conventions.
+      * Add mallinfo, mallopt. Thanks to Raymond Nijssen
+        (raymond@es.ele.tue.nl) for the suggestion.
+      * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
+      * More precautions for cases where other routines call sbrk,
+        courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
+      * Added macros etc., allowing use in linux libc from
+        H.J. Lu (hjl@gnu.ai.mit.edu)
+      * Inverted this history list
+
+    V2.6.1 Sat Dec  2 14:10:57 1995  Doug Lea  (dl at gee)
+      * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
+      * Removed all preallocation code since under current scheme
+        the work required to undo bad preallocations exceeds
+        the work saved in good cases for most test programs.
+      * No longer use return list or unconsolidated bins since
+        no scheme using them consistently outperforms those that don't
+        given above changes.
+      * Use best fit for very large chunks to prevent some worst-cases.
+      * Added some support for debugging
+
+    V2.6.0 Sat Nov  4 07:05:23 1995  Doug Lea  (dl at gee)
+      * Removed footers when chunks are in use. Thanks to
+        Paul Wilson (wilson@cs.texas.edu) for the suggestion.
+
+    V2.5.4 Wed Nov  1 07:54:51 1995  Doug Lea  (dl at gee)
+      * Added malloc_trim, with help from Wolfram Gloger
+        (wmglo@Dent.MED.Uni-Muenchen.DE).
+
+    V2.5.3 Tue Apr 26 10:16:01 1994  Doug Lea  (dl at g)
+
+    V2.5.2 Tue Apr  5 16:20:40 1994  Doug Lea  (dl at g)
+      * realloc: try to expand in both directions
+      * malloc: swap order of clean-bin strategy;
+      * realloc: only conditionally expand backwards
+      * Try not to scavenge used bins
+      * Use bin counts as a guide to preallocation
+      * Occasionally bin return list chunks in first scan
+      * Add a few optimizations from colin@nyx10.cs.du.edu
+
+    V2.5.1 Sat Aug 14 15:40:43 1993  Doug Lea  (dl at g)
+      * faster bin computation & slightly different binning
+      * merged all consolidations to one part of malloc proper
+         (eliminating old malloc_find_space & malloc_clean_bin)
+      * Scan 2 returns chunks (not just 1)
+      * Propagate failure in realloc if malloc returns 0
+      * Add stuff to allow compilation on non-ANSI compilers
+          from kpv@research.att.com
+
+    V2.5 Sat Aug  7 07:41:59 1993  Doug Lea  (dl at g.oswego.edu)
+      * removed potential for odd address access in prev_chunk
+      * removed dependency on getpagesize.h
+      * misc cosmetics and a bit more internal documentation
+      * anticosmetics: mangled names in macros to evade debugger strangeness
+      * tested on sparc, hp-700, dec-mips, rs6000
+          with gcc & native cc (hp, dec only) allowing
+          Detlefs & Zorn comparison study (in SIGPLAN Notices.)
+
+    Trial version Fri Aug 28 13:14:29 1992  Doug Lea  (dl at g.oswego.edu)
+      * Based loosely on libg++-1.2X malloc. (It retains some of the overall
+         structure of old version,  but most details differ.)
+
+*/
+
+#endif  // __GLIBC__
+
diff --git a/qemu_mode/libqasan/hooks.c b/qemu_mode/libqasan/hooks.c
new file mode 100644
index 00000000..405dddae
--- /dev/null
+++ b/qemu_mode/libqasan/hooks.c
@@ -0,0 +1,662 @@
+/*******************************************************************************
+Copyright (c) 2019-2020, Andrea Fioraldi
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+#include "libqasan.h"
+#include "map_macro.h"
+
+char *(*__lq_libc_fgets)(char *, int, FILE *);
+int (*__lq_libc_atoi)(const char *);
+long (*__lq_libc_atol)(const char *);
+long long (*__lq_libc_atoll)(const char *);
+
+void __libqasan_init_hooks(void) {
+
+  __libqasan_init_malloc();
+
+  __lq_libc_fgets = ASSERT_DLSYM(fgets);
+  __lq_libc_atoi = ASSERT_DLSYM(atoi);
+  __lq_libc_atol = ASSERT_DLSYM(atol);
+  __lq_libc_atoll = ASSERT_DLSYM(atoll);
+
+}
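+
+/*
+  Every hook below follows the same pattern: record the call site via
+  __builtin_return_address(0) for the QASAN_DEBUG log, mark the shadow
+  ranges the call reads (QASAN_LOAD) or writes (QASAN_STORE), then forward
+  to the matching __libqasan_* implementation and log the result.
+*/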
+
+#ifdef __ANDROID__
+size_t malloc_usable_size(const void *ptr) {
+
+#else
+size_t malloc_usable_size(void *ptr) {
+
+#endif
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: malloc_usable_size(%p)\n", rtv, ptr);
+  size_t r = __libqasan_malloc_usable_size((void *)ptr);
+  QASAN_DEBUG("\t\t = %ld\n", r);
+
+  return r;
+
+}
+
+void *malloc(size_t size) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: malloc(%ld)\n", rtv, size);
+  void *r = __libqasan_malloc(size);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void *calloc(size_t nmemb, size_t size) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: calloc(%ld, %ld)\n", rtv, nmemb, size);
+  void *r = __libqasan_calloc(nmemb, size);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void *realloc(void *ptr, size_t size) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: realloc(%p, %ld)\n", rtv, ptr, size);
+  void *r = __libqasan_realloc(ptr, size);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+int posix_memalign(void **memptr, size_t alignment, size_t size) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: posix_memalign(%p, %ld, %ld)\n", rtv, memptr, alignment,
+              size);
+  int r = __libqasan_posix_memalign(memptr, alignment, size);
+  QASAN_DEBUG("\t\t = %d [*memptr = %p]\n", r, *memptr);
+
+  return r;
+
+}
+
+void *memalign(size_t alignment, size_t size) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: memalign(%ld, %ld)\n", rtv, alignment, size);
+  void *r = __libqasan_memalign(alignment, size);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void *aligned_alloc(size_t alignment, size_t size) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: aligned_alloc(%ld, %ld)\n", rtv, alignment, size);
+  void *r = __libqasan_aligned_alloc(alignment, size);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void *valloc(size_t size) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: valloc(%ld)\n", rtv, size);
+  void *r = __libqasan_memalign(sysconf(_SC_PAGESIZE), size);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void *pvalloc(size_t size) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: pvalloc(%ld)\n", rtv, size);
+  size_t page_size = sysconf(_SC_PAGESIZE);
+  size = (size + page_size - 1) & ~(page_size - 1); /* round up to a page multiple */
+  void *r = __libqasan_memalign(page_size, size);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void free(void *ptr) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: free(%p)\n", rtv, ptr);
+  __libqasan_free(ptr);
+
+}
+
+char *fgets(char *s, int size, FILE *stream) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: fgets(%p, %d, %p)\n", rtv, s, size, stream);
+  QASAN_STORE(s, size);
+#ifndef __ANDROID__
+  QASAN_LOAD(stream, sizeof(FILE));
+#endif
+  char *r = __lq_libc_fgets(s, size, stream);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+int memcmp(const void *s1, const void *s2, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: memcmp(%p, %p, %ld)\n", rtv, s1, s2, n);
+  QASAN_LOAD(s1, n);
+  QASAN_LOAD(s2, n);
+  int r = __libqasan_memcmp(s1, s2, n);
+  QASAN_DEBUG("\t\t = %d\n", r);
+
+  return r;
+
+}
+
+void *memcpy(void *dest, const void *src, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: memcpy(%p, %p, %ld)\n", rtv, dest, src, n);
+  QASAN_LOAD(src, n);
+  QASAN_STORE(dest, n);
+  void *r = __libqasan_memcpy(dest, src, n);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void *mempcpy(void *dest, const void *src, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: mempcpy(%p, %p, %ld)\n", rtv, dest, src, n);
+  QASAN_LOAD(src, n);
+  QASAN_STORE(dest, n);
+  void *r = (uint8_t *)__libqasan_memcpy(dest, src, n) + n;
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void *memmove(void *dest, const void *src, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: memmove(%p, %p, %ld)\n", rtv, dest, src, n);
+  QASAN_LOAD(src, n);
+  QASAN_STORE(dest, n);
+  void *r = __libqasan_memmove(dest, src, n);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void *memset(void *s, int c, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: memset(%p, %d, %ld)\n", rtv, s, c, n);
+  QASAN_STORE(s, n);
+  void *r = __libqasan_memset(s, c, n);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void *memchr(const void *s, int c, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: memchr(%p, %d, %ld)\n", rtv, s, c, n);
+  void *r = __libqasan_memchr(s, c, n);
+  if (r == NULL)
+    QASAN_LOAD(s, n);
+  else
+    QASAN_LOAD(s, r - s);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void *memrchr(const void *s, int c, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: memrchr(%p, %d, %ld)\n", rtv, s, c, n);
+  QASAN_LOAD(s, n);
+  void *r = __libqasan_memrchr(s, c, n);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+void *memmem(const void *haystack, size_t haystacklen, const void *needle,
+             size_t needlelen) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: memmem(%p, %ld, %p, %ld)\n", rtv, haystack, haystacklen,
+              needle, needlelen);
+  QASAN_LOAD(haystack, haystacklen);
+  QASAN_LOAD(needle, needlelen);
+  void *r = __libqasan_memmem(haystack, haystacklen, needle, needlelen);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+#ifndef __BIONIC__
+void bzero(void *s, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: bzero(%p, %ld)\n", rtv, s, n);
+  QASAN_STORE(s, n);
+  __libqasan_memset(s, 0, n);
+
+}
+
+#endif
+
+void explicit_bzero(void *s, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: bzero(%p, %ld)\n", rtv, s, n);
+  QASAN_STORE(s, n);
+  __libqasan_memset(s, 0, n);
+
+}
+
+int bcmp(const void *s1, const void *s2, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: bcmp(%p, %p, %ld)\n", rtv, s1, s2, n);
+  QASAN_LOAD(s1, n);
+  QASAN_LOAD(s2, n);
+  int r = __libqasan_bcmp(s1, s2, n);
+  QASAN_DEBUG("\t\t = %d\n", r);
+
+  return r;
+
+}
+
+char *strchr(const char *s, int c) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strchr(%p, %d)\n", rtv, s, c);
+  size_t l = __libqasan_strlen(s);
+  QASAN_LOAD(s, l + 1);
+  void *r = __libqasan_strchr(s, c);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+char *strrchr(const char *s, int c) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strrchr(%p, %d)\n", rtv, s, c);
+  size_t l = __libqasan_strlen(s);
+  QASAN_LOAD(s, l + 1);
+  void *r = __libqasan_strrchr(s, c);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+int strcasecmp(const char *s1, const char *s2) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strcasecmp(%p, %p)\n", rtv, s1, s2);
+  size_t l1 = __libqasan_strlen(s1);
+  QASAN_LOAD(s1, l1 + 1);
+  size_t l2 = __libqasan_strlen(s2);
+  QASAN_LOAD(s2, l2 + 1);
+  int r = __libqasan_strcasecmp(s1, s2);
+  QASAN_DEBUG("\t\t = %d\n", r);
+
+  return r;
+
+}
+
+int strncasecmp(const char *s1, const char *s2, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strncasecmp(%p, %p, %ld)\n", rtv, s1, s2, n);
+  size_t l1 = __libqasan_strnlen(s1, n);
+  QASAN_LOAD(s1, l1);
+  size_t l2 = __libqasan_strnlen(s2, n);
+  QASAN_LOAD(s2, l2);
+  int r = __libqasan_strncasecmp(s1, s2, n);
+  QASAN_DEBUG("\t\t = %d\n", r);
+
+  return r;
+
+}
+
+char *strcat(char *dest, const char *src) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strcat(%p, %p)\n", rtv, dest, src);
+  size_t l2 = __libqasan_strlen(src);
+  QASAN_LOAD(src, l2 + 1);
+  size_t l1 = __libqasan_strlen(dest);
+  QASAN_STORE(dest, l1 + l2 + 1);
+  __libqasan_memcpy(dest + l1, src, l2);
+  dest[l1 + l2] = 0;
+  void *r = dest;
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+int strcmp(const char *s1, const char *s2) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strcmp(%p, %p)\n", rtv, s1, s2);
+  size_t l1 = __libqasan_strlen(s1);
+  QASAN_LOAD(s1, l1 + 1);
+  size_t l2 = __libqasan_strlen(s2);
+  QASAN_LOAD(s2, l2 + 1);
+  int r = __libqasan_strcmp(s1, s2);
+  QASAN_DEBUG("\t\t = %d\n", r);
+
+  return r;
+
+}
+
+int strncmp(const char *s1, const char *s2, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strncmp(%p, %p, %ld)\n", rtv, s1, s2, n);
+  size_t l1 = __libqasan_strnlen(s1, n);
+  QASAN_LOAD(s1, l1);
+  size_t l2 = __libqasan_strnlen(s2, n);
+  QASAN_LOAD(s2, l2);
+  int r = __libqasan_strncmp(s1, s2, n);
+  QASAN_DEBUG("\t\t = %d\n", r);
+
+  return r;
+
+}
+
+char *strcpy(char *dest, const char *src) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strcpy(%p, %p)\n", rtv, dest, src);
+  size_t l = __libqasan_strlen(src) + 1;
+  QASAN_LOAD(src, l);
+  QASAN_STORE(dest, l);
+  void *r = __libqasan_memcpy(dest, src, l);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+char *strncpy(char *dest, const char *src, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strncpy(%p, %p, %ld)\n", rtv, dest, src, n);
+  size_t l = __libqasan_strnlen(src, n);
+  QASAN_STORE(dest, n);
+  void *r;
+  if (l < n) {
+
+    QASAN_LOAD(src, l + 1);
+    r = __libqasan_memcpy(dest, src, l + 1);
+
+  } else {
+
+    QASAN_LOAD(src, n);
+    r = __libqasan_memcpy(dest, src, n);
+
+  }
+
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+char *stpcpy(char *dest, const char *src) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: stpcpy(%p, %p)\n", rtv, dest, src);
+  size_t l = __libqasan_strlen(src) + 1;
+  QASAN_LOAD(src, l);
+  QASAN_STORE(dest, l);
+  char *r = __libqasan_memcpy(dest, src, l) + (l - 1);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+char *strdup(const char *s) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strdup(%p)\n", rtv, s);
+  size_t l = __libqasan_strlen(s);
+  QASAN_LOAD(s, l + 1);
+  void *r = __libqasan_malloc(l + 1);
+  __libqasan_memcpy(r, s, l + 1);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+size_t strlen(const char *s) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strlen(%p)\n", rtv, s);
+  size_t r = __libqasan_strlen(s);
+  QASAN_LOAD(s, r + 1);
+  QASAN_DEBUG("\t\t = %ld\n", r);
+
+  return r;
+
+}
+
+size_t strnlen(const char *s, size_t n) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strnlen(%p, %ld)\n", rtv, s, n);
+  size_t r = __libqasan_strnlen(s, n);
+  QASAN_LOAD(s, r);
+  QASAN_DEBUG("\t\t = %ld\n", r);
+
+  return r;
+
+}
+
+char *strstr(const char *haystack, const char *needle) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strstr(%p, %p)\n", rtv, haystack, needle);
+  size_t l = __libqasan_strlen(haystack) + 1;
+  QASAN_LOAD(haystack, l);
+  l = __libqasan_strlen(needle) + 1;
+  QASAN_LOAD(needle, l);
+  void *r = __libqasan_strstr(haystack, needle);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+char *strcasestr(const char *haystack, const char *needle) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: strcasestr(%p, %p)\n", rtv, haystack, needle);
+  size_t l = __libqasan_strlen(haystack) + 1;
+  QASAN_LOAD(haystack, l);
+  l = __libqasan_strlen(needle) + 1;
+  QASAN_LOAD(needle, l);
+  void *r = __libqasan_strcasestr(haystack, needle);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+int atoi(const char *nptr) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: atoi(%p)\n", rtv, nptr);
+  size_t l = __libqasan_strlen(nptr) + 1;
+  QASAN_LOAD(nptr, l);
+  int r = __lq_libc_atoi(nptr);
+  QASAN_DEBUG("\t\t = %d\n", r);
+
+  return r;
+
+}
+
+long atol(const char *nptr) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: atol(%p)\n", rtv, nptr);
+  size_t l = __libqasan_strlen(nptr) + 1;
+  QASAN_LOAD(nptr, l);
+  long r = __lq_libc_atol(nptr);
+  QASAN_DEBUG("\t\t = %ld\n", r);
+
+  return r;
+
+}
+
+long long atoll(const char *nptr) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: atoll(%p)\n", rtv, nptr);
+  size_t l = __libqasan_strlen(nptr) + 1;
+  QASAN_LOAD(nptr, l);
+  long long r = __lq_libc_atoll(nptr);
+  QASAN_DEBUG("\t\t = %lld\n", r);
+
+  return r;
+
+}
+
+size_t wcslen(const wchar_t *s) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: wcslen(%p)\n", rtv, s);
+  size_t r = __libqasan_wcslen(s);
+  QASAN_LOAD(s, sizeof(wchar_t) * (r + 1));
+  QASAN_DEBUG("\t\t = %ld\n", r);
+
+  return r;
+
+}
+
+wchar_t *wcscpy(wchar_t *dest, const wchar_t *src) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: wcscpy(%p, %p)\n", rtv, dest, src);
+  size_t l = __libqasan_wcslen(src) + 1;
+  QASAN_LOAD(src, l * sizeof(wchar_t));
+  QASAN_STORE(dest, l * sizeof(wchar_t));
+  void *r = __libqasan_wcscpy(dest, src);
+  QASAN_DEBUG("\t\t = %p\n", r);
+
+  return r;
+
+}
+
+int wcscmp(const wchar_t *s1, const wchar_t *s2) {
+
+  void *rtv = __builtin_return_address(0);
+
+  QASAN_DEBUG("%14p: wcscmp(%p, %p)\n", rtv, s1, s2);
+  size_t l1 = __libqasan_wcslen(s1);
+  QASAN_LOAD(s1, sizeof(wchar_t) * (l1 + 1));
+  size_t l2 = __libqasan_wcslen(s2);
+  QASAN_LOAD(s2, sizeof(wchar_t) * (l2 + 1));
+  int r = __libqasan_wcscmp(s1, s2);
+  QASAN_DEBUG("\t\t = %d\n", r);
+
+  return r;
+
+}
+
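Each hook above follows the same template: record the caller with
__builtin_return_address(0), check the shadow map with QASAN_LOAD/QASAN_STORE
for every byte the libc routine will touch, then delegate to the uninstrumented
__libqasan_* backend. As a minimal sketch, a hypothetical extra hook would look
like this (strpbrk is not intercepted by this patch, and __libqasan_strpbrk
does not exist in the backend):

char *strpbrk(const char *s, const char *accept) {

  void *rtv = __builtin_return_address(0);

  QASAN_DEBUG("%14p: strpbrk(%p, %p)\n", rtv, s, accept);
  QASAN_LOAD(s, __libqasan_strlen(s) + 1);            // whole string is read
  QASAN_LOAD(accept, __libqasan_strlen(accept) + 1);
  char *r = __libqasan_strpbrk(s, accept);            // hypothetical backend
  QASAN_DEBUG("\t\t = %p\n", r);

  return r;

}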
diff --git a/qemu_mode/libqasan/libqasan.c b/qemu_mode/libqasan/libqasan.c
new file mode 100644
index 00000000..9fc4ef7a
--- /dev/null
+++ b/qemu_mode/libqasan/libqasan.c
@@ -0,0 +1,94 @@
+/*******************************************************************************
+Copyright (c) 2019-2020, Andrea Fioraldi
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+#include "libqasan.h"
+
+#ifdef DEBUG
+int __qasan_debug;
+#endif
+int __qasan_log;
+
+void __libqasan_print_maps(void) {
+
+  int fd = open("/proc/self/maps", O_RDONLY);
+  if (fd < 0) return;
+
+  char buf[4096] = {0};
+
+  read(fd, buf, 4095);
+  close(fd);
+
+  size_t len = strlen(buf);
+
+  QASAN_LOG("Guest process maps:\n");
+  size_t i;
+  char  *line = NULL;
+  for (i = 0; i < len; i++) {
+
+    if (!line) line = &buf[i];
+    if (buf[i] == '\n') {
+
+      buf[i] = 0;
+      QASAN_LOG("%s\n", line);
+      line = NULL;
+
+    }
+
+  }
+
+  if (line) QASAN_LOG("%s\n", line);
+  QASAN_LOG("\n");
+
+}
+
+/*__attribute__((constructor))*/ void __libqasan_init() {
+
+  __libqasan_init_hooks();
+
+#ifdef DEBUG
+  __qasan_debug = getenv("QASAN_DEBUG") != NULL;
+#endif
+  __qasan_log = getenv("QASAN_LOG") != NULL;
+
+  QASAN_LOG("QEMU-AddressSanitizer (v%s)\n", QASAN_VERSTR);
+  QASAN_LOG(
+      "Copyright (C) 2019-2021 Andrea Fioraldi <andreafioraldi@gmail.com>\n");
+  QASAN_LOG("\n");
+
+  if (__qasan_log) __libqasan_print_maps();
+
+}
+
+int __libc_start_main(int (*main)(int, char **, char **), int argc, char **argv,
+                      int (*init)(int, char **, char **), void (*fini)(void),
+                      void (*rtld_fini)(void), void *stack_end) {
+
+  typeof(&__libc_start_main) orig = dlsym(RTLD_NEXT, "__libc_start_main");
+
+  __libqasan_init();
+  if (getenv("AFL_INST_LIBS")) __libqasan_hotpatch();
+
+  return orig(main, argc, argv, init, fini, rtld_fini, stack_end);
+
+}
+
diff --git a/qemu_mode/libqasan/libqasan.h b/qemu_mode/libqasan/libqasan.h
new file mode 100644
index 00000000..43b7adb5
--- /dev/null
+++ b/qemu_mode/libqasan/libqasan.h
@@ -0,0 +1,132 @@
+/*******************************************************************************
+Copyright (c) 2019-2020, Andrea Fioraldi
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+#ifndef __LIBQASAN_H__
+#define __LIBQASAN_H__
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <inttypes.h>
+#include <dlfcn.h>
+#include <wchar.h>
+
+#include "qasan.h"
+
+#define QASAN_LOG(msg...)                   \
+  do {                                      \
+                                            \
+    if (__qasan_log) {                      \
+                                            \
+      fprintf(stderr, "==%d== ", getpid()); \
+      fprintf(stderr, msg);                 \
+                                            \
+    }                                       \
+                                            \
+  } while (0)
+
+#ifdef DEBUG
+  #define QASAN_DEBUG(msg...)                 \
+    do {                                      \
+                                              \
+      if (__qasan_debug) {                    \
+                                              \
+        fprintf(stderr, "==%d== ", getpid()); \
+        fprintf(stderr, msg);                 \
+                                              \
+      }                                       \
+                                              \
+    } while (0)
+
+#else
+  #define QASAN_DEBUG(msg...) \
+    do {                      \
+                              \
+    } while (0)
+#endif
+
+#define ASSERT_DLSYM(name)                                              \
+  ({                                                                    \
+                                                                        \
+    void *a = (void *)dlsym(RTLD_NEXT, #name);                          \
+    if (!a) {                                                           \
+                                                                        \
+      fprintf(stderr,                                                   \
+              "FATAL ERROR: failed dlsym of " #name " in libqasan!\n"); \
+      abort();                                                          \
+                                                                        \
+    }                                                                   \
+    a;                                                                  \
+                                                                        \
+  })
+
+extern int __qasan_debug;
+extern int __qasan_log;
+
+void __libqasan_init_hooks(void);
+void __libqasan_init_malloc(void);
+
+void __libqasan_hotpatch(void);
+
+size_t __libqasan_malloc_usable_size(void *ptr);
+void * __libqasan_malloc(size_t size);
+void   __libqasan_free(void *ptr);
+void * __libqasan_calloc(size_t nmemb, size_t size);
+void * __libqasan_realloc(void *ptr, size_t size);
+int    __libqasan_posix_memalign(void **ptr, size_t align, size_t len);
+void * __libqasan_memalign(size_t align, size_t len);
+void * __libqasan_aligned_alloc(size_t align, size_t len);
+
+void *   __libqasan_memcpy(void *dest, const void *src, size_t n);
+void *   __libqasan_memmove(void *dest, const void *src, size_t n);
+void *   __libqasan_memset(void *s, int c, size_t n);
+void *   __libqasan_memchr(const void *s, int c, size_t n);
+void *   __libqasan_memrchr(const void *s, int c, size_t n);
+size_t   __libqasan_strlen(const char *s);
+size_t   __libqasan_strnlen(const char *s, size_t len);
+int      __libqasan_strcmp(const char *str1, const char *str2);
+int      __libqasan_strncmp(const char *str1, const char *str2, size_t len);
+int      __libqasan_strcasecmp(const char *str1, const char *str2);
+int      __libqasan_strncasecmp(const char *str1, const char *str2, size_t len);
+int      __libqasan_memcmp(const void *mem1, const void *mem2, size_t len);
+int      __libqasan_bcmp(const void *mem1, const void *mem2, size_t len);
+char *   __libqasan_strstr(const char *haystack, const char *needle);
+char *   __libqasan_strcasestr(const char *haystack, const char *needle);
+void *   __libqasan_memmem(const void *haystack, size_t haystack_len,
+                           const void *needle, size_t needle_len);
+char *   __libqasan_strchr(const char *s, int c);
+char *   __libqasan_strrchr(const char *s, int c);
+size_t   __libqasan_wcslen(const wchar_t *s);
+wchar_t *__libqasan_wcscpy(wchar_t *d, const wchar_t *s);
+int      __libqasan_wcscmp(const wchar_t *s1, const wchar_t *s2);
+
+#endif
+
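The ASSERT_DLSYM macro above resolves the next occurrence of a symbol in the
link map and aborts loudly if it is missing, so a failed lookup never turns
into a silent NULL call. A minimal usage sketch (the time() wrapper here is
illustrative, not part of this patch):

#include <time.h>

static time_t (*__lq_libc_time)(time_t *);

time_t time(time_t *t) {

  if (!__lq_libc_time) __lq_libc_time = ASSERT_DLSYM(time);
  return __lq_libc_time(t);

}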
diff --git a/qemu_mode/libqasan/malloc.c b/qemu_mode/libqasan/malloc.c
new file mode 100644
index 00000000..5a2d2a0c
--- /dev/null
+++ b/qemu_mode/libqasan/malloc.c
@@ -0,0 +1,364 @@
+/*******************************************************************************
+Copyright (c) 2019-2020, Andrea Fioraldi
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+#include "libqasan.h"
+#include <features.h>
+#include <errno.h>
+#include <stddef.h>
+#include <assert.h>
+#include <pthread.h>
+
+#define REDZONE_SIZE 128
+// 50 mb quarantine
+#define QUARANTINE_MAX_BYTES 52428800
+
+#if __STDC_VERSION__ < 201112L || \
+    (defined(__FreeBSD__) && __FreeBSD_version < 1200000)
+// use this hack if not C11
+typedef struct {
+
+  long long   __ll;
+  long double __ld;
+
+} max_align_t;
+
+#endif
+
+#define ALLOC_ALIGN_SIZE (_Alignof(max_align_t))
+
+struct chunk_begin {
+
+  size_t              requested_size;
+  void *              aligned_orig;  // NULL if not aligned
+  struct chunk_begin *next;
+  struct chunk_begin *prev;
+  char                redzone[REDZONE_SIZE];
+
+};
+
+struct chunk_struct {
+
+  struct chunk_begin begin;
+  char               redzone[REDZONE_SIZE];
+  size_t             prev_size_padding;
+
+};
+
+#ifdef __GLIBC__
+
+void *(*__lq_libc_malloc)(size_t);
+void (*__lq_libc_free)(void *);
+  #define backend_malloc __lq_libc_malloc
+  #define backend_free __lq_libc_free
+
+  #define TMP_ZONE_SIZE 4096
+static int           __tmp_alloc_zone_idx;
+static unsigned char __tmp_alloc_zone[TMP_ZONE_SIZE];
+
+#else
+
+// From dlmalloc.c
+void *                    dlmalloc(size_t);
+void                      dlfree(void *);
+  #define backend_malloc dlmalloc
+  #define backend_free dlfree
+
+#endif
+
+int __libqasan_malloc_initialized;
+
+static struct chunk_begin *quarantine_top;
+static struct chunk_begin *quarantine_end;
+static size_t              quarantine_bytes;
+
+#ifdef __BIONIC__
+static pthread_mutex_t quarantine_lock;
+  #define LOCK_TRY pthread_mutex_trylock
+  #define LOCK_INIT pthread_mutex_init
+  #define LOCK_UNLOCK pthread_mutex_unlock
+#else
+static pthread_spinlock_t quarantine_lock;
+  #define LOCK_TRY pthread_spin_trylock
+  #define LOCK_INIT pthread_spin_init
+  #define LOCK_UNLOCK pthread_spin_unlock
+#endif
+
+// must be called with QASAN disabled for the calling thread
+static int quarantine_push(struct chunk_begin *ck) {
+
+  if (ck->requested_size >= QUARANTINE_MAX_BYTES) return 0;
+
+  if (LOCK_TRY(&quarantine_lock)) return 0;
+
+  while (ck->requested_size + quarantine_bytes >= QUARANTINE_MAX_BYTES) {
+
+    struct chunk_begin *tmp = quarantine_end;
+    quarantine_end = tmp->prev;
+    if (quarantine_end)
+      quarantine_end->next = NULL;
+    else
+      quarantine_top = NULL;
+
+    quarantine_bytes -= tmp->requested_size;
+
+    if (tmp->aligned_orig)
+      backend_free(tmp->aligned_orig);
+    else
+      backend_free(tmp);
+
+  }
+
+  ck->next = quarantine_top;
+  ck->prev = NULL;
+  if (quarantine_top) quarantine_top->prev = ck;
+  if (!quarantine_end) quarantine_end = ck;
+  quarantine_top = ck;
+  quarantine_bytes += ck->requested_size;  // account for the queued chunk
+
+  LOCK_UNLOCK(&quarantine_lock);
+
+  return 1;
+
+}
+
+void __libqasan_init_malloc(void) {
+
+  if (__libqasan_malloc_initialized) return;
+
+#ifdef __GLIBC__
+  __lq_libc_malloc = dlsym(RTLD_NEXT, "malloc");
+  __lq_libc_free = dlsym(RTLD_NEXT, "free");
+#endif
+
+  LOCK_INIT(&quarantine_lock, PTHREAD_PROCESS_PRIVATE);
+
+  __libqasan_malloc_initialized = 1;
+  QASAN_LOG("\n");
+  QASAN_LOG("Allocator initialization done.\n");
+  QASAN_LOG("\n");
+
+}
+
+size_t __libqasan_malloc_usable_size(void *ptr) {
+
+  char *p = ptr;
+  p -= sizeof(struct chunk_begin);
+
+  return ((struct chunk_begin *)p)->requested_size;
+
+}
+
+void *__libqasan_malloc(size_t size) {
+
+  if (!__libqasan_malloc_initialized) {
+
+    __libqasan_init_malloc();
+
+#ifdef __GLIBC__
+    void *r = &__tmp_alloc_zone[__tmp_alloc_zone_idx];
+
+    if (size & (ALLOC_ALIGN_SIZE - 1))
+      __tmp_alloc_zone_idx +=
+          (size & ~(ALLOC_ALIGN_SIZE - 1)) + ALLOC_ALIGN_SIZE;
+    else
+      __tmp_alloc_zone_idx += size;
+
+    return r;
+#endif
+
+  }
+
+  int state = QASAN_SWAP(QASAN_DISABLED);  // disable qasan for this thread
+
+  struct chunk_begin *p = backend_malloc(sizeof(struct chunk_struct) + size);
+
+  QASAN_SWAP(state);
+
+  if (!p) return NULL;
+
+  QASAN_UNPOISON(p, sizeof(struct chunk_struct) + size);
+
+  p->requested_size = size;
+  p->aligned_orig = NULL;
+  p->next = p->prev = NULL;
+
+  QASAN_ALLOC(&p[1], (char *)&p[1] + size);
+  QASAN_POISON(p->redzone, REDZONE_SIZE, ASAN_HEAP_LEFT_RZ);
+  if (size & (ALLOC_ALIGN_SIZE - 1))
+    QASAN_POISON((char *)&p[1] + size,
+                 (size & ~(ALLOC_ALIGN_SIZE - 1)) + 8 - size + REDZONE_SIZE,
+                 ASAN_HEAP_RIGHT_RZ);
+  else
+    QASAN_POISON((char *)&p[1] + size, REDZONE_SIZE, ASAN_HEAP_RIGHT_RZ);
+
+  __builtin_memset(&p[1], 0xff, size);
+
+  return &p[1];
+
+}
+
+void __libqasan_free(void *ptr) {
+
+  if (!ptr) return;
+
+#ifdef __GLIBC__
+  if (ptr >= (void *)__tmp_alloc_zone &&
+      ptr < ((void *)__tmp_alloc_zone + TMP_ZONE_SIZE))
+    return;
+#endif
+
+  struct chunk_begin *p = ptr;
+  p -= 1;
+
+  size_t n = p->requested_size;
+
+  QASAN_STORE(ptr, n);
+  int state = QASAN_SWAP(QASAN_DISABLED);  // disable qasan for this thread
+
+  if (!quarantine_push(p)) {
+
+    if (p->aligned_orig)
+      backend_free(p->aligned_orig);
+    else
+      backend_free(p);
+
+  }
+
+  QASAN_SWAP(state);
+
+  if (n & (ALLOC_ALIGN_SIZE - 1))
+    n = (n & ~(ALLOC_ALIGN_SIZE - 1)) + ALLOC_ALIGN_SIZE;
+
+  QASAN_POISON(ptr, n, ASAN_HEAP_FREED);
+  QASAN_DEALLOC(ptr);
+
+}
+
+void *__libqasan_calloc(size_t nmemb, size_t size) {
+
+  if (size && nmemb > (size_t)-1 / size) return NULL;  // overflow guard
+  size *= nmemb;
+
+#ifdef __GLIBC__
+  if (!__libqasan_malloc_initialized) {
+
+    void *r = &__tmp_alloc_zone[__tmp_alloc_zone_idx];
+    __tmp_alloc_zone_idx += size;
+    return r;
+
+  }
+
+#endif
+
+  char *p = __libqasan_malloc(size);
+  if (!p) return NULL;
+
+  __builtin_memset(p, 0, size);
+
+  return p;
+
+}
+
+void *__libqasan_realloc(void *ptr, size_t size) {
+
+  char *p = __libqasan_malloc(size);
+  if (!p) return NULL;
+
+  if (!ptr) return p;
+
+  size_t n = ((struct chunk_begin *)ptr)[-1].requested_size;
+  if (size < n) n = size;
+
+  __builtin_memcpy(p, ptr, n);
+
+  __libqasan_free(ptr);
+  return p;
+
+}
+
+int __libqasan_posix_memalign(void **ptr, size_t align, size_t len) {
+
+  // alignment must be a nonzero power of two multiple of sizeof(void *)
+  if (!align || (align & (align - 1)) || (align % sizeof(void *)))
+    return EINVAL;
+  if (len == 0) {
+
+    *ptr = NULL;
+    return 0;
+
+  }
+
+  size_t rem = len % align;
+  size_t size = len;
+  if (rem) size += align - rem;  // round the backing size up to the alignment
+
+  int state = QASAN_SWAP(QASAN_DISABLED);  // disable qasan for this thread
+
+  char *orig = backend_malloc(sizeof(struct chunk_struct) + size);
+
+  QASAN_SWAP(state);
+
+  if (!orig) return ENOMEM;
+
+  QASAN_UNPOISON(orig, sizeof(struct chunk_struct) + size);
+
+  char *data = orig + sizeof(struct chunk_begin);
+  data += align - ((uintptr_t)data % align);
+
+  struct chunk_begin *p = (struct chunk_begin *)data - 1;
+
+  p->requested_size = len;
+  p->aligned_orig = orig;
+
+  QASAN_ALLOC(data, data + len);
+  QASAN_POISON(p->redzone, REDZONE_SIZE, ASAN_HEAP_LEFT_RZ);
+  if (len & (ALLOC_ALIGN_SIZE - 1))
+    QASAN_POISON(
+        data + len,
+        (len & ~(ALLOC_ALIGN_SIZE - 1)) + ALLOC_ALIGN_SIZE - len + REDZONE_SIZE,
+        ASAN_HEAP_RIGHT_RZ);
+  else
+    QASAN_POISON(data + len, REDZONE_SIZE, ASAN_HEAP_RIGHT_RZ);
+
+  __builtin_memset(data, 0xff, len);
+
+  *ptr = data;
+
+  return 0;
+
+}
+
+void *__libqasan_memalign(size_t align, size_t len) {
+
+  void *ret = NULL;
+
+  __libqasan_posix_memalign(&ret, align, len);
+
+  return ret;
+
+}
+
+void *__libqasan_aligned_alloc(size_t align, size_t len) {
+
+  void *ret = NULL;
+
+  if ((len % align)) return NULL;
+
+  __libqasan_posix_memalign(&ret, align, len);
+
+  return ret;
+
+}
+
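Every allocation carries its metadata inline: a struct chunk_begin header
(requested size, backing pointer for aligned allocations, quarantine links and
the left redzone) sits immediately below the user pointer, and a right redzone
plus padding follows the payload. A short sketch of recovering that header
from a user pointer, assuming the chunk layout defined above (dump_chunk is
for illustration only and is not part of this patch):

#include <stdio.h>

static void dump_chunk(void *user_ptr) {

  struct chunk_begin *h = (struct chunk_begin *)user_ptr - 1;

  printf("user ptr        = %p\n", user_ptr);
  printf("requested size  = %zu\n", h->requested_size);
  printf("aligned backing = %p\n", h->aligned_orig);  // NULL unless memalign'd

}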
diff --git a/qemu_mode/libqasan/map_macro.h b/qemu_mode/libqasan/map_macro.h
new file mode 100644
index 00000000..e9438dc5
--- /dev/null
+++ b/qemu_mode/libqasan/map_macro.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2012 William Swanson
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the names of the authors or
+ * their institutions shall not be used in advertising or otherwise to
+ * promote the sale, use or other dealings in this Software without
+ * prior written authorization from the authors.
+ */
+
+#ifndef MAP_H_INCLUDED
+#define MAP_H_INCLUDED
+
+#define EVAL0(...) __VA_ARGS__
+#define EVAL1(...) EVAL0(EVAL0(EVAL0(__VA_ARGS__)))
+#define EVAL2(...) EVAL1(EVAL1(EVAL1(__VA_ARGS__)))
+#define EVAL3(...) EVAL2(EVAL2(EVAL2(__VA_ARGS__)))
+#define EVAL4(...) EVAL3(EVAL3(EVAL3(__VA_ARGS__)))
+#define EVAL(...) EVAL4(EVAL4(EVAL4(__VA_ARGS__)))
+
+#define MAP_END(...)
+#define MAP_OUT
+#define MAP_COMMA ,
+
+#define MAP_GET_END2() 0, MAP_END
+#define MAP_GET_END1(...) MAP_GET_END2
+#define MAP_GET_END(...) MAP_GET_END1
+#define MAP_NEXT0(test, next, ...) next MAP_OUT
+#define MAP_NEXT1(test, next) MAP_NEXT0(test, next, 0)
+#define MAP_NEXT(test, next) MAP_NEXT1(MAP_GET_END test, next)
+
+#define MAP0(f, x, peek, ...) f(x) MAP_NEXT(peek, MAP1)(f, peek, __VA_ARGS__)
+#define MAP1(f, x, peek, ...) f(x) MAP_NEXT(peek, MAP0)(f, peek, __VA_ARGS__)
+
+#define MAP_LIST_NEXT1(test, next) MAP_NEXT0(test, MAP_COMMA next, 0)
+#define MAP_LIST_NEXT(test, next) MAP_LIST_NEXT1(MAP_GET_END test, next)
+
+#define MAP_LIST0(f, x, peek, ...) \
+  f(x) MAP_LIST_NEXT(peek, MAP_LIST1)(f, peek, __VA_ARGS__)
+#define MAP_LIST1(f, x, peek, ...) \
+  f(x) MAP_LIST_NEXT(peek, MAP_LIST0)(f, peek, __VA_ARGS__)
+
+/**
+ * Applies the function macro `f` to each of the remaining parameters.
+ */
+#define MAP(f, ...) EVAL(MAP1(f, __VA_ARGS__, ()()(), ()()(), ()()(), 0))
+
+/**
+ * Applies the function macro `f` to each of the remaining parameters and
+ * inserts commas between the results.
+ */
+#define MAP_LIST(f, ...) \
+  EVAL(MAP_LIST1(f, __VA_ARGS__, ()()(), ()()(), ()()(), 0))
+
+#endif
+
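These macros implement a bounded preprocessor "map": EVAL forces several
hundred rescans, and the MAP*/MAP_LIST* pairs recurse by mutual reference
until the sentinel ()()() arguments are consumed. A quick sketch of what they
expand to (the STRINGIFY/SQUARE helpers are illustrative):

#define STRINGIFY(x) #x,
const char *names[] = {MAP(STRINGIFY, foo, bar, baz)};
/* -> { "foo", "bar", "baz", }  : MAP inserts no separator itself */

#define SQUARE(x) ((x) * (x))
int squares[] = {MAP_LIST(SQUARE, 1, 2, 3)};
/* -> { ((1) * (1)), ((2) * (2)), ((3) * (3)) }  : MAP_LIST adds the commas */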
diff --git a/qemu_mode/libqasan/patch.c b/qemu_mode/libqasan/patch.c
new file mode 100644
index 00000000..fbc09c99
--- /dev/null
+++ b/qemu_mode/libqasan/patch.c
@@ -0,0 +1,243 @@
+/*******************************************************************************
+Copyright (c) 2019-2020, Andrea Fioraldi
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+#include "libqasan.h"
+#include <sys/mman.h>
+
+#ifdef __x86_64__
+
+uint8_t *__libqasan_patch_jump(uint8_t *addr, uint8_t *dest) {
+
+  // mov rax, dest
+  addr[0] = 0x48;
+  addr[1] = 0xb8;
+  *(uint8_t **)&addr[2] = dest;
+
+  // jmp rax
+  addr[10] = 0xff;
+  addr[11] = 0xe0;
+
+  return &addr[12];
+
+}
+
+#elif __i386__
+
+uint8_t *__libqasan_patch_jump(uint8_t *addr, uint8_t *dest) {
+
+  // mov eax, dest
+  addr[0] = 0xb8;
+  *(uint8_t **)&addr[1] = dest;
+
+  // jmp eax
+  addr[5] = 0xff;
+  addr[6] = 0xe0;
+
+  return &addr[7];
+
+}
+
+#elif __arm__
+
+// in ARM, r12 is a scratch register used by the linker to jump,
+// so let's use it in our stub
+
+uint8_t *__libqasan_patch_jump(uint8_t *addr, uint8_t *dest) {
+
+  // ldr r12, OFF
+  addr[0] = 0x0;
+  addr[1] = 0xc0;
+  addr[2] = 0x9f;
+  addr[3] = 0xe5;
+
+  // add pc, pc, r12
+  addr[4] = 0xc;
+  addr[5] = 0xf0;
+  addr[6] = 0x8f;
+  addr[7] = 0xe0;
+
+  // OFF: .word dest
+  *(uint32_t *)&addr[8] = (uint32_t)dest;
+
+  return &addr[12];
+
+}
+
+#elif __aarch64__
+
+// in ARM64, x16 is a scratch register used by the linker to jump,
+// so let's use it in our stub
+
+uint8_t *__libqasan_patch_jump(uint8_t *addr, uint8_t *dest) {
+
+  // ldr x16, OFF
+  addr[0] = 0x50;
+  addr[1] = 0x0;
+  addr[2] = 0x0;
+  addr[3] = 0x58;
+
+  // br x16
+  addr[4] = 0x0;
+  addr[5] = 0x2;
+  addr[6] = 0x1f;
+  addr[7] = 0xd6;
+
+  // OFF: .dword dest
+  *(uint64_t *)&addr[8] = (uint64_t)dest;
+
+  return &addr[16];
+
+}
+
+#else
+
+  #define CANNOT_HOTPATCH
+
+#endif
+
+#ifdef CANNOT_HOTPATCH
+
+void __libqasan_hotpatch(void) {
+
+}
+
+#else
+
+static void *libc_start, *libc_end;
+int          libc_perms;
+
+static void find_libc(void) {
+
+  FILE *  fp;
+  char *  line = NULL;
+  size_t  len = 0;
+  ssize_t read;
+
+  fp = fopen("/proc/self/maps", "r");
+  if (fp == NULL) return;
+
+  while ((read = getline(&line, &len, fp)) != -1) {
+
+    int      fields, dev_maj, dev_min, inode;
+    uint64_t min, max, offset;
+    char     flag_r, flag_w, flag_x, flag_p;
+    char     path[513] = "";  // %512s can write 512 chars plus the terminator
+    fields = sscanf(line,
+                    "%" PRIx64 "-%" PRIx64 " %c%c%c%c %" PRIx64
+                    " %x:%x %d"
+                    " %512s",
+                    &min, &max, &flag_r, &flag_w, &flag_x, &flag_p, &offset,
+                    &dev_maj, &dev_min, &inode, path);
+
+    if ((fields < 10) || (fields > 11)) continue;
+
+    if (flag_x == 'x' && (__libqasan_strstr(path, "/libc.so") ||
+                          __libqasan_strstr(path, "/libc-"))) {
+
+      libc_start = (void *)min;
+      libc_end = (void *)max;
+
+      libc_perms = PROT_EXEC;
+      if (flag_w == 'w') libc_perms |= PROT_WRITE;
+      if (flag_r == 'r') libc_perms |= PROT_READ;
+
+      break;
+
+    }
+
+  }
+
+  free(line);
+  fclose(fp);
+
+}
+
+/* Why this hack? https://twitter.com/andreafioraldi/status/1227635146452541441
+   Unfortunately, symbol override via LD_PRELOAD is not enough to prevent libc
+   code from calling these optimized XMM-based routines internally.
+   We patch them at runtime to call our unoptimized version of the same
+   routine instead.
+*/
+
+void __libqasan_hotpatch(void) {
+
+  find_libc();
+
+  if (!libc_start) return;
+
+  if (mprotect(libc_start, libc_end - libc_start,
+               PROT_READ | PROT_WRITE | PROT_EXEC) < 0)
+    return;
+
+  void *libc = dlopen("libc.so.6", RTLD_LAZY);
+
+  #define HOTPATCH(fn)                             \
+    uint8_t *p_##fn = (uint8_t *)dlsym(libc, #fn); \
+    if (p_##fn) __libqasan_patch_jump(p_##fn, (uint8_t *)&(fn));
+
+  HOTPATCH(memcmp)
+  HOTPATCH(memmove)
+
+  uint8_t *p_memcpy = (uint8_t *)dlsym(libc, "memcpy");
+  // glibc may resolve memcpy and memmove to the same routine; patching that
+  // address again would clobber the memmove stub, so only patch if distinct
+  if (p_memcpy && p_memmove != p_memcpy)
+    __libqasan_patch_jump(p_memcpy, (uint8_t *)&memcpy);
+
+  HOTPATCH(memchr)
+  HOTPATCH(memrchr)
+  HOTPATCH(memmem)
+  #ifndef __BIONIC__
+  HOTPATCH(bzero)
+  HOTPATCH(explicit_bzero)
+  HOTPATCH(mempcpy)
+  HOTPATCH(bcmp)
+  #endif
+
+  HOTPATCH(strchr)
+  HOTPATCH(strrchr)
+  HOTPATCH(strcasecmp)
+  HOTPATCH(strncasecmp)
+  HOTPATCH(strcat)
+  HOTPATCH(strcmp)
+  HOTPATCH(strncmp)
+  HOTPATCH(strcpy)
+  HOTPATCH(strncpy)
+  HOTPATCH(stpcpy)
+  HOTPATCH(strdup)
+  HOTPATCH(strlen)
+  HOTPATCH(strnlen)
+  HOTPATCH(strstr)
+  HOTPATCH(strcasestr)
+  HOTPATCH(wcslen)
+  HOTPATCH(wcscpy)
+  HOTPATCH(wcscmp)
+
+  #undef HOTPATCH
+
+  mprotect(libc_start, libc_end - libc_start, libc_perms);
+
+}
+
+#endif
+
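On x86_64 the stub emitted by __libqasan_patch_jump is 12 bytes: 48 b8
<imm64> (movabs rax, dest) followed by ff e0 (jmp rax), so any call entering
the patched libc routine immediately lands in the corresponding hook. A
self-contained sketch of the same technique on a local function (GCC/Clang on
x86_64 Linux only; compile without optimization so victim() is not inlined;
all names here are illustrative, and the stub clobbers victim's first 12
bytes):

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static int victim(void) { return 1; }
static int hook(void)   { return 2; }

int main(void) {

  long     pg = sysconf(_SC_PAGESIZE);
  uint8_t *p = (uint8_t *)victim;
  uint8_t *page = (uint8_t *)((uintptr_t)p & ~(uintptr_t)(pg - 1));

  // relax W^X before writing the stub, as __libqasan_hotpatch() does for libc
  if (mprotect(page, pg * 2, PROT_READ | PROT_WRITE | PROT_EXEC)) return 1;

  p[0] = 0x48; p[1] = 0xb8;                        // movabs rax, hook
  *(uint64_t *)&p[2] = (uint64_t)(uintptr_t)&hook;
  p[10] = 0xff; p[11] = 0xe0;                      // jmp rax

  printf("victim() now returns %d\n", victim());   // prints 2
  return 0;

}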
diff --git a/qemu_mode/libqasan/string.c b/qemu_mode/libqasan/string.c
new file mode 100644
index 00000000..c850463b
--- /dev/null
+++ b/qemu_mode/libqasan/string.c
@@ -0,0 +1,339 @@
+/*******************************************************************************
+Copyright (c) 2019-2020, Andrea Fioraldi
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+#include "libqasan.h"
+#include <ctype.h>
+
+void *__libqasan_memcpy(void *dest, const void *src, size_t n) {
+
+  unsigned char *      d = dest;
+  const unsigned char *s = src;
+
+  if (!n) return dest;
+
+  while (n--) {
+
+    *d = *s;
+    ++d;
+    ++s;
+
+  }
+
+  return dest;
+
+}
+
+void *__libqasan_memmove(void *dest, const void *src, size_t n) {
+
+  unsigned char *      d = dest;
+  const unsigned char *s = src;
+
+  if (!n) return dest;
+
+  if (!((d + n) >= s && d <= (s + n)))  // regions do not overlap: plain copy
+    return __libqasan_memcpy(dest, src, n);
+
+  d = __libqasan_malloc(n);
+  __libqasan_memcpy(d, src, n);
+  __libqasan_memcpy(dest, d, n);
+
+  __libqasan_free(d);
+
+  return dest;
+
+}
+
+void *__libqasan_memset(void *s, int c, size_t n) {
+
+  unsigned char *b = s;
+  while (n--)
+    *(b++) = (unsigned char)c;
+  return s;
+
+}
+
+void *__libqasan_memchr(const void *s, int c, size_t n) {
+
+  unsigned char *m = (unsigned char *)s;
+  size_t         i;
+  for (i = 0; i < n; ++i)
+    if (m[i] == (unsigned char)c) return &m[i];
+  return NULL;
+
+}
+
+void *__libqasan_memrchr(const void *s, int c, size_t n) {
+
+  unsigned char *m = (unsigned char *)s;
+  long           i;
+  for (i = (long)n - 1; i >= 0; --i)  // start at the last valid byte
+    if (m[i] == (unsigned char)c) return &m[i];
+  return NULL;
+
+}
+
+size_t __libqasan_strlen(const char *s) {
+
+  const char *i = s;
+  while (*(i++))
+    ;
+  return i - s - 1;
+
+}
+
+size_t __libqasan_strnlen(const char *s, size_t len) {
+
+  size_t r = 0;
+  while (len-- && *(s++))
+    ++r;
+  return r;
+
+}
+
+int __libqasan_strcmp(const char *str1, const char *str2) {
+
+  while (1) {
+
+    const unsigned char c1 = *str1, c2 = *str2;
+
+    if (c1 != c2) return c1 - c2;
+    if (!c1) return 0;
+    str1++;
+    str2++;
+
+  }
+
+  return 0;
+
+}
+
+int __libqasan_strncmp(const char *str1, const char *str2, size_t len) {
+
+  while (len--) {
+
+    unsigned char c1 = *str1, c2 = *str2;
+
+    if (c1 != c2) return c1 - c2;
+    if (!c1) return 0;
+    str1++;
+    str2++;
+
+  }
+
+  return 0;
+
+}
+
+int __libqasan_strcasecmp(const char *str1, const char *str2) {
+
+  while (1) {
+
+    const unsigned char c1 = tolower(*str1), c2 = tolower(*str2);
+
+    if (c1 != c2) return c1 - c2;
+    if (!c1) return 0;
+    str1++;
+    str2++;
+
+  }
+
+  return 0;
+
+}
+
+int __libqasan_strncasecmp(const char *str1, const char *str2, size_t len) {
+
+  while (len--) {
+
+    const unsigned char c1 = tolower(*str1), c2 = tolower(*str2);
+
+    if (c1 != c2) return c1 - c2;
+    if (!c1) return 0;
+    str1++;
+    str2++;
+
+  }
+
+  return 0;
+
+}
+
+int __libqasan_memcmp(const void *mem1, const void *mem2, size_t len) {
+
+  const char *strmem1 = (const char *)mem1;
+  const char *strmem2 = (const char *)mem2;
+
+  while (len--) {
+
+    const unsigned char c1 = *strmem1, c2 = *strmem2;
+    if (c1 != c2) return (c1 > c2) ? 1 : -1;
+    strmem1++;
+    strmem2++;
+
+  }
+
+  return 0;
+
+}
+
+int __libqasan_bcmp(const void *mem1, const void *mem2, size_t len) {
+
+  const char *strmem1 = (const char *)mem1;
+  const char *strmem2 = (const char *)mem2;
+
+  while (len--) {
+
+    int diff = *strmem1 ^ *strmem2;
+    if (diff != 0) return 1;
+    strmem1++;
+    strmem2++;
+
+  }
+
+  return 0;
+
+}
+
+char *__libqasan_strstr(const char *haystack, const char *needle) {
+
+  do {
+
+    const char *n = needle;
+    const char *h = haystack;
+
+    while (*n && *h && *n == *h)
+      n++, h++;
+
+    if (!*n) return (char *)haystack;
+
+  } while (*(haystack++));
+
+  return 0;
+
+}
+
+char *__libqasan_strcasestr(const char *haystack, const char *needle) {
+
+  do {
+
+    const char *n = needle;
+    const char *h = haystack;
+
+    while (*n && *h && tolower(*n) == tolower(*h))
+      n++, h++;
+
+    if (!*n) return (char *)haystack;
+
+  } while (*(haystack++));
+
+  return 0;
+
+}
+
+void *__libqasan_memmem(const void *haystack, size_t haystack_len,
+                        const void *needle, size_t needle_len) {
+
+  const char *n = (const char *)needle;
+  const char *h = (const char *)haystack;
+  if (haystack_len < needle_len) return 0;
+  if (needle_len == 0) return (void *)haystack;
+  if (needle_len == 1)
+    return __libqasan_memchr(haystack, *n, haystack_len);
+
+  const char *end = h + (haystack_len - needle_len);
+
+  do {
+
+    if (*h == *n) {
+
+      if (__libqasan_memcmp(h, n, needle_len) == 0) return (void *)h;
+
+    }
+
+  } while (h++ <= end);
+
+  return 0;
+
+}
+
+char *__libqasan_strchr(const char *s, int c) {
+
+  while (*s != (char)c)
+    if (!*s++) return 0;
+  return (char *)s;
+
+}
+
+char *__libqasan_strrchr(const char *s, int c) {
+
+  char *r = NULL;
+  do
+    if (*s == (char)c) r = (char *)s;
+  while (*s++);
+
+  return r;
+
+}
+
+size_t __libqasan_wcslen(const wchar_t *s) {
+
+  size_t len = 0;
+
+  while (s[len] != L'\0') {
+
+    if (s[++len] == L'\0') return len;
+    if (s[++len] == L'\0') return len;
+    if (s[++len] == L'\0') return len;
+    ++len;
+
+  }
+
+  return len;
+
+}
+
+wchar_t *__libqasan_wcscpy(wchar_t *d, const wchar_t *s) {
+
+  wchar_t *a = d;
+  while ((*d++ = *s++))
+    ;
+  return a;
+
+}
+
+int __libqasan_wcscmp(const wchar_t *s1, const wchar_t *s2) {
+
+  wchar_t c1, c2;
+  do {
+
+    c1 = *s1++;
+    c2 = *s2++;
+    if (c2 == L'\0') return c1 - c2;
+
+  } while (c1 == c2);
+
+  return c1 < c2 ? -1 : 1;
+
+}
+
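These fallbacks are deliberately simple byte-at-a-time loops, so the hooks
never recurse into (possibly patched) libc and the SIMD fast paths are
avoided entirely. A quick exercise of the memmem edge cases handled above
(sketch only; assumes libqasan.h is on the include path):

#include "libqasan.h"
#include <assert.h>

int main(void) {

  const char h[] = "deadbeef";

  assert(__libqasan_memmem(h, 8, "", 0) == (void *)h);          // empty needle
  assert(__libqasan_memmem(h, 8, "beef", 4) == (void *)(h + 4));
  assert(__libqasan_memmem(h, 3, "beef", 4) == NULL);           // too short
  return 0;

}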
diff --git a/qemu_mode/libqasan/uninstrument.c b/qemu_mode/libqasan/uninstrument.c
new file mode 100644
index 00000000..5bf841a3
--- /dev/null
+++ b/qemu_mode/libqasan/uninstrument.c
@@ -0,0 +1,83 @@
+/*
+
+This code is DEPRECATED!
+It is kept here because uninstrumenting a function may still be needed for
+some corner case.
+
+*/
+
+/*******************************************************************************
+Copyright (c) 2019-2020, Andrea Fioraldi
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+#include "libqasan.h"
+#include "map_macro.h"
+#include <sys/types.h>
+#include <pwd.h>
+
+#define X_GET_FNPAR(type, name) name
+#define GET_FNPAR(x) X_GET_FNPAR x
+#define X_GET_FNTYPE(type, name) type
+#define GET_FNTYPE(x) X_GET_FNTYPE x
+#define X_GET_FNDECL(type, name) type name
+#define GET_FNDECL(x) X_GET_FNDECL x
+
+#define HOOK_UNINSTRUMENT(rettype, name, ...)                       \
+  rettype (*__lq_libc_##name)(MAP_LIST(GET_FNTYPE, __VA_ARGS__));   \
+  rettype name(MAP_LIST(GET_FNDECL, __VA_ARGS__)) {                 \
+                                                                    \
+    if (!(__lq_libc_##name)) __lq_libc_##name = ASSERT_DLSYM(name); \
+    int     state = QASAN_SWAP(QASAN_DISABLED);                     \
+    rettype r = __lq_libc_##name(MAP_LIST(GET_FNPAR, __VA_ARGS__)); \
+    QASAN_SWAP(state);                                              \
+                                                                    \
+    return r;                                                       \
+                                                                    \
+  }
+
+HOOK_UNINSTRUMENT(char *, getenv, (const char *, name))
+
+/*
+HOOK_UNINSTRUMENT(char*, setlocale, (int, category), (const char *, locale))
+HOOK_UNINSTRUMENT(int, setenv, (const char *, name), (const char *, value),
+(int, overwrite)) HOOK_UNINSTRUMENT(char*, getenv, (const char *, name))
+HOOK_UNINSTRUMENT(char*, bindtextdomain, (const char *, domainname), (const char
+*, dirname)) HOOK_UNINSTRUMENT(char*, bind_textdomain_codeset, (const char *,
+domainname), (const char *, codeset)) HOOK_UNINSTRUMENT(char*, gettext, (const
+char *, msgid)) HOOK_UNINSTRUMENT(char*, dgettext, (const char *, domainname),
+(const char *, msgid)) HOOK_UNINSTRUMENT(char*, dcgettext, (const char *,
+domainname), (const char *, msgid), (int, category)) HOOK_UNINSTRUMENT(int,
+__gen_tempname, (char, *tmpl), (int, suffixlen), (int, flags), (int, kind))
+HOOK_UNINSTRUMENT(int, mkstemp, (char *, template))
+HOOK_UNINSTRUMENT(int, mkostemp, (char *, template), (int, flags))
+HOOK_UNINSTRUMENT(int, mkstemps, (char *, template), (int, suffixlen))
+HOOK_UNINSTRUMENT(int, mkostemps, (char *, template), (int, suffixlen), (int,
+flags)) HOOK_UNINSTRUMENT(struct passwd *, getpwnam, (const char *, name))
+HOOK_UNINSTRUMENT(struct passwd *, getpwuid, (uid_t, uid))
+HOOK_UNINSTRUMENT(int, getpwnam_r, (const char *, name), (struct passwd *, pwd),
+(char *, buf), (size_t, buflen), (struct passwd **, result))
+HOOK_UNINSTRUMENT(int, getpwuid_r, (uid_t, uid), (struct passwd *, pwd), (char
+*, buf), (size_t, buflen), (struct passwd **, result))
+*/
+
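For reference, HOOK_UNINSTRUMENT(char *, getenv, (const char *, name)) expands
(after the MAP_LIST machinery has run) to roughly the following pass-through
wrapper, which disables QASAN checks for the duration of the real libc call:

char *(*__lq_libc_getenv)(const char *);

char *getenv(const char *name) {

  if (!(__lq_libc_getenv)) __lq_libc_getenv = ASSERT_DLSYM(getenv);
  int   state = QASAN_SWAP(QASAN_DISABLED);
  char *r = __lq_libc_getenv(name);
  QASAN_SWAP(state);

  return r;

}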
diff --git a/qemu_mode/qemuafl b/qemu_mode/qemuafl
-Subproject 21ff34383764a8c6f66509b3b8d5282468c721e
+Subproject 9a258d5b7a38c045a6e385fcfcf80a746a60e55
diff --git a/qemu_mode/unsigaction/Makefile b/qemu_mode/unsigaction/Makefile
index 206a8f07..c5d2de31 100644
--- a/qemu_mode/unsigaction/Makefile
+++ b/qemu_mode/unsigaction/Makefile
@@ -16,19 +16,15 @@
 
 _UNIQ=_QINU_
 
-TARGETCANDIDATES=unsigaction32.so unsigaction64.so
+TARGETCANDIDATES=unsigaction.so
 _TARGETS=$(_UNIQ)$(AFL_NO_X86)$(_UNIQ)
 __TARGETS=$(_TARGETS:$(_UNIQ)1$(_UNIQ)=)
 TARGETS=$(__TARGETS:$(_UNIQ)$(_UNIQ)=$(TARGETCANDIDATES))
 
 all:  $(TARGETS)
-	@if [ "$(AFL_NO_X86)" != "" ]; then echo "[!] Note: skipping compilation of unsigaction (AFL_NO_X86 set)."; fi
 
-unsigaction32.so:
-	@if $(CC) -m32 -fPIC -shared unsigaction.c -o unsigaction32.so 2>/dev/null ; then echo "unsigaction32 build success"; else echo "unsigaction32 build failure (that's fine)"; fi
-
-unsigaction64.so:
-	@if $(CC) -m64 -fPIC -shared unsigaction.c -o unsigaction64.so 2>/dev/null ; then echo "unsigaction64 build success"; else echo "unsigaction64 build failure (that's fine)"; fi
+unsigaction.so: unsigaction.c
+	@if $(CC) -fPIC -shared unsigaction.c -o unsigaction.so 2>/dev/null ; then echo "unsigaction build success"; else echo "unsigaction build failure (that's fine)"; fi
 
 clean:
-	rm -f unsigaction32.so unsigaction64.so
+	rm -f unsigaction.so
diff --git a/src/afl-analyze.c b/src/afl-analyze.c
index 0af489fe..20aef2da 100644
--- a/src/afl-analyze.c
+++ b/src/afl-analyze.c
@@ -1078,6 +1078,31 @@ int main(int argc, char **argv_orig, char **envp) {
 
   if (optind == argc || !in_file) { usage(argv[0]); }
 
+  if (qemu_mode && getenv("AFL_USE_QASAN")) {
+
+    u8 *preload = getenv("AFL_PRELOAD");
+    u8 *libqasan = get_libqasan_path(argv_orig[0]);
+
+    if (!preload) {
+
+      setenv("AFL_PRELOAD", libqasan, 0);
+
+    } else {
+
+      u8 *result = ck_alloc(strlen(libqasan) + strlen(preload) + 2);
+      strcpy(result, libqasan);
+      strcat(result, " ");
+      strcat(result, preload);
+
+      setenv("AFL_PRELOAD", result, 1);
+      ck_free(result);
+
+    }
+
+    ck_free(libqasan);
+
+  }
+
   map_size = get_map_size();
 
   use_hex_offsets = !!get_afl_env("AFL_ANALYZE_HEX");
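With this change, running afl-analyze in QEMU mode with AFL_USE_QASAN set
prepends the located libqasan library to any user-supplied AFL_PRELOAD
(space-separated), e.g. AFL_PRELOAD="./mylib.so" effectively becomes
AFL_PRELOAD="<path>/libqasan.so ./mylib.so" before the target runs; the exact
path depends on where get_libqasan_path() finds the library.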
diff --git a/src/afl-cc.c b/src/afl-cc.c
index f3dfd49f..959c9a6f 100644
--- a/src/afl-cc.c
+++ b/src/afl-cc.c
@@ -313,17 +313,11 @@ static u8 *find_object(u8 *obj, u8 *argv0) {
 static void edit_params(u32 argc, char **argv, char **envp) {
 
   u8 fortify_set = 0, asan_set = 0, x_set = 0, bit_mode = 0, shared_linking = 0,
-     preprocessor_only = 0, have_unroll = 0, have_o = 0, have_pic = 0;
-  u8 *name;
+     preprocessor_only = 0, have_unroll = 0, have_o = 0, have_pic = 0,
+     have_c = 0;
 
   cc_params = ck_alloc((argc + 128) * sizeof(u8 *));
 
-  name = strrchr(argv[0], '/');
-  if (!name)
-    name = argv[0];
-  else
-    ++name;
-
   if (lto_mode) {
 
     if (lto_flag[0] != '-')
@@ -461,7 +455,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     // laf
     if (getenv("LAF_SPLIT_SWITCHES") || getenv("AFL_LLVM_LAF_SPLIT_SWITCHES")) {
 
-      if (lto_mode) {
+      if (lto_mode && !have_c) {
 
         cc_params[cc_par_cnt++] = alloc_printf(
             "-Wl,-mllvm=-load=%s/split-switches-pass.so", obj_path);
@@ -481,7 +475,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     if (getenv("LAF_TRANSFORM_COMPARES") ||
         getenv("AFL_LLVM_LAF_TRANSFORM_COMPARES")) {
 
-      if (lto_mode) {
+      if (lto_mode && !have_c) {
 
         cc_params[cc_par_cnt++] = alloc_printf(
             "-Wl,-mllvm=-load=%s/compare-transform-pass.so", obj_path);
@@ -501,7 +495,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     if (getenv("LAF_SPLIT_COMPARES") || getenv("AFL_LLVM_LAF_SPLIT_COMPARES") ||
         getenv("AFL_LLVM_LAF_SPLIT_FLOATS")) {
 
-      if (lto_mode) {
+      if (lto_mode && !have_c) {
 
         cc_params[cc_par_cnt++] = alloc_printf(
             "-Wl,-mllvm=-load=%s/split-compares-pass.so", obj_path);
@@ -524,14 +518,14 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     unsetenv("AFL_LD_CALLER");
     if (cmplog_mode) {
 
-      if (lto_mode) {
+      if (lto_mode && !have_c) {
 
         cc_params[cc_par_cnt++] = alloc_printf(
             "-Wl,-mllvm=-load=%s/cmplog-routines-pass.so", obj_path);
         cc_params[cc_par_cnt++] = alloc_printf(
-            "-Wl,-mllvm=-load=%s/split-switches-pass.so", obj_path);
-        cc_params[cc_par_cnt++] = alloc_printf(
             "-Wl,-mllvm=-load=%s/cmplog-instructions-pass.so", obj_path);
+        cc_params[cc_par_cnt++] = alloc_printf(
+            "-Wl,-mllvm=-load=%s/split-switches-pass.so", obj_path);
 
       } else {
 
@@ -541,18 +535,18 @@ static void edit_params(u32 argc, char **argv, char **envp) {
         cc_params[cc_par_cnt++] =
             alloc_printf("%s/cmplog-routines-pass.so", obj_path);
 
-        // reuse split switches from laf
         cc_params[cc_par_cnt++] = "-Xclang";
         cc_params[cc_par_cnt++] = "-load";
         cc_params[cc_par_cnt++] = "-Xclang";
         cc_params[cc_par_cnt++] =
-            alloc_printf("%s/split-switches-pass.so", obj_path);
+            alloc_printf("%s/cmplog-instructions-pass.so", obj_path);
 
+        // reuse split switches from laf
         cc_params[cc_par_cnt++] = "-Xclang";
         cc_params[cc_par_cnt++] = "-load";
         cc_params[cc_par_cnt++] = "-Xclang";
         cc_params[cc_par_cnt++] =
-            alloc_printf("%s/cmplog-instructions-pass.so", obj_path);
+            alloc_printf("%s/split-switches-pass.so", obj_path);
 
       }
 
@@ -560,7 +554,12 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
     }
 
-    if (lto_mode) {
+#if LLVM_MAJOR >= 13
+    // LLVM 13 enables the new pass manager by default; force the legacy one
+    // so that the legacy plugin passes loaded above keep working
+    cc_params[cc_par_cnt++] = "-fno-experimental-new-pass-manager";
+#endif
+
+    if (lto_mode && !have_c) {
 
       u8 *ld_path = strdup(AFL_REAL_LD);
       if (!*ld_path) ld_path = "ld.lld";
@@ -572,7 +571,8 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
       cc_params[cc_par_cnt++] = "-Wl,--allow-multiple-definition";
 
-      if (instrument_mode == INSTRUMENT_CFG)
+      if (instrument_mode == INSTRUMENT_CFG ||
+          instrument_mode == INSTRUMENT_PCGUARD)
         cc_params[cc_par_cnt++] = alloc_printf(
             "-Wl,-mllvm=-load=%s/SanitizerCoverageLTO.so", obj_path);
       else
@@ -586,9 +586,9 @@ static void edit_params(u32 argc, char **argv, char **envp) {
       if (instrument_mode == INSTRUMENT_PCGUARD) {
 
 #if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
-#ifdef __ANDROID__
+  #ifdef __ANDROID__
         cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard";
-#else
+  #else
         if (have_instr_list) {
 
           if (!be_quiet)
@@ -608,7 +608,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
         }
 
-#endif
+  #endif
 #else
   #if LLVM_MAJOR >= 4
         if (!be_quiet)
@@ -687,8 +687,42 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     if (!strncmp(cur, "--afl", 5)) continue;
     if (lto_mode && !strncmp(cur, "-fuse-ld=", 9)) continue;
     if (lto_mode && !strncmp(cur, "--ld-path=", 10)) continue;
+    if (!strncmp(cur, "-fno-unroll", 11)) continue;
+    if (strstr(cur, "afl-compiler-rt") || strstr(cur, "afl-llvm-rt")) continue;
     if (!strcmp(cur, "-Wl,-z,defs") || !strcmp(cur, "-Wl,--no-undefined"))
       continue;
+    if (!strncmp(cur, "-fsanitize=fuzzer-", strlen("-fsanitize=fuzzer-")) ||
+        !strncmp(cur, "-fsanitize-coverage", strlen("-fsanitize-coverage"))) {
+
+      if (!be_quiet) { WARNF("Found '%s' - stripping!", cur); }
+      continue;
+
+    }
+
+    if (!strcmp(cur, "-fsanitize=fuzzer")) {
+
+      u8 *afllib = find_object("libAFLDriver.a", argv[0]);
+
+      if (!be_quiet)
+        WARNF(
+            "Found erroneous '-fsanitize=fuzzer', trying to replace with "
+            "libAFLDriver.a");
+
+      if (!afllib) {
+
+        WARNF(
+            "Cannot find 'libAFLDriver.a' to replace a wrong "
+            "'-fsanitize=fuzzer' in the flags - this will fail!");
+
+      } else {
+
+        cc_params[cc_par_cnt++] = afllib;
+
+      }
+
+      continue;
+
+    }
 
     if (!strcmp(cur, "-m32")) bit_mode = 32;
     if (!strcmp(cur, "armv7a-linux-androideabi")) bit_mode = 32;
@@ -705,9 +739,10 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     if (!strcmp(cur, "-x")) x_set = 1;
     if (!strcmp(cur, "-E")) preprocessor_only = 1;
     if (!strcmp(cur, "-shared")) shared_linking = 1;
+    if (!strcmp(cur, "-c")) have_c = 1;
 
     if (!strncmp(cur, "-O", 2)) have_o = 1;
-    if (!strncmp(cur, "-f", 2) && strstr(cur, "unroll-loop")) have_unroll = 1;
+    if (!strncmp(cur, "-funroll-loop", 13)) have_unroll = 1;
 
     cc_params[cc_par_cnt++] = cur;
 
@@ -796,10 +831,8 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   }
 
-#if defined(USEMMAP)
-  #if !defined(__HAIKU__)
-  cc_params[cc_par_cnt++] = "-lrt";
-  #endif
+#if defined(USEMMAP) && !defined(__HAIKU__)
+  if (!have_c) cc_params[cc_par_cnt++] = "-lrt";
 #endif
 
   cc_params[cc_par_cnt++] = "-D__AFL_HAVE_MANUAL_CONTROL=1";
@@ -966,17 +999,18 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     }
 
   #if !defined(__APPLE__) && !defined(__sun)
-    if (!shared_linking)
+    if (!shared_linking && !have_c)
       cc_params[cc_par_cnt++] =
           alloc_printf("-Wl,--dynamic-list=%s/dynamic_list.txt", obj_path);
   #endif
 
-  #if defined(USEMMAP)
-    #if !defined(__HAIKU__)
-    cc_params[cc_par_cnt++] = "-lrt";
-    #endif
+  #if defined(USEMMAP) && !defined(__HAIKU__)
+    if (!have_c) cc_params[cc_par_cnt++] = "-lrt";
   #endif
 
+    // prevent unnecessary build errors
+    cc_params[cc_par_cnt++] = "-Wno-unused-command-line-argument";
+
   }
 
 #endif
@@ -1039,7 +1073,7 @@ int main(int argc, char **argv, char **envp) {
 #endif
 
 #ifdef __ANDROID__
-    have_llvm = 1;
+  have_llvm = 1;
 #endif
 
   if ((ptr = find_object("afl-gcc-pass.so", argv[0])) != NULL) {
@@ -1286,7 +1320,6 @@ int main(int argc, char **argv, char **envp) {
 
       }
 
-      // this is a hidden option
       if (strncasecmp(ptr2, "llvmnative", strlen("llvmnative")) == 0 ||
           strncasecmp(ptr2, "llvm-native", strlen("llvm-native")) == 0) {
 
@@ -1357,29 +1390,28 @@ int main(int argc, char **argv, char **envp) {
 
       if (strncasecmp(ptr2, "ngram", strlen("ngram")) == 0) {
 
-        ptr2 += strlen("ngram");
-        while (*ptr2 && (*ptr2 < '0' || *ptr2 > '9'))
-          ptr2++;
+        u8 *ptr3 = ptr2 + strlen("ngram");
+        while (*ptr3 && (*ptr3 < '0' || *ptr3 > '9'))
+          ptr3++;
 
-        if (!*ptr2) {
+        if (!*ptr3) {
 
-          if ((ptr2 = getenv("AFL_LLVM_NGRAM_SIZE")) == NULL)
+          if ((ptr3 = getenv("AFL_LLVM_NGRAM_SIZE")) == NULL)
             FATAL(
                 "you must set the NGRAM size with (e.g. for value 2) "
                 "AFL_LLVM_INSTRUMENT=ngram-2");
 
         }
 
-        ngram_size = atoi(ptr2);
+        ngram_size = atoi(ptr3);
         if (ngram_size < 2 || ngram_size > NGRAM_SIZE_MAX)
           FATAL(
               "NGRAM instrumentation option must be between 2 and "
-              "NGRAM_SIZE_MAX "
-              "(%u)",
+              "NGRAM_SIZE_MAX (%u)",
               NGRAM_SIZE_MAX);
         instrument_opt_mode |= (INSTRUMENT_OPT_NGRAM);
-        ptr2 = alloc_printf("%u", ngram_size);
-        setenv("AFL_LLVM_NGRAM_SIZE", ptr2, 1);
+        u8 *ptr4 = alloc_printf("%u", ngram_size);
+        setenv("AFL_LLVM_NGRAM_SIZE", ptr4, 1);
 
       }
 
@@ -1443,34 +1475,34 @@ int main(int argc, char **argv, char **envp) {
         "  CC=afl-cc CXX=afl-c++ meson\n\n");
 
     SAYF(
-        "                                     |---------------- FEATURES "
-        "---------------|\n"
-        "MODES:                                NCC PERSIST SNAP DICT   LAF "
+        "                                       |------------- FEATURES "
+        "-------------|\n"
+        "MODES:                                  NCC PERSIST DICT   LAF "
         "CMPLOG SELECT\n"
         "  [LTO] llvm LTO:          %s%s\n"
-        "      PCGUARD              DEFAULT    yes yes     yes  yes    yes yes "
+        "      PCGUARD              DEFAULT      yes yes     yes    yes yes "
         "   yes\n"
-        "      CLASSIC                         yes yes     yes  yes    yes yes "
+        "      CLASSIC                           yes yes     yes    yes yes "
         "   yes\n"
         "  [LLVM] llvm:             %s%s\n"
-        "      PCGUARD              %s    yes yes     yes  module yes yes    "
+        "      PCGUARD              %s      yes yes     module yes yes    "
         "extern\n"
-        "      CLASSIC              %s    no  yes     yes  module yes yes    "
+        "      CLASSIC              %s      no  yes     module yes yes    "
         "yes\n"
         "        - NORMAL\n"
         "        - CTX\n"
         "        - NGRAM-{2-16}\n"
-        "      INSTRIM                         no  yes     yes  module yes yes "
+        "      INSTRIM                           no  yes     module yes yes "
         "   yes\n"
         "        - NORMAL\n"
         "        - CTX\n"
         "        - NGRAM-{2-16}\n"
         "  [GCC_PLUGIN] gcc plugin: %s%s\n"
-        "      CLASSIC              DEFAULT    no  yes     yes  no     no  no  "
-        "   yes\n"
+        "      CLASSIC              DEFAULT      no  yes     no     no  no     "
+        "yes\n"
         "  [GCC/CLANG] simple gcc/clang: %s%s\n"
-        "      CLASSIC              DEFAULT    no  no      no   no     no  no  "
-        "   no\n\n",
+        "      CLASSIC              DEFAULT      no  no      no     no  no     "
+        "no\n\n",
         have_lto ? "AVAILABLE" : "unavailable!",
         compiler_mode == LTO ? " [SELECTED]" : "",
         have_llvm ? "AVAILABLE" : "unavailable!",
@@ -1515,6 +1547,7 @@ int main(int argc, char **argv, char **envp) {
         "((instrumentation/README.ngram.md)\n"
         "  INSTRIM: Dominator tree (for LLVM <= 6.0) "
         "(instrumentation/README.instrim.md)\n\n");
+
 #undef NATIVE_MSG
 
     SAYF(
@@ -1524,9 +1557,6 @@ int main(int argc, char **argv, char **envp) {
         "          (instrumentation/README.lto.md)\n"
         "  PERSIST: persistent mode support [code] (huge speed increase!)\n"
         "          (instrumentation/README.persistent_mode.md)\n"
-        "  SNAP:   linux lkm snapshot module support [automatic] (speed "
-        "increase)\n"
-        "          (https://github.com/AFLplusplus/AFL-Snapshot-LKM/)\n"
         "  DICT:   dictionary in the target [yes=automatic or llvm module "
         "pass]\n"
         "          (instrumentation/README.lto.md + "
@@ -1557,6 +1587,7 @@ int main(int argc, char **argv, char **envp) {
           "libtokencap.so)\n"
           "  AFL_PATH: path to instrumenting pass and runtime  "
           "(afl-compiler-rt.*o)\n"
+          "  AFL_IGNORE_UNKNOWN_ENVS: don't warn on unknown env vars\n"
           "  AFL_INST_RATIO: percentage of branches to instrument\n"
           "  AFL_QUIET: suppress verbose output\n"
           "  AFL_HARDEN: adds code hardening to catch memory bugs\n"
@@ -1649,16 +1680,15 @@ int main(int argc, char **argv, char **envp) {
     if (have_lto)
       SAYF("afl-cc LTO with ld=%s %s\n", AFL_REAL_LD, AFL_CLANG_FLTO);
     if (have_llvm)
-      SAYF("afl-cc LLVM version %d using binary path \"%s\".\n", LLVM_MAJOR,
+      SAYF("afl-cc LLVM version %d using the binary path \"%s\".\n", LLVM_MAJOR,
            LLVM_BINDIR);
 #endif
 
-#if defined(USEMMAP)
+#ifdef USEMMAP
   #if !defined(__HAIKU__)
-    cc_params[cc_par_cnt++] = "-lrt";
-    SAYF("Compiled with shm_open support (adds -lrt when linking).\n");
-  #else
     SAYF("Compiled with shm_open support.\n");
+  #else
+    SAYF("Compiled with shm_open support (adds -lrt when linking).\n");
   #endif
 #else
     SAYF("Compiled with shmat support.\n");
@@ -1678,15 +1708,16 @@ int main(int argc, char **argv, char **envp) {
   if (compiler_mode == LTO) {
 
     if (instrument_mode == 0 || instrument_mode == INSTRUMENT_LTO ||
-        instrument_mode == INSTRUMENT_CFG) {
+        instrument_mode == INSTRUMENT_CFG ||
+        instrument_mode == INSTRUMENT_PCGUARD) {
 
       lto_mode = 1;
-      if (!instrument_mode) {
-
-        instrument_mode = INSTRUMENT_CFG;
-        // ptr = instrument_mode_string[instrument_mode];
+      // force CFG
+      // if (!instrument_mode) {
 
-      }
+      instrument_mode = INSTRUMENT_PCGUARD;
+      // ptr = instrument_mode_string[instrument_mode];
+      // }
 
     } else if (instrument_mode == INSTRUMENT_LTO ||
 
@@ -1793,12 +1824,23 @@ int main(int argc, char **argv, char **envp) {
 
   }
 
+  if (!be_quiet && (compiler_mode == GCC || compiler_mode == CLANG)) {
+
+    WARNF(
+        "You are using outdated instrumentation, install LLVM and/or "
+        "gcc-plugin and use afl-clang-fast/afl-clang-lto/afl-gcc-fast "
+        "instead!");
+
+  }
+
   if (debug) {
 
     DEBUGF("cd '%s';", getthecwd());
     for (i = 0; i < argc; i++)
       SAYF(" '%s'", argv[i]);
     SAYF("\n");
+    fflush(stdout);
+    fflush(stderr);
 
   }
 
@@ -1833,12 +1875,16 @@ int main(int argc, char **argv, char **envp) {
 
   edit_params(argc, argv, envp);
 
+  if (lto_mode) { setenv("_AFL_LTO_COMPILE", "1", 1); }
+
   if (debug) {
 
     DEBUGF("cd '%s';", getthecwd());
     for (i = 0; i < (s32)cc_par_cnt; i++)
       SAYF(" '%s'", cc_params[i]);
     SAYF("\n");
+    fflush(stdout);
+    fflush(stderr);
 
   }
 
diff --git a/src/afl-common.c b/src/afl-common.c
index cf996548..589aac71 100644
--- a/src/afl-common.c
+++ b/src/afl-common.c
@@ -334,6 +334,66 @@ char **get_wine_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv) {
 
 }
 
+/* Get libqasan path. */
+
+u8 *get_libqasan_path(u8 *own_loc) {
+
+  if (unlikely(!own_loc)) { FATAL("BUG: param own_loc is NULL"); }
+
+  u8 *tmp, *cp = NULL, *rsl, *own_copy;
+
+  tmp = getenv("AFL_PATH");
+
+  if (tmp) {
+
+    cp = alloc_printf("%s/libqasan.so", tmp);
+
+    if (access(cp, X_OK)) { FATAL("Unable to find '%s'", cp); }
+
+    return cp;
+
+  }
+
+  own_copy = ck_strdup(own_loc);
+  rsl = strrchr(own_copy, '/');
+
+  if (rsl) {
+
+    *rsl = 0;
+
+    cp = alloc_printf("%s/libqasan.so", own_copy);
+    ck_free(own_copy);
+
+    if (!access(cp, X_OK)) { return cp; }
+
+  } else {
+
+    ck_free(own_copy);
+
+  }
+
+  if (!access(BIN_PATH "/libqasan.so", X_OK)) {
+
+    if (cp) { ck_free(cp); }
+
+    return ck_strdup(BIN_PATH "/libqasan.so");
+
+  }
+
+  SAYF("\n" cLRD "[-] " cRST
+       "Oops, unable to find the 'libqasan.so' binary. The binary must be "
+       "built\n"
+       "    separately by following the instructions in "
+       "qemu_mode/libqasan/README.md. "
+       "If you\n"
+       "    already have the binary installed, you may need to specify "
+       "AFL_PATH in the\n"
+       "    environment.\n");
+
+  FATAL("Failed to locate 'libqasan.so'.");
+
+}
+
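
A sketch of how a caller consumes the helper above; the search order
(AFL_PATH, then the directory of the running binary, then BIN_PATH) comes
from the code, while the AFL_PRELOAD plumbing shown here is an assumption
for illustration:

  u8 *libqasan = get_libqasan_path(argv[0]);   /* FATALs if not found */
  setenv("AFL_PRELOAD", libqasan, 0);          /* hypothetical preload hook */
  ck_free(libqasan);
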
 /* Find binary, used by analyze, showmap, tmin
    @returns the path, allocating the string */
 
@@ -463,7 +523,7 @@ void check_environment_vars(char **envp) {
   if (be_quiet) { return; }
 
   int   index = 0, issue_detected = 0;
-  char *env, *val;
+  char *env, *val, *ignore = getenv("AFL_IGNORE_UNKNOWN_ENVS");
   while ((env = envp[index++]) != NULL) {
 
     if (strncmp(env, "ALF_", 4) == 0 || strncmp(env, "_ALF", 4) == 0 ||
@@ -522,7 +582,7 @@ void check_environment_vars(char **envp) {
 
       }
 
-      if (match == 0) {
+      if (match == 0 && !ignore) {
 
         WARNF("Mistyped AFL environment variable: %s", env);
         issue_detected = 1;
@@ -921,7 +981,7 @@ u8 *u_stringify_time_diff(u8 *buf, u64 cur_ms, u64 event_ms) {
 /* Reads the map size from ENV */
 u32 get_map_size(void) {
 
-  uint32_t map_size = MAP_SIZE;
+  uint32_t map_size = (MAP_SIZE << 2);  // needed for target ctors :(
   char *   ptr;
 
   if ((ptr = getenv("AFL_MAP_SIZE")) || (ptr = getenv("AFL_MAPSIZE"))) {
diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c
index 39f044f2..9ee59822 100644
--- a/src/afl-forkserver.c
+++ b/src/afl-forkserver.c
@@ -58,7 +58,7 @@ static list_t fsrv_list = {.element_prealloc_count = 0};
 
 static void fsrv_exec_child(afl_forkserver_t *fsrv, char **argv) {
 
-  if (fsrv->qemu_mode) setenv("AFL_DISABLE_LLVM_INSTRUMENTATION", "1", 0);
+  if (fsrv->qemu_mode) { setenv("AFL_DISABLE_LLVM_INSTRUMENTATION", "1", 0); }
 
   execv(fsrv->target_path, argv);
 
@@ -91,7 +91,7 @@ void afl_fsrv_init(afl_forkserver_t *fsrv) {
   fsrv->map_size = get_map_size();
   fsrv->use_fauxsrv = false;
   fsrv->last_run_timed_out = false;
-
+  fsrv->debug = false;
   fsrv->uses_crash_exitcode = false;
   fsrv->uses_asan = false;
 
@@ -117,6 +117,7 @@ void afl_fsrv_init_dup(afl_forkserver_t *fsrv_to, afl_forkserver_t *from) {
   fsrv_to->uses_crash_exitcode = from->uses_crash_exitcode;
   fsrv_to->crash_exitcode = from->crash_exitcode;
   fsrv_to->kill_signal = from->kill_signal;
+  fsrv_to->debug = from->debug;
 
   // These are forkserver specific.
   fsrv_to->out_dir_fd = -1;
@@ -396,6 +397,12 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
 
     struct rlimit r;
 
+    if (!fsrv->cmplog_binary && fsrv->qemu_mode == false) {
+
+      unsetenv(CMPLOG_SHM_ENV_VAR);  // we do not want that in non-cmplog fsrv
+
+    }
+
     /* Umpf. On OpenBSD, the default fd limit for root users is set to
        soft 128. Let's try to fix that... */
     if (!getrlimit(RLIMIT_NOFILE, &r) && r.rlim_cur < FORKSRV_FD + 2) {
@@ -478,38 +485,45 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
 
     /* Set sane defaults for ASAN if nothing else specified. */
 
-    setenv("ASAN_OPTIONS",
-           "abort_on_error=1:"
-           "detect_leaks=0:"
-           "malloc_context_size=0:"
-           "symbolize=0:"
-           "allocator_may_return_null=1:"
-           "handle_segv=0:"
-           "handle_sigbus=0:"
-           "handle_abort=0:"
-           "handle_sigfpe=0:"
-           "handle_sigill=0",
-           0);
+    if (fsrv->debug == true && !getenv("ASAN_OPTIONS"))
+      setenv("ASAN_OPTIONS",
+             "abort_on_error=1:"
+             "detect_leaks=0:"
+             "malloc_context_size=0:"
+             "symbolize=0:"
+             "allocator_may_return_null=1:"
+             "handle_segv=0:"
+             "handle_sigbus=0:"
+             "handle_abort=0:"
+             "handle_sigfpe=0:"
+             "handle_sigill=0",
+             0);
 
     /* Set sane defaults for UBSAN if nothing else specified. */
 
-    setenv("UBSAN_OPTIONS",
-           "halt_on_error=1:"
-           "abort_on_error=1:"
-           "malloc_context_size=0:"
-           "allocator_may_return_null=1:"
-           "symbolize=0:"
-           "handle_segv=0:"
-           "handle_sigbus=0:"
-           "handle_abort=0:"
-           "handle_sigfpe=0:"
-           "handle_sigill=0",
-           0);
+    if (fsrv->debug == true && !getenv("UBSAN_OPTIONS"))
+      setenv("UBSAN_OPTIONS",
+             "halt_on_error=1:"
+             "abort_on_error=1:"
+             "malloc_context_size=0:"
+             "allocator_may_return_null=1:"
+             "symbolize=0:"
+             "handle_segv=0:"
+             "handle_sigbus=0:"
+             "handle_abort=0:"
+             "handle_sigfpe=0:"
+             "handle_sigill=0",
+             0);
+
+    /* Envs for QASan */
+    setenv("QASAN_MAX_CALL_STACK", "0", 0);
+    setenv("QASAN_SYMBOLIZE", "0", 0);
 
     /* MSAN is tricky, because it doesn't support abort_on_error=1 at this
        point. So, we do this in a very hacky way. */
 
-    setenv("MSAN_OPTIONS",
+    if (fsrv->debug == true && !getenv("MSAN_OPTIONS"))
+      setenv("MSAN_OPTIONS",
            "exit_code=" STRINGIFY(MSAN_ERROR) ":"
            "symbolize=0:"
            "abort_on_error=1:"
@@ -668,11 +682,7 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
 
       if ((status & FS_OPT_AUTODICT) == FS_OPT_AUTODICT) {
 
-        if (ignore_autodict) {
-
-          if (!be_quiet) { WARNF("Ignoring offered AUTODICT feature."); }
-
-        } else {
+        if (!ignore_autodict) {
 
           if (fsrv->add_extra_func == NULL || fsrv->afl_ptr == NULL) {
 
@@ -955,7 +965,9 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
 
 }
 
-static void afl_fsrv_kill(afl_forkserver_t *fsrv) {
+/* Stop the forkserver and child */
+
+void afl_fsrv_kill(afl_forkserver_t *fsrv) {
 
   if (fsrv->child_pid > 0) { kill(fsrv->child_pid, fsrv->kill_signal); }
   if (fsrv->fsrv_pid > 0) {
@@ -965,13 +977,28 @@ static void afl_fsrv_kill(afl_forkserver_t *fsrv) {
 
   }
 
+  close(fsrv->fsrv_ctl_fd);
+  close(fsrv->fsrv_st_fd);
+  fsrv->fsrv_pid = -1;
+  fsrv->child_pid = -1;
+
+}
+
+/* Get the map size from the target forkserver */
+
+u32 afl_fsrv_get_mapsize(afl_forkserver_t *fsrv, char **argv,
+                         volatile u8 *stop_soon_p, u8 debug_child_output) {
+
+  afl_fsrv_start(fsrv, argv, stop_soon_p, debug_child_output);
+  return fsrv->map_size;
+
 }
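
A usage sketch for the new helper: a tool can spin the forkserver up once
just to learn the target's map size, then tear it down again and size its
buffers accordingly. The fsrv/use_argv setup is elided and assumed:

  volatile u8 stop_soon = 0;
  u32 map_size = afl_fsrv_get_mapsize(&fsrv, use_argv, &stop_soon, 0);
  afl_fsrv_kill(&fsrv);   /* buffers can now be allocated to map_size */
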
 
 /* Delete the current testcase and write the buf to the testcase file */
 
 void afl_fsrv_write_to_testcase(afl_forkserver_t *fsrv, u8 *buf, size_t len) {
 
-  if (fsrv->shmem_fuzz) {
+  if (likely(fsrv->use_shmem_fuzz && fsrv->shmem_fuzz)) {
 
     if (unlikely(len > MAX_FILE)) len = MAX_FILE;
 
@@ -1028,6 +1055,7 @@ void afl_fsrv_write_to_testcase(afl_forkserver_t *fsrv, u8 *buf, size_t len) {
 
     }
 
+    // fprintf(stderr, "WRITE %d %u\n", fd, len);
     ck_write(fd, buf, len, fsrv->out_file);
 
     if (fsrv->use_stdin) {
diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c
index 586f3990..4ed59364 100644
--- a/src/afl-fuzz-bitmap.c
+++ b/src/afl-fuzz-bitmap.c
@@ -325,7 +325,8 @@ u8 *describe_op(afl_state_t *afl, u8 new_bits, size_t max_description_len) {
 
     }
 
-    sprintf(ret + strlen(ret), ",time:%llu", get_cur_time() - afl->start_time);
+    sprintf(ret + strlen(ret), ",time:%llu",
+            get_cur_time() + afl->prev_run_time - afl->start_time);
 
     if (afl->current_custom_fuzz &&
         afl->current_custom_fuzz->afl_custom_describe) {
@@ -700,11 +701,7 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
       if (likely(!afl->non_instrumented_mode)) {
 
-        if (!classified) {
-
-          classify_counts(&afl->fsrv);
-
-        }
+        if (!classified) { classify_counts(&afl->fsrv); }
 
         simplify_trace(afl, afl->fsrv.trace_bits);
 
diff --git a/src/afl-fuzz-cmplog.c b/src/afl-fuzz-cmplog.c
index 8ffc6e1b..27c6c413 100644
--- a/src/afl-fuzz-cmplog.c
+++ b/src/afl-fuzz-cmplog.c
@@ -33,6 +33,8 @@ void cmplog_exec_child(afl_forkserver_t *fsrv, char **argv) {
 
   setenv("___AFL_EINS_ZWEI_POLIZEI___", "1", 1);
 
+  if (fsrv->qemu_mode) { setenv("AFL_DISABLE_LLVM_INSTRUMENTATION", "1", 0); }
+
   if (!fsrv->qemu_mode && argv[0] != fsrv->cmplog_binary) {
 
     argv[0] = fsrv->cmplog_binary;
diff --git a/src/afl-fuzz-extras.c b/src/afl-fuzz-extras.c
index a3583651..7ecad233 100644
--- a/src/afl-fuzz-extras.c
+++ b/src/afl-fuzz-extras.c
@@ -413,7 +413,7 @@ void dedup_extras(afl_state_t *afl) {
         if (j + 1 < afl->extras_cnt)  // not at the end of the list?
           memmove((char *)&afl->extras[j], (char *)&afl->extras[j + 1],
                   (afl->extras_cnt - j - 1) * sizeof(struct extra_data));
-        afl->extras_cnt--;
+        --afl->extras_cnt;
         goto restart_dedup;  // restart if several duplicates are in a row
 
       }
diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c
index dbffa4f9..702e732d 100644
--- a/src/afl-fuzz-init.c
+++ b/src/afl-fuzz-init.c
@@ -25,6 +25,7 @@
 
 #include "afl-fuzz.h"
 #include <limits.h>
+#include "cmplog.h"
 
 #ifdef HAVE_AFFINITY
 
@@ -460,6 +461,7 @@ void read_foreign_testcases(afl_state_t *afl, int first) {
   u32             i, iter;
 
   u8 val_buf[2][STRINGIFY_VAL_SIZE_MAX];
+  u8 foreign_name[16];
 
   for (iter = 0; iter < afl->foreign_sync_cnt; iter++) {
 
@@ -467,11 +469,22 @@ void read_foreign_testcases(afl_state_t *afl, int first) {
         afl->foreign_syncs[iter].dir[0] != 0) {
 
       if (first) ACTF("Scanning '%s'...", afl->foreign_syncs[iter].dir);
-      time_t ctime_max = 0;
+      time_t mtime_max = 0;
+      u8 *   name = strrchr(afl->foreign_syncs[iter].dir, '/');
+      if (!name) { name = afl->foreign_syncs[iter].dir; } else { ++name; }
+      if (!strcmp(name, "queue") || !strcmp(name, "out") ||
+          !strcmp(name, "default")) {
 
-      /* We use scandir() + alphasort() rather than readdir() because otherwise,
-         the ordering of test cases would vary somewhat randomly and would be
-         difficult to control. */
+        snprintf(foreign_name, sizeof(foreign_name), "foreign_%u", iter);
+
+      } else {
+
+        snprintf(foreign_name, sizeof(foreign_name), "%s_%u", name, iter);
+
+      }
+
+      /* We do not use sorting yet and do a more expensive mtime check
+         instead. An mtimesort() implementation would be better, though. */
 
       nl_cnt = scandir(afl->foreign_syncs[iter].dir, &nl, NULL, NULL);
 
@@ -525,8 +538,8 @@ void read_foreign_testcases(afl_state_t *afl, int first) {
 
         }
 
-        /* we detect new files by their ctime */
-        if (likely(st.st_ctime <= afl->foreign_syncs[iter].ctime)) {
+        /* we detect new files by their mtime */
+        if (likely(st.st_mtime <= afl->foreign_syncs[iter].mtime)) {
 
           ck_free(fn2);
           continue;
@@ -581,18 +594,18 @@ void read_foreign_testcases(afl_state_t *afl, int first) {
 
         write_to_testcase(afl, mem, st.st_size);
         fault = fuzz_run_target(afl, &afl->fsrv, afl->fsrv.exec_tmout);
-        afl->syncing_party = "foreign";
+        afl->syncing_party = foreign_name;
         afl->queued_imported +=
             save_if_interesting(afl, mem, st.st_size, fault);
         afl->syncing_party = 0;
         munmap(mem, st.st_size);
         close(fd);
 
-        if (st.st_ctime > ctime_max) ctime_max = st.st_ctime;
+        if (st.st_mtime > mtime_max) mtime_max = st.st_mtime;
 
       }
 
-      afl->foreign_syncs[iter].ctime = ctime_max;
+      afl->foreign_syncs[iter].mtime = mtime_max;
       free(nl);                                              /* not tracked */
 
     }
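
Switching from st_ctime to st_mtime matters for rsync-style imports, which
preserve the modification time but reset the change time. Reduced to its
core, the detection step looks like this sketch (the real loop also filters
by file size and maps the file into memory):

#include <sys/stat.h>
#include <time.h>

/* sketch: is this foreign file new since the last scan? */
static int is_new_testcase(const char *path, time_t last_seen,
                           time_t *mtime_max) {

  struct stat st;
  if (stat(path, &st) != 0 || !S_ISREG(st.st_mode)) { return 0; }
  if (st.st_mtime <= last_seen) { return 0; }  /* imported earlier */
  if (st.st_mtime > *mtime_max) { *mtime_max = st.st_mtime; }
  return 1;

}
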
@@ -729,14 +742,41 @@ void read_testcases(afl_state_t *afl, u8 *directory) {
     add_to_queue(afl, fn2, st.st_size >= MAX_FILE ? MAX_FILE : st.st_size,
                  passed_det);
 
-    if (unlikely(afl->schedule >= FAST && afl->schedule <= RARE)) {
+    if (unlikely(afl->shm.cmplog_mode)) {
+
+      if (afl->cmplog_lvl == 1) {
+
+        if (!afl->cmplog_max_filesize ||
+            afl->cmplog_max_filesize < st.st_size) {
+
+          afl->cmplog_max_filesize = st.st_size;
+
+        }
+
+      } else if (afl->cmplog_lvl == 2) {
+
+        if (!afl->cmplog_max_filesize ||
+            afl->cmplog_max_filesize > st.st_size) {
+
+          afl->cmplog_max_filesize = st.st_size;
+
+        }
 
-      u64 cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
-      afl->queue_top->n_fuzz_entry = cksum % N_FUZZ_SIZE;
-      afl->n_fuzz[afl->queue_top->n_fuzz_entry] = 1;
+      }
 
     }
 
+    /*
+        if (unlikely(afl->schedule >= FAST && afl->schedule <= RARE)) {
+
+          u64 cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size,
+       HASH_CONST); afl->queue_top->n_fuzz_entry = cksum % N_FUZZ_SIZE;
+          afl->n_fuzz[afl->queue_top->n_fuzz_entry] = 1;
+
+        }
+
+    */
+
   }
 
   free(nl);                                                  /* not tracked */
@@ -756,6 +796,20 @@ void read_testcases(afl_state_t *afl, u8 *directory) {
 
   }
 
+  if (unlikely(afl->shm.cmplog_mode)) {
+
+    if (afl->cmplog_max_filesize < 1024) {
+
+      afl->cmplog_max_filesize = 1024;
+
+    } else {
+
+      afl->cmplog_max_filesize = (((afl->cmplog_max_filesize >> 10) + 1) << 10);
+
+    }
+
+  }
+
   afl->last_path_time = 0;
   afl->queued_at_start = afl->queued_paths;
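
The cap computed above is snapped to the next whole KiB: for a size cap of
5000 bytes, 5000 >> 10 is 4, plus one and shifted back gives 5120. A
minimal check of the rounding:

#include <stdio.h>

int main(void) {

  unsigned sz = 5000;
  unsigned capped = ((sz >> 10) + 1) << 10;   /* round up to next KiB */
  printf("%u -> %u\n", sz, capped);           /* prints: 5000 -> 5120 */
  return 0;

}
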
 
@@ -766,13 +820,16 @@ void read_testcases(afl_state_t *afl, u8 *directory) {
 
 void perform_dry_run(afl_state_t *afl) {
 
-  struct queue_entry *q = afl->queue;
-  u32                 cal_failures = 0;
+  struct queue_entry *q;
+  u32                 cal_failures = 0, idx;
   u8 *                skip_crashes = afl->afl_env.afl_skip_crashes;
+  u8 *                use_mem;
 
-  while (q) {
+  for (idx = 0; idx < afl->queued_paths; idx++) {
+
+    q = afl->queue_buf[idx];
+    if (unlikely(q->disabled)) { continue; }
 
-    u8  use_mem[MAX_FILE];
     u8  res;
     s32 fd;
 
@@ -783,6 +840,8 @@ void perform_dry_run(afl_state_t *afl) {
 
     }
 
+    if (afl->afl_env.afl_cmplog_only_new) { q->colorized = CMPLOG_LVL_MAX; }
+
     u8 *fn = strrchr(q->fname, '/') + 1;
 
     ACTF("Attempting dry run with '%s'...", fn);
@@ -791,6 +850,7 @@ void perform_dry_run(afl_state_t *afl) {
     if (fd < 0) { PFATAL("Unable to open '%s'", q->fname); }
 
     u32 read_len = MIN(q->len, (u32)MAX_FILE);
+    use_mem = afl_realloc(AFL_BUF_PARAM(in), read_len);
+    if (unlikely(!use_mem)) { PFATAL("alloc"); }
     if (read(fd, use_mem, read_len) != (ssize_t)read_len) {
 
       FATAL("Short read from '%s'", q->fname);
@@ -987,25 +1047,33 @@ void perform_dry_run(afl_state_t *afl) {
         /* Remove from fuzzing queue but keep for splicing */
 
         struct queue_entry *p = afl->queue;
+
+        if (!p->was_fuzzed) {
+
+          p->was_fuzzed = 1;
+          --afl->pending_not_fuzzed;
+          --afl->active_paths;
+
+        }
+
         p->disabled = 1;
         p->perf_score = 0;
-        while (p && p->next != q)
-          p = p->next;
 
-        if (p)
-          p->next = q->next;
-        else
-          afl->queue = q->next;
+        u32 i = 0;
+        while (unlikely(afl->queue_buf[i]->disabled)) {
+
+          ++i;
 
-        --afl->pending_not_fuzzed;
-        --afl->active_paths;
+        }
+
+        afl->queue = afl->queue_buf[i];
 
         afl->max_depth = 0;
-        p = afl->queue;
-        while (p) {
+        for (i = 0; i < afl->queued_paths; i++) {
 
-          if (p->depth > afl->max_depth) afl->max_depth = p->depth;
-          p = p->next;
+          if (!afl->queue_buf[i]->disabled &&
+              afl->queue_buf[i]->depth > afl->max_depth)
+            afl->max_depth = afl->queue_buf[i]->depth;
 
         }
 
@@ -1038,8 +1106,6 @@ void perform_dry_run(afl_state_t *afl) {
 
     }
 
-    q = q->next;
-
   }
 
   if (cal_failures) {
@@ -1065,74 +1131,69 @@ void perform_dry_run(afl_state_t *afl) {
 
   /* Now we remove all entries from the queue that have a duplicate trace map */
 
-  q = afl->queue;
-  struct queue_entry *p, *prev = NULL;
-  int                 duplicates = 0;
-
-restart_outer_cull_loop:
+  u32 duplicates = 0, i;
 
-  while (q) {
+  for (idx = 0; idx < afl->queued_paths; idx++) {
 
-    if (q->cal_failed || !q->exec_cksum) { goto next_entry; }
+    q = afl->queue_buf[idx];
+    if (q->disabled || q->cal_failed || !q->exec_cksum) { continue; }
 
-  restart_inner_cull_loop:
+    u32 done = 0;
+    for (i = idx + 1; i < afl->queued_paths && !done; i++) {
 
-    p = q->next;
+      struct queue_entry *p = afl->queue_buf[i];
+      if (p->disabled || p->cal_failed || !p->exec_cksum) { continue; }
 
-    while (p) {
-
-      if (!p->cal_failed && p->exec_cksum == q->exec_cksum) {
+      if (p->exec_cksum == q->exec_cksum) {
 
         duplicates = 1;
-        --afl->pending_not_fuzzed;
-        afl->active_paths--;
-
-        // We do not remove any of the memory allocated because for
-        // splicing the data might still be interesting.
-        // We only decouple them from the linked list.
-        // This will result in some leaks at exit, but who cares.
 
         // we keep the shorter file
         if (p->len >= q->len) {
 
+          if (!p->was_fuzzed) {
+
+            p->was_fuzzed = 1;
+            --afl->pending_not_fuzzed;
+            --afl->active_paths;
+
+          }
+
           p->disabled = 1;
           p->perf_score = 0;
-          q->next = p->next;
-          goto restart_inner_cull_loop;
 
         } else {
 
+          if (!q->was_fuzzed) {
+
+            q->was_fuzzed = 1;
+            --afl->pending_not_fuzzed;
+            --afl->active_paths;
+
+          }
+
           q->disabled = 1;
           q->perf_score = 0;
-          if (prev)
-            prev->next = q = p;
-          else
-            afl->queue = q = p;
-          goto restart_outer_cull_loop;
+
+          done = 1;
 
         }
 
       }
 
-      p = p->next;
-
     }
 
-  next_entry:
-
-    prev = q;
-    q = q->next;
-
   }
 
   if (duplicates) {
 
     afl->max_depth = 0;
-    q = afl->queue;
-    while (q) {
 
-      if (q->depth > afl->max_depth) afl->max_depth = q->depth;
-      q = q->next;
+    for (idx = 0; idx < afl->queued_paths; idx++) {
+
+      if (!afl->queue_buf[idx]->disabled &&
+          afl->queue_buf[idx]->depth > afl->max_depth)
+        afl->max_depth = afl->queue_buf[idx]->depth;
 
     }
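
The dry-run dedup now walks the flat queue_buf array instead of unlinking
entries from a list: disabling an entry is a flag flip, and the buffer is
kept intact for splicing. The iteration idiom this commit uses throughout,
as a sketch:

  u32 idx;
  for (idx = 0; idx < afl->queued_paths; idx++) {

    struct queue_entry *q = afl->queue_buf[idx];
    if (q->disabled) { continue; }   /* skipped everywhere, never freed */
    /* ... work on q ... */

  }
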
 
@@ -1181,12 +1242,16 @@ static void link_or_copy(u8 *old_path, u8 *new_path) {
 
 void pivot_inputs(afl_state_t *afl) {
 
-  struct queue_entry *q = afl->queue;
-  u32                 id = 0;
+  struct queue_entry *q;
+  u32                 id = 0, i;
 
   ACTF("Creating hard links for all input files...");
 
-  while (q) {
+  for (i = 0; i < afl->queued_paths; i++) {
+
+    q = afl->queue_buf[i];
+
+    if (unlikely(q->disabled)) { continue; }
 
     u8 *nfn, *rsl = strrchr(q->fname, '/');
     u32 orig_id;
@@ -1214,19 +1279,14 @@ void pivot_inputs(afl_state_t *afl) {
       afl->resuming_fuzz = 1;
       nfn = alloc_printf("%s/queue/%s", afl->out_dir, rsl);
 
-      /* Since we're at it, let's also try to find parent and figure out the
+      /* Since we're at it, let's also get the parent and figure out the
          appropriate depth for this entry. */
 
       src_str = strchr(rsl + 3, ':');
 
       if (src_str && sscanf(src_str + 1, "%06u", &src_id) == 1) {
 
-        struct queue_entry *s = afl->queue;
-        while (src_id-- && s) {
-
-          s = s->next;
-
-        }
+        struct queue_entry *s = afl->queue_buf[src_id];
 
         if (s) { q->depth = s->depth + 1; }
 
@@ -1274,7 +1334,6 @@ void pivot_inputs(afl_state_t *afl) {
 
     if (q->passed_det) { mark_as_det_done(afl, q); }
 
-    q = q->next;
     ++id;
 
   }
@@ -2434,6 +2493,7 @@ void setup_testcase_shmem(afl_state_t *afl) {
 
   // we need to set the non-instrumented mode to not overwrite the SHM_ENV_VAR
   u8 *map = afl_shm_init(afl->shm_fuzz, MAX_FILE + sizeof(u32), 1);
+  afl->shm_fuzz->shmemfuzz_mode = 1;
 
   if (!map) { FATAL("BUG: Zero return from afl_shm_init."); }
 
diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c
index f9509e86..c73e394a 100644
--- a/src/afl-fuzz-one.c
+++ b/src/afl-fuzz-one.c
@@ -26,6 +26,7 @@
 #include "afl-fuzz.h"
 #include <string.h>
 #include <limits.h>
+#include "cmplog.h"
 
 /* MOpt */
 
@@ -165,7 +166,7 @@ static u8 could_be_arith(u32 old_val, u32 new_val, u8 blen) {
 
   /* See if one-byte adjustments to any byte could produce this result. */
 
-  for (i = 0; i < blen; ++i) {
+  for (i = 0; (u8)i < blen; ++i) {
 
     u8 a = old_val >> (8 * i), b = new_val >> (8 * i);
 
@@ -193,7 +194,7 @@ static u8 could_be_arith(u32 old_val, u32 new_val, u8 blen) {
 
   diffs = 0;
 
-  for (i = 0; i < blen / 2; ++i) {
+  for (i = 0; (u8)i < blen / 2; ++i) {
 
     u16 a = old_val >> (16 * i), b = new_val >> (16 * i);
 
@@ -290,7 +291,7 @@ static u8 could_be_interest(u32 old_val, u32 new_val, u8 blen, u8 check_le) {
 
   /* See if two-byte insertions over old_val could give us new_val. */
 
-  for (i = 0; (s32)i < blen - 1; ++i) {
+  for (i = 0; (u8)i < blen - 1; ++i) {
 
     for (j = 0; j < sizeof(interesting_16) / 2; ++j) {
 
@@ -530,7 +531,7 @@ u8 fuzz_one_original(afl_state_t *afl) {
     len = afl->queue_cur->len;
 
     /* maybe current entry is not ready for splicing anymore */
-    if (unlikely(len <= 4 && old_len > 4)) afl->ready_for_splicing_count--;
+    if (unlikely(len <= 4 && old_len > 4)) --afl->ready_for_splicing_count;
 
   }
 
@@ -543,16 +544,33 @@ u8 fuzz_one_original(afl_state_t *afl) {
   if (likely(!afl->old_seed_selection))
     orig_perf = perf_score = afl->queue_cur->perf_score;
   else
-    orig_perf = perf_score = calculate_score(afl, afl->queue_cur);
+    afl->queue_cur->perf_score = orig_perf = perf_score =
+        calculate_score(afl, afl->queue_cur);
 
-  if (unlikely(perf_score == 0)) { goto abandon_entry; }
+  if (unlikely(perf_score <= 0)) { goto abandon_entry; }
 
-  if (unlikely(afl->shm.cmplog_mode && !afl->queue_cur->fully_colorized)) {
+  if (unlikely(afl->shm.cmplog_mode &&
+               afl->queue_cur->colorized < afl->cmplog_lvl &&
+               (u32)len <= afl->cmplog_max_filesize)) {
 
-    if (input_to_state_stage(afl, in_buf, out_buf, len,
-                             afl->queue_cur->exec_cksum)) {
+    if (unlikely(len < 4)) {
 
-      goto abandon_entry;
+      afl->queue_cur->colorized = CMPLOG_LVL_MAX;
+
+    } else {
+
+      if (afl->cmplog_lvl == 3 ||
+          (afl->cmplog_lvl == 2 && afl->queue_cur->tc_ref) ||
+          !(afl->fsrv.total_execs % afl->queued_paths) ||
+          get_cur_time() - afl->last_path_time > 300000) {
+
+        if (input_to_state_stage(afl, in_buf, out_buf, len)) {
+
+          goto abandon_entry;
+
+        }
+
+      }
 
     }
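
The new gate only runs the expensive input-to-state stage when it is likely
to pay off. Restated as a standalone predicate - a sketch, with the
thresholds taken from the code above (the len < 4 case additionally marks
the entry as fully colorized there):

  /* sketch: should this entry go through input_to_state_stage()? */
  static int should_run_cmplog(afl_state_t *afl, u32 len) {

    if (!afl->shm.cmplog_mode) { return 0; }
    if (afl->queue_cur->colorized >= afl->cmplog_lvl) { return 0; }
    if (len < 4 || len > afl->cmplog_max_filesize) { return 0; }

    return afl->cmplog_lvl == 3 ||                        /* always at -l 3 */
           (afl->cmplog_lvl == 2 && afl->queue_cur->tc_ref) ||
           !(afl->fsrv.total_execs % afl->queued_paths) || /* now and then */
           get_cur_time() - afl->last_path_time > 300000;  /* 5 min stall */

  }
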
 
@@ -2766,11 +2784,16 @@ abandon_entry:
      cycle and have not seen this entry before. */
 
   if (!afl->stop_soon && !afl->queue_cur->cal_failed &&
-      (afl->queue_cur->was_fuzzed == 0 || afl->queue_cur->fuzz_level == 0)) {
+      (afl->queue_cur->was_fuzzed == 0 || afl->queue_cur->fuzz_level == 0) &&
+      !afl->queue_cur->disabled) {
 
-    --afl->pending_not_fuzzed;
-    afl->queue_cur->was_fuzzed = 1;
-    if (afl->queue_cur->favored) { --afl->pending_favored; }
+    if (!afl->queue_cur->was_fuzzed) {
+
+      --afl->pending_not_fuzzed;
+      afl->queue_cur->was_fuzzed = 1;
+      if (afl->queue_cur->favored) { --afl->pending_favored; }
+
+    }
 
   }
 
@@ -2796,7 +2819,7 @@ static u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
 
   }
 
-  s32 len, temp_len;
+  u32 len, temp_len;
   u32 i;
   u32 j;
   u8 *in_buf, *out_buf, *orig_in, *ex_tmp, *eff_map = 0;
@@ -2937,7 +2960,7 @@ static u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
     len = afl->queue_cur->len;
 
     /* maybe current entry is not ready for splicing anymore */
-    if (unlikely(len <= 4 && old_len > 4)) afl->ready_for_splicing_count--;
+    if (unlikely(len <= 4 && old_len > 4)) --afl->ready_for_splicing_count;
 
   }
 
@@ -2952,14 +2975,30 @@ static u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
   else
     orig_perf = perf_score = calculate_score(afl, afl->queue_cur);
 
-  if (unlikely(perf_score == 0)) { goto abandon_entry; }
+  if (unlikely(perf_score <= 0)) { goto abandon_entry; }
 
-  if (unlikely(afl->shm.cmplog_mode && !afl->queue_cur->fully_colorized)) {
+  if (unlikely(afl->shm.cmplog_mode &&
+               afl->queue_cur->colorized < afl->cmplog_lvl &&
+               (u32)len <= afl->cmplog_max_filesize)) {
 
-    if (input_to_state_stage(afl, in_buf, out_buf, len,
-                             afl->queue_cur->exec_cksum)) {
+    if (unlikely(len < 4)) {
 
-      goto abandon_entry;
+      afl->queue_cur->colorized = CMPLOG_LVL_MAX;
+
+    } else {
+
+      if (afl->cmplog_lvl == 3 ||
+          (afl->cmplog_lvl == 2 && afl->queue_cur->tc_ref) ||
+          !(afl->fsrv.total_execs % afl->queued_paths) ||
+          get_cur_time() - afl->last_path_time > 300000) {
+
+        if (input_to_state_stage(afl, in_buf, out_buf, len)) {
+
+          goto abandon_entry;
+
+        }
+
+      }
 
     }
 
@@ -3315,7 +3354,7 @@ static u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
 
   orig_hit_cnt = new_hit_cnt;
 
-  for (i = 0; (s32)i < len - 1; ++i) {
+  for (i = 0; i < len - 1; ++i) {
 
     /* Let's consult the effector map... */
 
@@ -3357,7 +3396,7 @@ static u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
 
   orig_hit_cnt = new_hit_cnt;
 
-  for (i = 0; (s32)i < len - 3; ++i) {
+  for (i = 0; i < len - 3; ++i) {
 
     /* Let's consult the effector map... */
     if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
@@ -3489,7 +3528,7 @@ skip_bitflip:
 
   orig_hit_cnt = new_hit_cnt;
 
-  for (i = 0; (s32)i < len - 1; ++i) {
+  for (i = 0; i < len - 1; ++i) {
 
     u16 orig = *(u16 *)(out_buf + i);
 
@@ -3615,7 +3654,7 @@ skip_bitflip:
 
   orig_hit_cnt = new_hit_cnt;
 
-  for (i = 0; (s32)i < len - 3; ++i) {
+  for (i = 0; i < len - 3; ++i) {
 
     u32 orig = *(u32 *)(out_buf + i);
 
@@ -3805,7 +3844,7 @@ skip_arith:
 
   orig_hit_cnt = new_hit_cnt;
 
-  for (i = 0; (s32)i < len - 1; ++i) {
+  for (i = 0; i < len - 1; ++i) {
 
     u16 orig = *(u16 *)(out_buf + i);
 
@@ -3891,7 +3930,7 @@ skip_arith:
 
   orig_hit_cnt = new_hit_cnt;
 
-  for (i = 0; (s32)i < len - 3; ++i) {
+  for (i = 0; i < len - 3; ++i) {
 
     u32 orig = *(u32 *)(out_buf + i);
 
@@ -4120,7 +4159,7 @@ skip_user_extras:
 
       /* See the comment in the earlier code; extras are sorted by size. */
 
-      if ((s32)(afl->a_extras[j].len) > (s32)(len - i) ||
+      if ((afl->a_extras[j].len) > (len - i) ||
           !memcmp(afl->a_extras[j].data, out_buf + i, afl->a_extras[j].len) ||
           !memchr(eff_map + EFF_APOS(i), 1,
                   EFF_SPAN_ALEN(i, afl->a_extras[j].len))) {
@@ -4749,8 +4788,7 @@ pacemaker_fuzzing:
 
               }
 
-              afl->stage_cycles_puppet_v2[afl->swarm_now]
-                                         [STAGE_OverWriteExtra]++;
+              MOpt_globals.cycles_v2[STAGE_OverWriteExtra]++;
 
               break;
 
@@ -4805,8 +4843,7 @@ pacemaker_fuzzing:
               memcpy(out_buf + insert_at, ptr, extra_len);
 
               temp_len += extra_len;
-              afl->stage_cycles_puppet_v2[afl->swarm_now][STAGE_InsertExtra] +=
-                  1;
+              MOpt_globals.cycles_v2[STAGE_InsertExtra]++;
               break;
 
             }
@@ -4837,7 +4874,7 @@ pacemaker_fuzzing:
                 u32 copy_from, copy_to, copy_len;
 
                 copy_len = choose_block_len(afl, new_len - 1);
-                if ((s32)copy_len > temp_len) copy_len = temp_len;
+                if (copy_len > temp_len) copy_len = temp_len;
 
                 copy_from = rand_below(afl, new_len - copy_len + 1);
                 copy_to = rand_below(afl, temp_len - copy_len + 1);
@@ -4888,7 +4925,7 @@ pacemaker_fuzzing:
 
               }
 
-              afl->stage_cycles_puppet_v2[afl->swarm_now][STAGE_Splice]++;
+              MOpt_globals.cycles_v2[STAGE_Splice]++;
               break;
 
             }  // end of default:
@@ -5033,8 +5070,7 @@ pacemaker_fuzzing:
            the last differing byte. Bail out if the difference is just a single
            byte or so. */
 
-        locate_diffs(in_buf, new_buf, MIN(len, (s32)target->len), &f_diff,
-                     &l_diff);
+        locate_diffs(in_buf, new_buf, MIN(len, target->len), &f_diff, &l_diff);
 
         if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) {
 
diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c
index 66938635..ad3e3b8e 100644
--- a/src/afl-fuzz-queue.c
+++ b/src/afl-fuzz-queue.c
@@ -143,7 +143,7 @@ void create_alias_table(afl_state_t *afl) {
 
       struct queue_entry *q = afl->queue_buf[i];
 
-      if (!q->disabled) { q->perf_score = calculate_score(afl, q); }
+      if (likely(!q->disabled)) { q->perf_score = calculate_score(afl, q); }
 
       sum += q->perf_score;
 
@@ -313,17 +313,18 @@ void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) {
 
 /* check if ascii or UTF-8 */
 
-static u8 check_if_text(struct queue_entry *q) {
+static u8 check_if_text(afl_state_t *afl, struct queue_entry *q) {
 
   if (q->len < AFL_TXT_MIN_LEN) return 0;
 
-  u8      buf[MAX_FILE];
+  u8 *    buf;
   int     fd;
   u32     len = q->len, offset = 0, ascii = 0, utf8 = 0;
   ssize_t comp;
 
   if (len >= MAX_FILE) len = MAX_FILE - 1;
   if ((fd = open(q->fname, O_RDONLY)) < 0) return 0;
+  buf = afl_realloc(AFL_BUF_PARAM(in_scratch), len);
+  if (unlikely(!buf)) { PFATAL("alloc"); }
   comp = read(fd, buf, len);
   close(fd);
   if (comp != (ssize_t)len) return 0;
@@ -433,6 +434,7 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
   q->passed_det = passed_det;
   q->trace_mini = NULL;
   q->testcase_buf = NULL;
+  q->mother = afl->queue_cur;
 
 #ifdef INTROSPECTION
   q->bitsmap_size = afl->bitsmap_size;
@@ -442,7 +444,6 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
 
   if (afl->queue_top) {
 
-    afl->queue_top->next = q;
     afl->queue_top = q;
 
   } else {
@@ -463,6 +464,7 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
       AFL_BUF_PARAM(queue), afl->queued_paths * sizeof(struct queue_entry *));
   if (unlikely(!queue_buf)) { PFATAL("alloc"); }
   queue_buf[afl->queued_paths - 1] = q;
+  q->id = afl->queued_paths - 1;
 
   afl->last_path_time = get_cur_time();
 
@@ -486,7 +488,7 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
   }
 
   /* only redqueen currently uses is_ascii */
-  if (afl->shm.cmplog_mode) q->is_ascii = check_if_text(q);
+  if (afl->shm.cmplog_mode) q->is_ascii = check_if_text(afl, q);
 
 }
 
@@ -639,10 +641,9 @@ void cull_queue(afl_state_t *afl) {
 
   if (likely(!afl->score_changed || afl->non_instrumented_mode)) { return; }
 
-  struct queue_entry *q;
-  u32                 len = (afl->fsrv.map_size >> 3);
-  u32                 i;
-  u8 *                temp_v = afl->map_tmp_buf;
+  u32 len = (afl->fsrv.map_size >> 3);
+  u32 i;
+  u8 *temp_v = afl->map_tmp_buf;
 
   afl->score_changed = 0;
 
@@ -651,12 +652,9 @@ void cull_queue(afl_state_t *afl) {
   afl->queued_favored = 0;
   afl->pending_favored = 0;
 
-  q = afl->queue;
-
-  while (q) {
+  for (i = 0; i < afl->queued_paths; i++) {
 
-    q->favored = 0;
-    q = q->next;
+    afl->queue_buf[i]->favored = 0;
 
   }
 
@@ -695,12 +693,13 @@ void cull_queue(afl_state_t *afl) {
 
   }
 
-  q = afl->queue;
+  for (i = 0; i < afl->queued_paths; i++) {
+
+    if (likely(!afl->queue_buf[i]->disabled)) {
 
-  while (q) {
+      mark_as_redundant(afl, afl->queue_buf[i], !afl->queue_buf[i]->favored);
 
-    mark_as_redundant(afl, q, !q->favored);
-    q = q->next;
+    }
 
   }
 
@@ -850,13 +849,15 @@ u32 calculate_score(afl_state_t *afl, struct queue_entry *q) {
       // Don't modify perf_score for unfuzzed seeds
       if (q->fuzz_level == 0) break;
 
-      struct queue_entry *queue_it = afl->queue;
-      while (queue_it) {
+      u32 i;
+      for (i = 0; i < afl->queued_paths; i++) {
 
-        fuzz_mu += log2(afl->n_fuzz[q->n_fuzz_entry]);
-        n_paths++;
+        if (likely(!afl->queue_buf[i]->disabled)) {
 
-        queue_it = queue_it->next;
+          fuzz_mu += log2(afl->n_fuzz[afl->queue_buf[i]->n_fuzz_entry]);
+          n_paths++;
+
+        }
 
       }
 
diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c
index 28585afe..cf65d3c1 100644
--- a/src/afl-fuzz-redqueen.c
+++ b/src/afl-fuzz-redqueen.c
@@ -28,13 +28,40 @@
 #include "afl-fuzz.h"
 #include "cmplog.h"
 
-///// Colorization
+//#define _DEBUG
+//#define CMPLOG_INTROSPECTION
+#define CMPLOG_COMBINE
+
+// CMP attribute enum
+enum {
+
+  IS_EQUAL = 1,    // arithmetic equality comparison
+  IS_GREATER = 2,  // arithmetic greater-than comparison
+  IS_LESSER = 4,   // arithmetic less-than comparison
+  IS_FP = 8,       // is a floating point, not an integer
+  /* --- below are internal settings, not from target cmplog */
+  IS_FP_MOD = 16,    // arithmetically changed floating point
+  IS_INT_MOD = 32,   // arithmetically changed integer
+  IS_TRANSFORM = 64  // transformed integer
+
+};
+
+// CMPLOG LVL
+enum {
+
+  LVL1 = 1,  // Integer solving
+  LVL2 = 2,  // unused except for setting the queue entry
+  LVL3 = 4   // expensive transformations
+
+};
 
 struct range {
 
   u32           start;
   u32           end;
   struct range *next;
+  struct range *prev;
+  u8            ok;
 
 };
 
@@ -44,6 +71,8 @@ static struct range *add_range(struct range *ranges, u32 start, u32 end) {
   r->start = start;
   r->end = end;
   r->next = ranges;
+  r->ok = 0;
+  if (likely(ranges)) ranges->prev = r;
   return r;
 
 }
@@ -51,131 +80,303 @@ static struct range *add_range(struct range *ranges, u32 start, u32 end) {
 static struct range *pop_biggest_range(struct range **ranges) {
 
   struct range *r = *ranges;
-  struct range *prev = NULL;
   struct range *rmax = NULL;
-  struct range *prev_rmax = NULL;
   u32           max_size = 0;
 
   while (r) {
 
-    u32 s = r->end - r->start;
-    if (s >= max_size) {
+    if (!r->ok) {
+
+      u32 s = 1 + r->end - r->start;
 
-      max_size = s;
-      prev_rmax = prev;
-      rmax = r;
+      if (s >= max_size) {
+
+        max_size = s;
+        rmax = r;
+
+      }
 
     }
 
-    prev = r;
     r = r->next;
 
   }
 
-  if (rmax) {
+  return rmax;
 
-    if (prev_rmax) {
+}
 
-      prev_rmax->next = rmax->next;
+#ifdef _DEBUG
+// static int  logging = 0;
+static void dump(char *txt, u8 *buf, u32 len) {
 
-    } else {
+  u32 i;
+  fprintf(stderr, "DUMP %s %016llx ", txt, hash64(buf, len, HASH_CONST));
+  for (i = 0; i < len; i++)
+    fprintf(stderr, "%02x", buf[i]);
+  fprintf(stderr, "\n");
 
-      *ranges = rmax->next;
+}
 
-    }
+static void dump_file(char *path, char *name, u32 counter, u8 *buf, u32 len) {
 
-  }
+  char fn[4096];
+  if (!path) path = ".";
+  snprintf(fn, sizeof(fn), "%s/%s%d", path, name, counter);
+  int fd = open(fn, O_RDWR | O_CREAT | O_TRUNC, 0644);
+  if (fd >= 0) {
 
-  return rmax;
+    write(fd, buf, len);
+    close(fd);
+
+  }
 
 }
 
+#endif
+
 static u8 get_exec_checksum(afl_state_t *afl, u8 *buf, u32 len, u64 *cksum) {
 
   if (unlikely(common_fuzz_stuff(afl, buf, len))) { return 1; }
 
   *cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
+
   return 0;
 
 }
 
-static void xor_replace(u8 *buf, u32 len) {
+/* replace everything with different values but stay in the same type */
+static void type_replace(afl_state_t *afl, u8 *buf, u32 len) {
 
   u32 i;
+  u8  c;
   for (i = 0; i < len; ++i) {
 
-    buf[i] ^= 0xff;
+    // won't help for UTF-8 or non-Latin charsets
+    do {
+
+      switch (buf[i]) {
+
+        case 'A' ... 'F':
+          c = 'A' + rand_below(afl, 1 + 'F' - 'A');
+          break;
+        case 'a' ... 'f':
+          c = 'a' + rand_below(afl, 1 + 'f' - 'a');
+          break;
+        case '0':
+          c = '1';
+          break;
+        case '1':
+          c = '0';
+          break;
+        case '2' ... '9':
+          c = '2' + rand_below(afl, 1 + '9' - '2');
+          break;
+        case 'G' ... 'Z':
+          c = 'G' + rand_below(afl, 1 + 'Z' - 'G');
+          break;
+        case 'g' ... 'z':
+          c = 'g' + rand_below(afl, 1 + 'z' - 'g');
+          break;
+        case '!' ... '*':
+          c = '!' + rand_below(afl, 1 + '*' - '!');
+          break;
+        case ',' ... '.':
+          c = ',' + rand_below(afl, 1 + '.' - ',');
+          break;
+        case ':' ... '@':
+          c = ':' + rand_below(afl, 1 + '@' - ':');
+          break;
+        case '[' ... '`':
+          c = '[' + rand_below(afl, 1 + '`' - '[');
+          break;
+        case '{' ... '~':
+          c = '{' + rand_below(afl, 1 + '~' - '{');
+          break;
+        case '+':
+          c = '/';
+          break;
+        case '/':
+          c = '+';
+          break;
+        case ' ':
+          c = '\t';
+          break;
+        case '\t':
+          c = ' ';
+          break;
+          /*
+                case '\r':
+                case '\n':
+                  // nothing ...
+                  break;
+          */
+        default:
+          c = (buf[i] ^ 0xff);
+
+      }
+
+    } while (c == buf[i]);
+
+    buf[i] = c;
 
   }
 
 }
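
type_replace() keeps every byte inside its character class, so inputs that
a target parses as text keep parsing: digits stay digits, hex letters stay
valid hex, and punctuation stays within its band. An illustrative call (the
exact output depends on the RNG state):

  u8 buf[] = "User: 42!";
  type_replace(afl, buf, sizeof(buf) - 1);
  /* e.g. "Xqkm;\t73%" - 'U' stays in 'G'..'Z', "ser" stays in 'g'..'z',
     ':' stays in ':'..'@', ' ' flips to '\t', "42" stays in '2'..'9',
     '!' stays in '!'..'*' */
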
 
-static u8 colorization(afl_state_t *afl, u8 *buf, u32 len, u64 exec_cksum) {
+static u8 colorization(afl_state_t *afl, u8 *buf, u32 len,
+                       struct tainted **taints) {
 
-  struct range *ranges = add_range(NULL, 0, len);
-  u8 *          backup = ck_alloc_nozero(len);
+  struct range *  ranges = add_range(NULL, 0, len - 1), *rng;
+  struct tainted *taint = NULL;
+  u8 *            backup = ck_alloc_nozero(len);
+  u8 *            changed = ck_alloc_nozero(len);
 
-  u64 orig_hit_cnt, new_hit_cnt;
+#if defined(_DEBUG) || defined(CMPLOG_INTROSPECTION)
+  u64 start_time = get_cur_time();
+#endif
+
+  u32 screen_update =
+      1000000 / (afl->queue_cur->exec_us ? afl->queue_cur->exec_us : 1);
+  u64 orig_hit_cnt, new_hit_cnt, exec_cksum;
   orig_hit_cnt = afl->queued_paths + afl->unique_crashes;
 
   afl->stage_name = "colorization";
   afl->stage_short = "colorization";
-  afl->stage_max = 1000;
-
-  struct range *rng = NULL;
+  afl->stage_max = (len << 1);
   afl->stage_cur = 0;
+
+  // in colorization we do not classify counts, hence we have to calculate
+  // the original checksum.
+  if (unlikely(get_exec_checksum(afl, buf, len, &exec_cksum))) {
+
+    goto checksum_fail;
+
+  }
+
+  memcpy(backup, buf, len);
+  memcpy(changed, buf, len);
+  type_replace(afl, changed, len);
+
   while ((rng = pop_biggest_range(&ranges)) != NULL &&
          afl->stage_cur < afl->stage_max) {
 
-    u32 s = rng->end - rng->start;
+    u32 s = 1 + rng->end - rng->start;
 
-    if (s != 0) {
+    memcpy(buf + rng->start, changed + rng->start, s);
 
-      /* Range not empty */
+    u64 cksum = 0;
+    u64 start_us = get_cur_time_us();
+    if (unlikely(get_exec_checksum(afl, buf, len, &cksum))) {
+
+      goto checksum_fail;
+
+    }
 
-      memcpy(backup, buf + rng->start, s);
-      xor_replace(buf + rng->start, s);
+    u64 stop_us = get_cur_time_us();
 
-      u64 cksum;
-      u64 start_us = get_cur_time_us();
-      if (unlikely(get_exec_checksum(afl, buf, len, &cksum))) {
+    /* Discard if the mutation changes the path, or if it slows execution
+      down too much - though how could the same path run at a much
+      different speed ... */
+    if (cksum != exec_cksum ||
+        (unlikely(stop_us - start_us > 3 * afl->queue_cur->exec_us) &&
+         likely(!afl->fixed_seed))) {
 
-        goto checksum_fail;
+      memcpy(buf + rng->start, backup + rng->start, s);
+
+      if (s > 1) {  // to not add 0 size ranges
+
+        ranges = add_range(ranges, rng->start, rng->start - 1 + s / 2);
+        ranges = add_range(ranges, rng->start + s / 2, rng->end);
 
       }
 
-      u64 stop_us = get_cur_time_us();
+      if (ranges == rng) {
+
+        ranges = rng->next;
+        if (ranges) { ranges->prev = NULL; }
 
-      /* Discard if the mutations change the paths or if it is too decremental
-        in speed */
-      if (cksum != exec_cksum ||
-          ((stop_us - start_us > 2 * afl->queue_cur->exec_us) &&
-           likely(!afl->fixed_seed))) {
+      } else if (rng->next) {
 
-        ranges = add_range(ranges, rng->start, rng->start + s / 2);
-        ranges = add_range(ranges, rng->start + s / 2 + 1, rng->end);
-        memcpy(buf + rng->start, backup, s);
+        rng->prev->next = rng->next;
+        rng->next->prev = rng->prev;
+
+      } else {
+
+        if (rng->prev) { rng->prev->next = NULL; }
 
       }
 
+      free(rng);
+
+    } else {
+
+      rng->ok = 1;
+
     }
 
-    ck_free(rng);
-    rng = NULL;
-    ++afl->stage_cur;
+    if (!(++afl->stage_cur % screen_update)) { show_stats(afl); }
 
   }
 
-  if (afl->stage_cur < afl->stage_max) { afl->queue_cur->fully_colorized = 1; }
+  rng = ranges;
+  while (rng) {
 
-  new_hit_cnt = afl->queued_paths + afl->unique_crashes;
-  afl->stage_finds[STAGE_COLORIZATION] += new_hit_cnt - orig_hit_cnt;
-  afl->stage_cycles[STAGE_COLORIZATION] += afl->stage_cur;
-  ck_free(backup);
+    rng = rng->next;
+
+  }
+
+  u32 i = 1;
+  u32 positions = 0;
+  while (i) {
+
+  restart:
+    i = 0;
+    struct range *r = NULL;
+    u32           pos = (u32)-1;
+    rng = ranges;
+
+    while (rng) {
+
+      if (rng->ok == 1 && rng->start < pos) {
+
+        if (taint && taint->pos + taint->len == rng->start) {
+
+          taint->len += (1 + rng->end - rng->start);
+          positions += (1 + rng->end - rng->start);
+          rng->ok = 2;
+          goto restart;
+
+        } else {
 
-  ck_free(rng);
-  rng = NULL;
+          r = rng;
+          pos = rng->start;
+
+        }
+
+      }
+
+      rng = rng->next;
+
+    }
+
+    if (r) {
+
+      struct tainted *t = ck_alloc_nozero(sizeof(struct tainted));
+      t->pos = r->start;
+      t->len = 1 + r->end - r->start;
+      positions += (1 + r->end - r->start);
+      if (likely(taint)) { taint->prev = t; }
+      t->next = taint;
+      t->prev = NULL;
+      taint = t;
+      r->ok = 2;
+      i = 1;
+
+    }
+
+  }
 
+  /* temporary: clean ranges */
   while (ranges) {
 
     rng = ranges;
@@ -185,12 +386,76 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len, u64 exec_cksum) {
 
   }
 
-  return 0;
+  new_hit_cnt = afl->queued_paths + afl->unique_crashes;
 
-checksum_fail:
-  if (rng) { ck_free(rng); }
+#if defined(_DEBUG) || defined(CMPLOG_INTROSPECTION)
+  FILE *f = stderr;
+  #ifndef _DEBUG
+  if (afl->not_on_tty) {
+
+    char fn[4096];
+    snprintf(fn, sizeof(fn), "%s/introspection_cmplog.txt", afl->out_dir);
+    f = fopen(fn, "a");
+
+  }
+
+  #endif
+
+  if (f) {
+
+    fprintf(
+        f,
+        "Colorization: fname=%s len=%u ms=%llu result=%u execs=%u found=%llu "
+        "taint=%u\n",
+        afl->queue_cur->fname, len, get_cur_time() - start_time,
+        afl->queue_cur->colorized, afl->stage_cur, new_hit_cnt - orig_hit_cnt,
+        positions);
+
+  #ifndef _DEBUG
+    if (afl->not_on_tty) { fclose(f); }
+  #endif
+
+  }
+
+#endif
+
+  if (taint) {
+
+    if (len / positions == 1 && positions > CMPLOG_POSITIONS_MAX &&
+        afl->active_paths / afl->colorize_success > CMPLOG_CORPUS_PERCENT) {
+
+#ifdef _DEBUG
+      fprintf(stderr, "Colorization unsatisfactory\n");
+#endif
+
+      *taints = NULL;
+
+      struct tainted *t;
+      while (taint) {
+
+        t = taint->next;
+        ck_free(taint);
+        taint = t;
+
+      }
+
+    } else {
+
+      *taints = taint;
+      ++afl->colorize_success;
+
+    }
+
+  }
+
+  afl->stage_finds[STAGE_COLORIZATION] += new_hit_cnt - orig_hit_cnt;
+  afl->stage_cycles[STAGE_COLORIZATION] += afl->stage_cur;
   ck_free(backup);
+  ck_free(changed);
 
+  return 0;
+
+checksum_fail:
   while (ranges) {
 
     rng = ranges;
@@ -200,6 +465,9 @@ checksum_fail:
 
   }
 
+  ck_free(backup);
+  ck_free(changed);
+
   return 1;
 
 }
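
The rewritten colorization is a binary search over the buffer: overwrite a
range with same-class random bytes, keep it if the execution checksum is
unchanged, otherwise restore it and recurse into both halves. In outline -
a recursive sketch of the iterative worklist above, where run_cksum() is a
hypothetical stand-in for the get_exec_checksum() round trip:

  static void colorize_range(afl_state_t *afl, u8 *buf, u8 *backup,
                             u8 *changed, u32 start, u32 end, u64 want) {

    u32 s = 1 + end - start;
    memcpy(buf + start, changed + start, s);      /* try the replacement */
    if (run_cksum(afl, buf) == want) { return; }  /* range is mutable */

    memcpy(buf + start, backup + start, s);       /* restore and split */
    if (s > 1) {

      colorize_range(afl, buf, backup, changed, start, start + s / 2 - 1,
                     want);
      colorize_range(afl, buf, backup, changed, start + s / 2, end, want);

    }

  }
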
@@ -212,12 +480,19 @@ static u8 its_fuzz(afl_state_t *afl, u8 *buf, u32 len, u8 *status) {
 
   orig_hit_cnt = afl->queued_paths + afl->unique_crashes;
 
+#ifdef _DEBUG
+  dump("DATA", buf, len);
+#endif
+
   if (unlikely(common_fuzz_stuff(afl, buf, len))) { return 1; }
 
   new_hit_cnt = afl->queued_paths + afl->unique_crashes;
 
   if (unlikely(new_hit_cnt != orig_hit_cnt)) {
 
+#ifdef _DEBUG
+    fprintf(stderr, "NEW FIND\n");
+#endif
     *status = 1;
 
   } else {
@@ -230,6 +505,7 @@ static u8 its_fuzz(afl_state_t *afl, u8 *buf, u32 len, u8 *status) {
 
 }
 
+#ifdef CMPLOG_SOLVE_TRANSFORM
 static int strntoll(const char *str, size_t sz, char **end, int base,
                     long long *out) {
 
@@ -237,6 +513,8 @@ static int strntoll(const char *str, size_t sz, char **end, int base,
   long long   ret;
   const char *beg = str;
 
+  if (!str || !sz) { return 1; }
+
   for (; beg && sz && *beg == ' '; beg++, sz--) {};
 
   if (!sz) return 1;
@@ -260,6 +538,8 @@ static int strntoull(const char *str, size_t sz, char **end, int base,
   unsigned long long ret;
   const char *       beg = str;
 
+  if (!str || !sz) { return 1; }
+
   for (; beg && sz && *beg == ' '; beg++, sz--)
     ;
 
@@ -277,12 +557,161 @@ static int strntoull(const char *str, size_t sz, char **end, int base,
 
 }
 
-static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
-                              u64 pattern, u64 repl, u64 o_pattern, u32 idx,
-                              u8 *orig_buf, u8 *buf, u32 len, u8 do_reverse,
-                              u8 *status) {
+static u8 hex_table_up[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
+                              '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+static u8 hex_table_low[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
+                               '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
+static u8 hex_table[] = {0, 1, 2, 3,  4,  5,  6,  7,  8,  9,  0,  0,  0, 0,
+                         0, 0, 0, 10, 11, 12, 13, 14, 15, 0,  0,  0,  0, 0,
+                         0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0,
+                         0, 0, 0, 0,  0,  0,  0,  10, 11, 12, 13, 14, 15};
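+// hex_table is indexed with (ch - '0'); entries 17..22 map 'A'..'F' and
+// entries 49..54 map 'a'..'f' to their values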
+
+// tests 2 bytes at location
+static int is_hex(const char *str) {
+
+  u32 i;
+
+  for (i = 0; i < 2; i++) {
+
+    switch (str[i]) {
+
+      case '0' ... '9':
+      case 'A' ... 'F':
+      case 'a' ... 'f':
+        break;
+      default:
+        return 0;
+
+    }
+
+  }
+
+  return 1;
+
+}
+
+  #ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
+// tests 4 bytes at location
+static int is_base64(const char *str) {
+
+  u32 i;
+
+  for (i = 0; i < 4; i++) {
+
+    switch (str[i]) {
+
+      case '0' ... '9':
+      case 'A' ... 'Z':
+      case 'a' ... 'z':
+      case '+':
+      case '/':
+      case '=':
+        break;
+      default:
+        return 0;
+
+    }
+
+  }
+
+  return 1;
+
+}
+
+static u8 base64_encode_table[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static u8 base64_decode_table[] = {
+
+    62, 0,  0,  0,  63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0,
+    0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+    10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+    0,  0,  0,  0,  0,  0,  26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+    36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51};
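+// base64_decode_table is indexed with (ch - 43): '+' (ASCII 43) is the
+// smallest valid base64 character, so it sits at offset 0 and '/' maps to 63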
+
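+// decode base64 characters from src into dst, processing as many 4-character
+// groups as fit into dst_len output bytes; returns the number of bytes written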
+static u32 from_base64(u8 *src, u8 *dst, u32 dst_len) {
+
+  u32 i, j, v;
+  u32 len = ((dst_len / 3) << 2);
+  u32 ret = 0;
+
+  for (i = 0, j = 0; i < len; i += 4, j += 3) {
 
-  if (!buf) { FATAL("BUG: buf was NULL. Please report this.\n"); }
+    v = base64_decode_table[src[i] - 43];
+    v = (v << 6) | base64_decode_table[src[i + 1] - 43];
+    v = src[i + 2] == '=' ? v << 6
+                          : (v << 6) | base64_decode_table[src[i + 2] - 43];
+    v = src[i + 3] == '=' ? v << 6
+                          : (v << 6) | base64_decode_table[src[i + 3] - 43];
+
+    dst[j] = (v >> 16) & 0xFF;
+    ++ret;
+
+    if (src[i + 2] != '=') {
+
+      dst[j + 1] = (v >> 8) & 0xFF;
+      ++ret;
+
+    }
+
+    if (src[i + 3] != '=') {
+
+      dst[j + 2] = v & 0xFF;
+      ++ret;
+
+    }
+
+  }
+
+  return ret;
+
+}
+
+static void to_base64(u8 *src, u8 *dst, u32 dst_len) {
+
+  u32 i, j, v;
+  u32 len = (dst_len >> 2) * 3;
+
+  for (i = 0, j = 0; i < len; i += 3, j += 4) {
+
+    v = src[i];
+    v = i + 1 < len ? v << 8 | src[i + 1] : v << 8;
+    v = i + 2 < len ? v << 8 | src[i + 2] : v << 8;
+
+    dst[j] = base64_encode_table[(v >> 18) & 0x3F];
+    dst[j + 1] = base64_encode_table[(v >> 12) & 0x3F];
+    if (i + 1 < len) {
+
+      dst[j + 2] = base64_encode_table[(v >> 6) & 0x3F];
+
+    } else {
+
+      dst[j + 2] = '=';
+
+    }
+
+    if (i + 2 < len) {
+
+      dst[j + 3] = base64_encode_table[v & 0x3F];
+
+    } else {
+
+      dst[j + 3] = '=';
+
+    }
+
+  }
+
+}
+
+  #endif
+
+#endif
+
+static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
+                              u64 pattern, u64 repl, u64 o_pattern,
+                              u64 changed_val, u8 attr, u32 idx, u32 taint_len,
+                              u8 *orig_buf, u8 *buf, u8 *cbuf, u32 len,
+                              u8 do_reverse, u8 lvl, u8 *status) {
 
   u64 *buf_64 = (u64 *)&buf[idx];
   u32 *buf_32 = (u32 *)&buf[idx];
@@ -293,76 +722,423 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
   u16 *o_buf_16 = (u16 *)&orig_buf[idx];
   u8 * o_buf_8 = &orig_buf[idx];
 
-  u32 its_len = len - idx;
-  // *status = 0;
+  u32 its_len = MIN(len - idx, taint_len);
 
-  u8 *               endptr;
-  u8                 use_num = 0, use_unum = 0;
-  unsigned long long unum;
-  long long          num;
+  // fprintf(stderr,
+  //         "Encode: %llx->%llx into %llx(<-%llx) at idx=%u "
+  //         "taint_len=%u shape=%u attr=%u\n",
+  //         o_pattern, pattern, repl, changed_val, idx, taint_len,
+  //         h->shape + 1, attr);
 
-  if (afl->queue_cur->is_ascii) {
+#ifdef CMPLOG_SOLVE_TRANSFORM
+  // reversing atoi()/strnu?toll() is expensive, so we only do it in lvl 3
+  if (lvl & LVL3) {
 
-    endptr = buf_8;
-    if (strntoll(buf_8, len - idx, (char **)&endptr, 0, &num)) {
+    u8 *               endptr;
+    u8                 use_num = 0, use_unum = 0;
+    unsigned long long unum;
+    long long          num;
 
-      if (!strntoull(buf_8, len - idx, (char **)&endptr, 0, &unum))
-        use_unum = 1;
+    if (afl->queue_cur->is_ascii) {
 
-    } else
+      endptr = buf_8;
+      if (strntoll(buf_8, len - idx, (char **)&endptr, 0, &num)) {
 
-      use_num = 1;
+        if (!strntoull(buf_8, len - idx, (char **)&endptr, 0, &unum))
+          use_unum = 1;
 
-  }
+      } else
+
+        use_num = 1;
+
+    }
+
+  #ifdef _DEBUG
+    if (idx == 0)
+      fprintf(stderr, "ASCII is=%u use_num=%u use_unum=%u idx=%u %llx==%llx\n",
+              afl->queue_cur->is_ascii, use_num, use_unum, idx, num, pattern);
+  #endif
+
+    // num is likely not pattern as atoi("AAA") will be zero...
+    if (use_num && ((u64)num == pattern || !num)) {
+
+      u8     tmp_buf[32];
+      size_t num_len = snprintf(tmp_buf, sizeof(tmp_buf), "%lld", repl);
+      size_t old_len = endptr - buf_8;
+
+      u8 *new_buf = afl_realloc((void **)&afl->out_scratch_buf, len + num_len);
+      if (unlikely(!new_buf)) { PFATAL("alloc"); }
+
+      memcpy(new_buf, buf, idx);
+      memcpy(new_buf + idx, tmp_buf, num_len);
+      memcpy(new_buf + idx + num_len, buf_8 + old_len, len - idx - old_len);
+
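+      // if the inserted number is directly followed by another digit the
+      // target would parse both as one value, so split them with a space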
+      if (new_buf[idx + num_len] >= '0' && new_buf[idx + num_len] <= '9') {
 
-  if (use_num && (u64)num == pattern) {
+        new_buf[idx + num_len] = ' ';
 
-    size_t old_len = endptr - buf_8;
-    size_t num_len = snprintf(NULL, 0, "%lld", num);
+      }
+
+      if (unlikely(its_fuzz(afl, new_buf, len, status))) { return 1; }
+
+    } else if (use_unum && (unum == pattern || !unum)) {
+
+      u8     tmp_buf[32];
+      size_t num_len = snprintf(tmp_buf, sizeof(tmp_buf), "%llu", repl);
+      size_t old_len = endptr - buf_8;
+
+      u8 *new_buf = afl_realloc((void **)&afl->out_scratch_buf, len + num_len);
+      if (unlikely(!new_buf)) { PFATAL("alloc"); }
+
+      memcpy(new_buf, buf, idx);
+      memcpy(new_buf + idx, tmp_buf, num_len);
+      memcpy(new_buf + idx + num_len, buf_8 + old_len, len - idx - old_len);
+
+      if (new_buf[idx + num_len] >= '0' && new_buf[idx + num_len] <= '9') {
+
+        new_buf[idx + num_len] = ' ';
+
+      }
+
+      if (unlikely(its_fuzz(afl, new_buf, len, status))) { return 1; }
+
+    }
+
+    // Try to identify transform magic
+    if (pattern != o_pattern && repl == changed_val && attr <= IS_EQUAL) {
+
+      u64 *ptr = (u64 *)&buf[idx];
+      u64 *o_ptr = (u64 *)&orig_buf[idx];
+      u64  b_val, o_b_val, mask;
+
+      switch (SHAPE_BYTES(h->shape)) {
+
+        case 0:
+        case 1:
+          b_val = (u64)(*ptr % 0x100);
+          o_b_val = (u64)(*o_ptr % 0x100);
+          mask = 0xff;
+          break;
+        case 2:
+        case 3:
+          b_val = (u64)(*ptr % 0x10000);
+          o_b_val = (u64)(*o_ptr % 0x10000);
+          mask = 0xffff;
+          break;
+        case 4:
+        case 5:
+        case 6:
+        case 7:
+          b_val = (u64)(*ptr % 0x100000000);
+          o_b_val = (u64)(*o_ptr % 0x100000000);
+          mask = 0xffffffff;
+          break;
+        default:
+          b_val = *ptr;
+          o_b_val = *o_ptr;
+          mask = 0xffffffffffffffff;
+
+      }
+
+      // test for arithmetic, eg. "if ((user_val - 0x1111) == 0x1234) ..."
+      s64 diff = pattern - b_val;
+      s64 o_diff = o_pattern - o_b_val;
+      /*
+            fprintf(stderr, "DIFF1 idx=%03u shape=%02u %llx-%llx=%lx\n", idx,
+                    h->shape + 1, o_pattern, o_b_val, o_diff);
+            fprintf(stderr, "DIFF1 %016llx %llx-%llx=%lx\n", repl, pattern,
+         b_val, diff);*/
+      if (diff == o_diff && diff) {
+
+        // this could be an arithmetic transformation
+
+        u64 new_repl = (u64)((s64)repl - diff);
+        //        fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
+
+        if (unlikely(cmp_extend_encoding(
+                afl, h, pattern, new_repl, o_pattern, repl, IS_TRANSFORM, idx,
+                taint_len, orig_buf, buf, cbuf, len, 1, lvl, status))) {
+
+          return 1;
+
+        }
+
+        // if (*status == 1) { fprintf(stderr, "FOUND!\n"); }
+
+      }
+
+      // test for XOR, eg. "if ((user_val ^ 0xabcd) == 0x1234) ..."
+      if (*status != 1) {
+
+        diff = pattern ^ b_val;
+        s64 o_diff = o_pattern ^ o_b_val;
+
+        /*        fprintf(stderr, "DIFF2 idx=%03u shape=%02u %llx-%llx=%lx\n",
+           idx, h->shape + 1, o_pattern, o_b_val, o_diff); fprintf(stderr,
+           "DIFF2 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);*/
+        if (diff == o_diff && diff) {
+
+          // this could be a XOR transformation
+
+          u64 new_repl = (u64)((s64)repl ^ diff);
+          //          fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
+
+          if (unlikely(cmp_extend_encoding(
+                  afl, h, pattern, new_repl, o_pattern, repl, IS_TRANSFORM, idx,
+                  taint_len, orig_buf, buf, cbuf, len, 1, lvl, status))) {
+
+            return 1;
+
+          }
+
+          // if (*status == 1) { fprintf(stderr, "FOUND!\n"); }
+
+        }
+
+      }
+
+      // test for a to-lowercase conversion, eg. "new_val = (user_val | 0x2020)"
+      if (*status != 1) {
+
+        if ((b_val | (0x2020202020202020 & mask)) == (pattern & mask)) {
+
+          diff = 1;
+
+        } else {
+
+          diff = 0;
+
+        }
+
+        if ((o_b_val | (0x2020202020202020 & mask)) == (o_pattern & mask)) {
+
+          o_diff = 1;
+
+        } else {
+
+          o_diff = 0;
+
+        }
+
+        /*        fprintf(stderr, "DIFF3 idx=%03u shape=%02u %llx-%llx=%lx\n",
+           idx, h->shape + 1, o_pattern, o_b_val, o_diff); fprintf(stderr,
+           "DIFF3 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);*/
+        if (o_diff && diff) {
+
+          // this could be an upper-to-lower case transformation
 
-    u8 *new_buf = afl_realloc((void **)&afl->out_scratch_buf, len + num_len);
-    if (unlikely(!new_buf)) { PFATAL("alloc"); }
-    memcpy(new_buf, buf, idx);
+          u64 new_repl = (repl & (0x5f5f5f5f5f5f5f5f & mask));
+          //          fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
 
-    snprintf(new_buf + idx, num_len, "%lld", num);
-    memcpy(new_buf + idx + num_len, buf_8 + old_len, len - idx - old_len);
+          if (unlikely(cmp_extend_encoding(
+                  afl, h, pattern, new_repl, o_pattern, repl, IS_TRANSFORM, idx,
+                  taint_len, orig_buf, buf, cbuf, len, 1, lvl, status))) {
 
-    if (unlikely(its_fuzz(afl, new_buf, len, status))) { return 1; }
+            return 1;
 
-  } else if (use_unum && unum == pattern) {
+          }
 
-    size_t old_len = endptr - buf_8;
-    size_t num_len = snprintf(NULL, 0, "%llu", unum);
+          // if (*status == 1) { fprintf(stderr, "FOUND!\n"); }
 
-    u8 *new_buf = afl_realloc((void **)&afl->out_scratch_buf, len + num_len);
-    if (unlikely(!new_buf)) { PFATAL("alloc"); }
-    memcpy(new_buf, buf, idx);
+        }
 
-    snprintf(new_buf + idx, num_len, "%llu", unum);
-    memcpy(new_buf + idx + num_len, buf_8 + old_len, len - idx - old_len);
+      }
+
+      // test for a to-uppercase conversion, eg. "new_val = (user_val & 0x5f5f)"
+      if (*status != 1) {
+
+        if ((b_val & (0x5f5f5f5f5f5f5f5f & mask)) == (pattern & mask)) {
+
+          diff = 1;
+
+        } else {
+
+          diff = 0;
+
+        }
+
+        if ((o_b_val & (0x5f5f5f5f5f5f5f5f & mask)) == (o_pattern & mask)) {
+
+          o_diff = 1;
+
+        } else {
+
+          o_diff = 0;
+
+        }
+
+        /*        fprintf(stderr, "DIFF4 idx=%03u shape=%02u %llx-%llx=%lx\n",
+           idx, h->shape + 1, o_pattern, o_b_val, o_diff); fprintf(stderr,
+           "DIFF4 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);*/
+        if (o_diff && diff) {
 
-    if (unlikely(its_fuzz(afl, new_buf, len, status))) { return 1; }
+          // this could be a lower-to-upper case transformation
+
+          u64 new_repl = (repl | (0x2020202020202020 & mask));
+          //          fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
+
+          if (unlikely(cmp_extend_encoding(
+                  afl, h, pattern, new_repl, o_pattern, repl, IS_TRANSFORM, idx,
+                  taint_len, orig_buf, buf, cbuf, len, 1, lvl, status))) {
+
+            return 1;
+
+          }
+
+          // if (*status == 1) { fprintf(stderr, "FOUND!\n"); }
+
+        }
+
+      }
+
+      *status = 0;
+
+    }
 
   }
 
-  if (SHAPE_BYTES(h->shape) >= 8 && *status != 1) {
+#endif
 
-    if (its_len >= 8 && *buf_64 == pattern && *o_buf_64 == o_pattern) {
+  // an unchanged pattern is only useful for the ascii2integer check above,
+  // so we are done here
+  if (unlikely(pattern == o_pattern)) { return 0; }
 
-      *buf_64 = repl;
-      if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
-      *buf_64 = pattern;
+  if ((lvl & LVL1) || attr >= IS_FP_MOD) {
+
+    if (SHAPE_BYTES(h->shape) >= 8 && *status != 1) {
+
+      // if (its_len >= 8)
+      //   fprintf(stderr,
+      //           "TestU64: %u>=8 (idx=%u attr=%u) %llx==%llx"
+      //           " %llx==%llx <= %llx<-%llx\n",
+      //           its_len, idx, attr, *buf_64, pattern, *o_buf_64, o_pattern,
+      //           repl, changed_val);
+
+      // if this is an fcmp (attr & 8) then do not compare the patterns -
+      // due to an llvm bug, dynamic float bitcasts do not work :(
+      // an attr value of 16 means this is a +- 1.0 test case
+      if (its_len >= 8 && ((*buf_64 == pattern && *o_buf_64 == o_pattern) ||
+                           attr >= IS_FP_MOD)) {
+
+        u64 tmp_64 = *buf_64;
+        *buf_64 = repl;
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+#ifdef CMPLOG_COMBINE
+        if (*status == 1) { memcpy(cbuf + idx, buf_64, 8); }
+#endif
+        *buf_64 = tmp_64;
+
+        // fprintf(stderr, "Status=%u\n", *status);
+
+      }
+
+      // reverse encoding
+      if (do_reverse && *status != 1) {
+
+        if (unlikely(cmp_extend_encoding(afl, h, SWAP64(pattern), SWAP64(repl),
+                                         SWAP64(o_pattern), SWAP64(changed_val),
+                                         attr, idx, taint_len, orig_buf, buf,
+                                         cbuf, len, 0, lvl, status))) {
+
+          return 1;
+
+        }
+
+      }
 
     }
 
-    // reverse encoding
-    if (do_reverse && *status != 1) {
+    if (SHAPE_BYTES(h->shape) >= 4 && *status != 1) {
 
-      if (unlikely(cmp_extend_encoding(afl, h, SWAP64(pattern), SWAP64(repl),
-                                       SWAP64(o_pattern), idx, orig_buf, buf,
-                                       len, 0, status))) {
+      // if (its_len >= 4 && (attr <= 1 || attr >= 8))
+      //   fprintf(stderr,
+      //           "TestU32: %u>=4 (idx=%u attr=%u) %x==%x"
+      //           " %x==%x <= %x<-%x\n",
+      //           its_len, idx, attr, *buf_32, (u32)pattern, *o_buf_32,
+      //           (u32)o_pattern, (u32)repl, (u32)changed_val);
 
-        return 1;
+      if (its_len >= 4 &&
+          ((*buf_32 == (u32)pattern && *o_buf_32 == (u32)o_pattern) ||
+           attr >= IS_FP_MOD)) {
+
+        u32 tmp_32 = *buf_32;
+        *buf_32 = (u32)repl;
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+#ifdef CMPLOG_COMBINE
+        if (*status == 1) { memcpy(cbuf + idx, buf_32, 4); }
+#endif
+        *buf_32 = tmp_32;
+
+        // fprintf(stderr, "Status=%u\n", *status);
+
+      }
+
+      // reverse encoding
+      if (do_reverse && *status != 1) {
+
+        if (unlikely(cmp_extend_encoding(afl, h, SWAP32(pattern), SWAP32(repl),
+                                         SWAP32(o_pattern), SWAP32(changed_val),
+                                         attr, idx, taint_len, orig_buf, buf,
+                                         cbuf, len, 0, lvl, status))) {
+
+          return 1;
+
+        }
+
+      }
+
+    }
+
+    if (SHAPE_BYTES(h->shape) >= 2 && *status != 1) {
+
+      if (its_len >= 2 &&
+          ((*buf_16 == (u16)pattern && *o_buf_16 == (u16)o_pattern) ||
+           attr >= IS_FP_MOD)) {
+
+        u16 tmp_16 = *buf_16;
+        *buf_16 = (u16)repl;
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+#ifdef CMPLOG_COMBINE
+        if (*status == 1) { memcpy(cbuf + idx, buf_16, 2); }
+#endif
+        *buf_16 = tmp_16;
+
+      }
+
+      // reverse encoding
+      if (do_reverse && *status != 1) {
+
+        if (unlikely(cmp_extend_encoding(afl, h, SWAP16(pattern), SWAP16(repl),
+                                         SWAP16(o_pattern), SWAP16(changed_val),
+                                         attr, idx, taint_len, orig_buf, buf,
+                                         cbuf, len, 0, lvl, status))) {
+
+          return 1;
+
+        }
+
+      }
+
+    }
+
+    if (*status != 1) {  // u8
+
+      // if (its_len >= 1)
+      //   fprintf(stderr,
+      //           "TestU8: %u>=1 (idx=%u attr=%u) %x==%x %x==%x <= %x<-%x\n",
+      //           its_len, idx, attr, *buf_8, (u8)pattern, *o_buf_8,
+      //           (u8)o_pattern, (u8)repl, (u8)changed_val);
+
+      if (its_len >= 1 &&
+          ((*buf_8 == (u8)pattern && *o_buf_8 == (u8)o_pattern) ||
+           attr >= IS_FP_MOD)) {
+
+        u8 tmp_8 = *buf_8;
+        *buf_8 = (u8)repl;
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+#ifdef CMPLOG_COMBINE
+        if (*status == 1) { cbuf[idx] = *buf_8; }
+#endif
+        *buf_8 = tmp_8;
 
       }
 
@@ -370,23 +1146,105 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
 
   }
 
-  if (SHAPE_BYTES(h->shape) >= 4 && *status != 1) {
+  // here we add and subtract 1 from the value, but only if it is not an
+  // == or != comparison
+  // Bits: 1 = Equal, 2 = Greater, 4 = Lesser, 8 = Float
+  //       16 = modified float, 32 = modified integer (modified = won't match
+  //                                                   in the original buffer)
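+  // e.g. for "if (x > 0x1234)" (IS_GREATER) we try repl + 1 = 0x1235, for
+  // "if (x < 0x1234)" (IS_LESSER) we try repl - 1 = 0x1233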
 
-    if (its_len >= 4 && *buf_32 == (u32)pattern &&
-        *o_buf_32 == (u32)o_pattern) {
+#ifdef CMPLOG_SOLVE_ARITHMETIC
+  if (lvl < LVL3 || attr == IS_TRANSFORM) { return 0; }
 
-      *buf_32 = (u32)repl;
-      if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
-      *buf_32 = pattern;
+  if (!(attr & (IS_GREATER | IS_LESSER)) || SHAPE_BYTES(h->shape) < 4) {
+
+    return 0;
+
+  }
+
+  // transform >= to < and <= to >
+  if ((attr & IS_EQUAL) && (attr & (IS_GREATER | IS_LESSER))) {
+
+    if (attr & 2) {
+
+      attr += 2;
+
+    } else {
+
+      attr -= 2;
 
     }
 
-    // reverse encoding
-    if (do_reverse && *status != 1) {
+  }
+
+  // lesser/greater FP comparison
+  if (attr >= IS_FP && attr < IS_FP_MOD) {
+
+    u64 repl_new;
+
+    if (attr & IS_GREATER) {
+
+      if (SHAPE_BYTES(h->shape) == 4 && its_len >= 4) {
+
+        float *f = (float *)&repl;
+        float  g = *f;
+        g += 1.0;
+        u32 *r = (u32 *)&g;
+        repl_new = (u32)*r;
+
+      } else if (SHAPE_BYTES(h->shape) == 8 && its_len >= 8) {
+
+        double *f = (double *)&repl;
+        double  g = *f;
+        g += 1.0;
+
+        u64 *r = (u64 *)&g;
+        repl_new = *r;
+
+      } else {
+
+        return 0;
+
+      }
+
+      changed_val = repl_new;
+
+      if (unlikely(cmp_extend_encoding(
+              afl, h, pattern, repl_new, o_pattern, changed_val, 16, idx,
+              taint_len, orig_buf, buf, cbuf, len, 1, lvl, status))) {
+
+        return 1;
+
+      }
+
+    } else {
+
+      if (SHAPE_BYTES(h->shape) == 4) {
+
+        float *f = (float *)&repl;
+        float  g = *f;
+        g -= 1.0;
+        u32 *r = (u32 *)&g;
+        repl_new = (u32)*r;
+
+      } else if (SHAPE_BYTES(h->shape) == 8) {
+
+        double *f = (double *)&repl;
+        double  g = *f;
+        g -= 1.0;
+        u64 *r = (u64 *)&g;
+        repl_new = *r;
 
-      if (unlikely(cmp_extend_encoding(afl, h, SWAP32(pattern), SWAP32(repl),
-                                       SWAP32(o_pattern), idx, orig_buf, buf,
-                                       len, 0, status))) {
+      } else {
+
+        return 0;
+
+      }
+
+      changed_val = repl_new;
+
+      if (unlikely(cmp_extend_encoding(
+              afl, h, pattern, repl_new, o_pattern, changed_val, 16, idx,
+              taint_len, orig_buf, buf, cbuf, len, 1, lvl, status))) {
 
         return 1;
 
@@ -394,25 +1252,62 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
 
     }
 
-  }
+    // transform double to float, llvm likes to do that internally ...
+    if (SHAPE_BYTES(h->shape) == 8 && its_len >= 4) {
 
-  if (SHAPE_BYTES(h->shape) >= 2 && *status != 1) {
+      double *f = (double *)&repl;
+      float   g = (float)*f;
+      repl_new = 0;
+  #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+      memcpy((char *)&repl_new, (char *)&g, 4);
+  #else
+      memcpy(((char *)&repl_new) + 4, (char *)&g, 4);
+  #endif
+      changed_val = repl_new;
+      h->shape = 3;  // modify shape
 
-    if (its_len >= 2 && *buf_16 == (u16)pattern &&
-        *o_buf_16 == (u16)o_pattern) {
+      // fprintf(stderr, "DOUBLE2FLOAT %llx\n", repl_new);
 
-      *buf_16 = (u16)repl;
-      if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
-      *buf_16 = (u16)pattern;
+      if (unlikely(cmp_extend_encoding(
+              afl, h, pattern, repl_new, o_pattern, changed_val, 16, idx,
+              taint_len, orig_buf, buf, cbuf, len, 1, lvl, status))) {
+
+        h->shape = 7;  // recover shape
+        return 1;
+
+      }
+
+      h->shape = 7;  // recover shape
 
     }
 
-    // reverse encoding
-    if (do_reverse && *status != 1) {
+  }
+
+  else if (attr < IS_FP) {
+
+    // lesser/greater integer comparison
+
+    u64 repl_new;
 
-      if (unlikely(cmp_extend_encoding(afl, h, SWAP16(pattern), SWAP16(repl),
-                                       SWAP16(o_pattern), idx, orig_buf, buf,
-                                       len, 0, status))) {
+    if (attr & IS_GREATER) {
+
+      repl_new = repl + 1;
+      changed_val = repl_new;
+      if (unlikely(cmp_extend_encoding(
+              afl, h, pattern, repl_new, o_pattern, changed_val, 32, idx,
+              taint_len, orig_buf, buf, cbuf, len, 1, lvl, status))) {
+
+        return 1;
+
+      }
+
+    } else {
+
+      repl_new = repl - 1;
+      changed_val = repl_new;
+      if (unlikely(cmp_extend_encoding(
+              afl, h, pattern, repl_new, o_pattern, changed_val, 32, idx,
+              taint_len, orig_buf, buf, cbuf, len, 1, lvl, status))) {
 
         return 1;
 
@@ -422,14 +1317,92 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
 
   }
 
-  /* avoid CodeQL warning on unsigned overflow */
-  if (/* SHAPE_BYTES(h->shape) >= 1 && */ *status != 1) {
+#endif                                           /* CMPLOG_SOLVE_ARITHMETIC */
+
+  return 0;
+
+}
 
-    if (its_len >= 1 && *buf_8 == (u8)pattern && *o_buf_8 == (u8)o_pattern) {
+#ifdef WORD_SIZE_64
+
+static u8 cmp_extend_encodingN(afl_state_t *afl, struct cmp_header *h,
+                               u128 pattern, u128 repl, u128 o_pattern,
+                               u128 changed_val, u8 attr, u32 idx,
+                               u32 taint_len, u8 *orig_buf, u8 *buf, u8 *cbuf,
+                               u32 len, u8 do_reverse, u8 lvl, u8 *status) {
+
+  u8 *ptr = (u8 *)&buf[idx];
+  u8 *o_ptr = (u8 *)&orig_buf[idx];
+  u8 *p = (u8 *)&pattern;
+  u8 *o_p = (u8 *)&o_pattern;
+  u8 *r = (u8 *)&repl;
+  u8  backup[16];
+  u32 its_len = MIN(len - idx, taint_len);
+  u32 shape = h->shape + 1;
+  #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+  size_t off = 0;
+  #else
+  size_t off = 16 - shape;
+  #endif
+
+  if (its_len >= shape) {
+
+  #ifdef _DEBUG
+    fprintf(stderr, "TestUN: %u>=%u (len=%u idx=%u attr=%u off=%lu) (%u) ",
+            its_len, shape, len, idx, attr, off, do_reverse);
+    u32 i;
+    u8 *o_r = (u8 *)&changed_val;
+    for (i = 0; i < shape; i++)
+      fprintf(stderr, "%02x", ptr[i]);
+    fprintf(stderr, "==");
+    for (i = 0; i < shape; i++)
+      fprintf(stderr, "%02x", p[off + i]);
+    fprintf(stderr, " ");
+    for (i = 0; i < shape; i++)
+      fprintf(stderr, "%02x", o_ptr[i]);
+    fprintf(stderr, "==");
+    for (i = 0; i < shape; i++)
+      fprintf(stderr, "%02x", o_p[off + i]);
+    fprintf(stderr, " <= ");
+    for (i = 0; i < shape; i++)
+      fprintf(stderr, "%02x", r[off + i]);
+    fprintf(stderr, "<-");
+    for (i = 0; i < shape; i++)
+      fprintf(stderr, "%02x", o_r[off + i]);
+    fprintf(stderr, "\n");
+  #endif
+
+    if (!memcmp(ptr, p + off, shape) && !memcmp(o_ptr, o_p + off, shape)) {
+
+      memcpy(backup, ptr, shape);
+      memcpy(ptr, r + off, shape);
 
-      *buf_8 = (u8)repl;
       if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
-      *buf_8 = (u8)pattern;
+
+  #ifdef CMPLOG_COMBINE
+      if (*status == 1) { memcpy(cbuf + idx, r, shape); }
+  #endif
+
+      memcpy(ptr, backup, shape);
+
+  #ifdef _DEBUG
+      fprintf(stderr, "Status=%u\n", *status);
+  #endif
+
+    }
+
+    // reverse encoding
+    if (do_reverse && *status != 1) {
+
+      if (unlikely(cmp_extend_encodingN(
+              afl, h, SWAPN(pattern, (shape << 3)), SWAPN(repl, (shape << 3)),
+              SWAPN(o_pattern, (shape << 3)), SWAPN(changed_val, (shape << 3)),
+              attr, idx, taint_len, orig_buf, buf, cbuf, len, 0, lvl,
+              status))) {
+
+        return 1;
+
+      }
 
     }
 
@@ -439,16 +1412,14 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
 
 }
 
+#endif
+
 static void try_to_add_to_dict(afl_state_t *afl, u64 v, u8 shape) {
 
   u8 *b = (u8 *)&v;
 
   u32 k;
   u8  cons_ff = 0, cons_0 = 0;
-
-  if (shape > sizeof(v))
-    FATAL("shape is greater than %zu, please report!", sizeof(v));
-
   for (k = 0; k < shape; ++k) {
 
     if (b[k] == 0) {
@@ -493,28 +1464,93 @@ static void try_to_add_to_dict(afl_state_t *afl, u64 v, u8 shape) {
 
 }
 
-static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u32 len) {
+#ifdef WORD_SIZE_64
+static void try_to_add_to_dictN(afl_state_t *afl, u128 v, u8 size) {
 
-  struct cmp_header *h = &afl->shm.cmp_map->headers[key];
-  u32                i, j, idx;
+  u8 *b = (u8 *)&v;
+
+  u32 k;
+  u8  cons_ff = 0, cons_0 = 0;
+  #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+  u32 off = 0;
+  for (k = 0; k < size; ++k) {
+
+  #else
+  u32 off = 16 - size;
+  for (k = 16 - size; k < 16; ++k) {
+
+  #endif
+    if (b[k] == 0) {
+
+      ++cons_0;
+
+    } else if (b[k] == 0xff) {
+
+      ++cons_ff;
+
+    } else {
+
+      cons_0 = cons_ff = 0;
+
+    }
+
+  }
 
-  u32 loggeds = h->hits;
-  if (h->hits > CMP_MAP_H) { loggeds = CMP_MAP_H; }
+  maybe_add_auto(afl, (u8 *)&v + off, size);
+  u128 rev = SWAPN(v, size);
+  maybe_add_auto(afl, (u8 *)&rev + off, size);
 
-  u8 status = 0;
-  // opt not in the paper
-  u32 fails;
-  u8  found_one = 0;
+}
+
+#endif
+
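+// mirror a cmp attribute for swapped operands: keep the upper bits
+// (float/transform flags), invert the equal/greater/lesser bits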
+#define SWAPA(_x) ((_x & 0xf8) + ((_x & 7) ^ 0x07))
+
+static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
+                   u32 len, u32 lvl, struct tainted *taint) {
+
+  struct cmp_header *h = &afl->shm.cmp_map->headers[key];
+  struct tainted *   t;
+  u32                i, j, idx, taint_len, loggeds;
+  u32                have_taint = 1;
+  u8                 status = 0, found_one = 0;
 
   /* loop cmps are useless, detect and ignore them */
+#ifdef WORD_SIZE_64
+  u32  is_n = 0;
+  u128 s128_v0 = 0, s128_v1 = 0, orig_s128_v0 = 0, orig_s128_v1 = 0;
+#endif
   u64 s_v0, s_v1;
   u8  s_v0_fixed = 1, s_v1_fixed = 1;
   u8  s_v0_inc = 1, s_v1_inc = 1;
   u8  s_v0_dec = 1, s_v1_dec = 1;
 
-  for (i = 0; i < loggeds; ++i) {
+  if (h->hits > CMP_MAP_H) {
+
+    loggeds = CMP_MAP_H;
+
+  } else {
+
+    loggeds = h->hits;
+
+  }
 
-    fails = 0;
+#ifdef WORD_SIZE_64
+  switch (SHAPE_BYTES(h->shape)) {
+
+    case 1:
+    case 2:
+    case 4:
+    case 8:
+      break;
+    default:
+      is_n = 1;
+
+  }
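+  // operand sizes other than 1/2/4/8 bytes (u128/_ExtInt compares) take the
+  // byte-wise cmp_extend_encodingN() path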
+
+#endif
+
+  for (i = 0; i < loggeds; ++i) {
 
     struct cmp_operands *o = &afl->shm.cmp_map->log[key][i];
 
@@ -551,55 +1587,176 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u32 len) {
 
     }
 
-    for (idx = 0; idx < len && fails < 8; ++idx) {
+#ifdef _DEBUG
+    fprintf(stderr, "Handling: %llx->%llx vs %llx->%llx attr=%u shape=%u\n",
+            orig_o->v0, o->v0, orig_o->v1, o->v1, h->attribute,
+            SHAPE_BYTES(h->shape));
+#endif
+
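+    // seek to the last taint region; the index loop below consumes the
+    // taint list back to front via t->prev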
+    t = taint;
+    while (t->next) {
+
+      t = t->next;
+
+    }
+
+#ifdef WORD_SIZE_64
+    if (unlikely(is_n)) {
+
+      s128_v0 = ((u128)o->v0) + (((u128)o->v0_128) << 64);
+      s128_v1 = ((u128)o->v1) + (((u128)o->v1_128) << 64);
+      orig_s128_v0 = ((u128)orig_o->v0) + (((u128)orig_o->v0_128) << 64);
+      orig_s128_v1 = ((u128)orig_o->v1) + (((u128)orig_o->v1_128) << 64);
+
+    }
+
+#endif
+
+    for (idx = 0; idx < len; ++idx) {
+
+      if (have_taint) {
+
+        if (!t || idx < t->pos) {
+
+          continue;
+
+        } else {
+
+          taint_len = t->pos + t->len - idx;
+
+          if (idx == t->pos + t->len - 1) { t = t->prev; }
+
+        }
+
+      } else {
+
+        taint_len = len - idx;
+
+      }
 
       status = 0;
-      if (unlikely(cmp_extend_encoding(afl, h, o->v0, o->v1, orig_o->v0, idx,
-                                       orig_buf, buf, len, 1, &status))) {
 
-        return 1;
+#ifdef WORD_SIZE_64
+      if (is_n) {  // _ExtInt special case including u128
+
+        if (s128_v0 != orig_s128_v0 && orig_s128_v0 != orig_s128_v1) {
+
+          if (unlikely(cmp_extend_encodingN(
+                  afl, h, s128_v0, s128_v1, orig_s128_v0, orig_s128_v1,
+                  h->attribute, idx, taint_len, orig_buf, buf, cbuf, len, 1,
+                  lvl, &status))) {
+
+            return 1;
+
+          }
+
+        }
+
+        if (status == 1) {
+
+          found_one = 1;
+          break;
+
+        }
+
+        if (s128_v1 != orig_s128_v1 && orig_s128_v1 != orig_s128_v0) {
+
+          if (unlikely(cmp_extend_encodingN(
+                  afl, h, s128_v1, s128_v0, orig_s128_v1, orig_s128_v0,
+                  SWAPA(h->attribute), idx, taint_len, orig_buf, buf, cbuf, len,
+                  1, lvl, &status))) {
+
+            return 1;
+
+          }
+
+        }
+
+        if (status == 1) {
+
+          found_one = 1;
+          break;
+
+        }
 
       }
 
-      if (status == 2) {
+#endif
+
+      // even for u128 and _ExtInt we run cmp_extend_encoding(), because if
+      // we got here their own special trials failed and it might just be a
+      // cast from e.g. u64 to u128 of the input data.
 
-        ++fails;
+      if ((o->v0 != orig_o->v0 || lvl >= LVL3) && orig_o->v0 != orig_o->v1) {
 
-      } else if (status == 1) {
+        if (unlikely(cmp_extend_encoding(
+                afl, h, o->v0, o->v1, orig_o->v0, orig_o->v1, h->attribute, idx,
+                taint_len, orig_buf, buf, cbuf, len, 1, lvl, &status))) {
 
+          return 1;
+
+        }
+
+      }
+
+      if (status == 1) {
+
+        found_one = 1;
         break;
 
       }
 
       status = 0;
-      if (unlikely(cmp_extend_encoding(afl, h, o->v1, o->v0, orig_o->v1, idx,
-                                       orig_buf, buf, len, 1, &status))) {
+      if ((o->v1 != orig_o->v1 || lvl >= LVL3) && orig_o->v0 != orig_o->v1) {
 
-        return 1;
+        if (unlikely(cmp_extend_encoding(afl, h, o->v1, o->v0, orig_o->v1,
+                                         orig_o->v0, SWAPA(h->attribute), idx,
+                                         taint_len, orig_buf, buf, cbuf, len, 1,
+                                         lvl, &status))) {
 
-      }
+          return 1;
 
-      if (status == 2) {
+        }
 
-        ++fails;
+      }
 
-      } else if (status == 1) {
+      if (status == 1) {
 
+        found_one = 1;
         break;
 
       }
 
     }
 
-    if (status == 1) { found_one = 1; }
+#ifdef _DEBUG
+    fprintf(stderr,
+            "END: %llx->%llx vs %llx->%llx attr=%u i=%u found=%u "
+            "isN=%u size=%u\n",
+            orig_o->v0, o->v0, orig_o->v1, o->v1, h->attribute, i, found_one,
+            is_n, SHAPE_BYTES(h->shape));
+#endif
 
     // If failed, add to dictionary
-    if (fails == 8) {
+    if (!found_one) {
 
       if (afl->pass_stats[key].total == 0) {
 
-        try_to_add_to_dict(afl, o->v0, SHAPE_BYTES(h->shape));
-        try_to_add_to_dict(afl, o->v1, SHAPE_BYTES(h->shape));
+#ifdef WORD_SIZE_64
+        if (unlikely(is_n)) {
+
+          try_to_add_to_dictN(afl, s128_v0, SHAPE_BYTES(h->shape));
+          try_to_add_to_dictN(afl, s128_v1, SHAPE_BYTES(h->shape));
+
+        } else
+
+#endif
+        {
+
+          try_to_add_to_dict(afl, o->v0, SHAPE_BYTES(h->shape));
+          try_to_add_to_dict(afl, o->v1, SHAPE_BYTES(h->shape));
+
+        }
 
       }
 
@@ -630,53 +1787,454 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u32 len) {
 }
 
 static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
-                              u8 *o_pattern, u32 idx, u8 *orig_buf, u8 *buf,
-                              u32 len, u8 *status) {
-
-  u32 i;
+                              u8 *o_pattern, u8 *changed_val, u32 idx,
+                              u32 taint_len, u8 *orig_buf, u8 *buf, u8 *cbuf,
+                              u32 len, u8 lvl, u8 *status) {
+
+#ifndef CMPLOG_COMBINE
+  (void)(cbuf);
+#endif
+#ifndef CMPLOG_SOLVE_TRANSFORM
+  (void)(changed_val);
+#endif
+
+  u8  save[40];
+  u32 saved_idx = idx, pre, from = 0, to = 0, i, j;
   u32 its_len = MIN((u32)32, len - idx);
+  its_len = MIN(its_len, taint_len);
+  u32 saved_its_len = its_len;
 
-  u8 save[32];
-  memcpy(save, &buf[idx], its_len);
+  if (lvl & LVL3) {
 
-  *status = 0;
+    u32 max_to = MIN(4U, idx);
+    if (!(lvl & LVL1) && max_to) { from = 1; }
+    to = max_to;
 
-  for (i = 0; i < its_len; ++i) {
+  }
 
-    if (pattern[i] != buf[idx + i] || o_pattern[i] != orig_buf[idx + i] ||
-        *status == 1) {
+  memcpy(save, &buf[saved_idx - to], its_len + to);
+  (void)(j);
 
-      break;
+#ifdef _DEBUG
+  fprintf(stderr, "RTN T idx=%u lvl=%02x ", idx, lvl);
+  for (j = 0; j < 8; j++)
+    fprintf(stderr, "%02x", orig_buf[idx + j]);
+  fprintf(stderr, " -> ");
+  for (j = 0; j < 8; j++)
+    fprintf(stderr, "%02x", o_pattern[j]);
+  fprintf(stderr, " <= ");
+  for (j = 0; j < 8; j++)
+    fprintf(stderr, "%02x", repl[j]);
+  fprintf(stderr, "\n");
+  fprintf(stderr, "                ");
+  for (j = 0; j < 8; j++)
+    fprintf(stderr, "%02x", buf[idx + j]);
+  fprintf(stderr, " -> ");
+  for (j = 0; j < 8; j++)
+    fprintf(stderr, "%02x", pattern[j]);
+  fprintf(stderr, " <= ");
+  for (j = 0; j < 8; j++)
+    fprintf(stderr, "%02x", changed_val[j]);
+  fprintf(stderr, "\n");
+#endif
+
+  // Try to match the replace value up to 4 bytes before the current idx.
+  // This allows matching of eg.:
+  //   if (memcmp(user_val, "TEST") == 0)
+  //     if (memcmp(user_val, "TEST-VALUE") == 0) ...
+  // We only do this in lvl 3, otherwise we only do direct matching
+
+  for (pre = from; pre <= to; pre++) {
+
+    if (*status != 1 && (!pre || !memcmp(buf + saved_idx - pre, repl, pre))) {
+
+      idx = saved_idx - pre;
+      its_len = saved_its_len + pre;
+
+      for (i = 0; i < its_len; ++i) {
+
+        if ((pattern[i] != buf[idx + i] && o_pattern[i] != orig_buf[idx + i]) ||
+            *status == 1) {
+
+          break;
+
+        }
+
+        buf[idx + i] = repl[i];
+
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+
+#ifdef CMPLOG_COMBINE
+        if (*status == 1) { memcpy(cbuf + idx, &buf[idx], i); }
+#endif
+
+      }
+
+      memcpy(&buf[idx], save + to - pre, i);
 
     }
 
-    buf[idx + i] = repl[i];
+  }
+
+#ifdef CMPLOG_SOLVE_TRANSFORM
+
+  if (*status == 1) return 0;
+
+  if (lvl & LVL3) {
+
+    u32 toupper = 0, tolower = 0, xor = 0, arith = 0, tohex = 0, fromhex = 0;
+  #ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
+    u32 tob64 = 0, fromb64 = 0;
+  #endif
+    u32 from_0 = 0, from_x = 0, from_X = 0, from_slash = 0, from_up = 0;
+    u32 to_0 = 0, to_x = 0, to_slash = 0, to_up = 0;
+    u8  xor_val[32], arith_val[32], tmp[48];
+
+    idx = saved_idx;
+    its_len = saved_its_len;
+
+    memcpy(save, &buf[idx], its_len);
+
+    for (i = 0; i < its_len; ++i) {
+
+      xor_val[i] = pattern[i] ^ buf[idx + i];
+      arith_val[i] = pattern[i] - buf[idx + i];
+
+      if (i == 0) {
+
+        if (orig_buf[idx] == '0') {
+
+          from_0 = 1;
+
+        } else if (orig_buf[idx] == '\\') {
+
+          from_slash = 1;
+
+        }
+
+        if (repl[0] == '0') {
+
+          to_0 = 1;
+
+        } else if (repl[0] == '\\') {
+
+          to_slash = 1;
+
+        }
+
+      } else if (i == 1) {
+
+        if (orig_buf[idx + 1] == 'x') {
+
+          from_x = 1;
+
+        } else if (orig_buf[idx + 1] == 'X') {
+
+          from_X = from_x = 1;
+
+        }
+
+        if (repl[1] == 'x' || repl[1] == 'X') { to_x = 1; }
+
+      }
+
+      if (i < 16 && is_hex(repl + (i << 1))) {
+
+        ++tohex;
+
+        if (!to_up) {
+
+          if (repl[i << 1] >= 'A' && repl[i << 1] <= 'F')
+            to_up = 1;
+          else if (repl[i << 1] >= 'a' && repl[i << 1] <= 'f')
+            to_up = 2;
+          if (repl[(i << 1) + 1] >= 'A' && repl[(i << 1) + 1] <= 'F')
+            to_up = 1;
+          else if (repl[(i << 1) + 1] >= 'a' && repl[(i << 1) + 1] <= 'f')
+            to_up = 2;
+
+        }
+
+      }
+
+      if ((i % 2)) {
+
+        if (len > idx + i && is_hex(orig_buf + idx + i)) {
+
+          fromhex += 2;
+
+          if (!from_up) {
+
+            if (orig_buf[idx + i] >= 'A' && orig_buf[idx + i] <= 'F')
+              from_up = 1;
+            else if (orig_buf[idx + i] >= 'a' && orig_buf[idx + i] <= 'f')
+              from_up = 2;
+            if (orig_buf[idx + i - 1] >= 'A' && orig_buf[idx + i - 1] <= 'F')
+              from_up = 1;
+            else if (orig_buf[idx + i - 1] >= 'a' &&
+                     orig_buf[idx + i - 1] <= 'f')
+              from_up = 2;
+
+          }
+
+        }
+
+      }
+
+  #ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
+      if (i % 3 == 2 && i < 24) {
+
+        if (is_base64(repl + ((i / 3) << 2))) tob64 += 3;
+
+      }
+
+      if (i % 4 == 3 && i < 24) {
+
+        if (is_base64(orig_buf + idx + i - 3)) fromb64 += 4;
+
+      }
+
+  #endif
+
+      if ((o_pattern[i] ^ orig_buf[idx + i]) == xor_val[i] && xor_val[i]) {
+
+        ++xor;
+
+      }
+
+      if ((o_pattern[i] - orig_buf[idx + i]) == arith_val[i] && arith_val[i]) {
+
+        ++arith;
+
+      }
+
+      if ((buf[idx + i] | 0x20) == pattern[i] &&
+          (orig_buf[idx + i] | 0x20) == o_pattern[i]) {
+
+        ++tolower;
+
+      }
+
+      if ((buf[idx + i] & 0x5f) == pattern[i] &&
+          (orig_buf[idx + i] & 0x5f) == o_pattern[i]) {
+
+        ++toupper;
+
+      }
+
+  #ifdef _DEBUG
+      fprintf(stderr,
+              "RTN idx=%u loop=%u xor=%u arith=%u tolower=%u toupper=%u "
+              "tohex=%u fromhex=%u to_0=%u to_slash=%u to_x=%u "
+              "from_0=%u from_slash=%u from_x=%u\n",
+              idx, i, xor, arith, tolower, toupper, tohex, fromhex, to_0,
+              to_slash, to_x, from_0, from_slash, from_x);
+    #ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
+      fprintf(stderr, "RTN idx=%u loop=%u tob64=%u fromb64=%u\n", idx, i,
+              tob64, fromb64);
+    #endif
+  #endif
+
+  #ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
+      // input is base64 and converted to binary? convert repl to base64!
+      if ((i % 4) == 3 && i < 24 && fromb64 > i) {
+
+        to_base64(repl, tmp, i + 1);
+        memcpy(buf + idx, tmp, i + 1);
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+        // fprintf(stderr, "RTN ATTEMPT fromb64 %u result %u\n", fromb64,
+        // *status);
+
+      }
+
+      // input is converted to base64? decode repl with base64!
+      if ((i % 3) == 2 && i < 24 && tob64 > i) {
+
+        u32 olen = from_base64(repl, tmp, i + 1);
+        memcpy(buf + idx, tmp, olen);
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+        // fprintf(stderr, "RTN ATTEMPT tob64 %u idx=%u result %u\n", tob64,
+        // idx, *status);
+
+      }
+
+  #endif
+
+      // input is converted to hex? convert repl to binary!
+      if (i < 16 && tohex > i) {
+
+        u32 off;
+        if (to_slash + to_x + to_0 == 2) {
+
+          off = 2;
 
-    if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+        } else {
+
+          off = 0;
+
+        }
+
+        for (j = 0; j <= i; j++)
+          tmp[j] = (hex_table[repl[off + (j << 1)] - '0'] << 4) +
+                   hex_table[repl[off + (j << 1) + 1] - '0'];
+
+        memcpy(buf + idx, tmp, i + 1);
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+        // fprintf(stderr, "RTN ATTEMPT tohex %u result %u\n", tohex, *status);
+
+      }
+
+      // input is hex and converted to binary? convert repl to hex!
+      if (i && (i % 2) && i < 16 && fromhex &&
+          fromhex + from_slash + from_x + from_0 > i) {
+
+        u8 off = 0;
+        if (from_slash && from_x) {
+
+          tmp[0] = '\\';
+          if (from_X) {
+
+            tmp[1] = 'X';
+
+          } else {
+
+            tmp[1] = 'x';
+
+          }
+
+          off = 2;
+
+        } else if (from_0 && from_x) {
+
+          tmp[0] = '0';
+          if (from_X) {
+
+            tmp[1] = 'X';
+
+          } else {
+
+            tmp[1] = 'x';
+
+          }
+
+          off = 2;
+
+        }
+
+        if (to_up == 1) {
+
+          for (j = 0; j <= (i >> 1); j++) {
+
+            tmp[off + (j << 1)] = hex_table_up[repl[j] >> 4];
+            tmp[off + (j << 1) + 1] = hex_table_up[repl[j] % 16];
+
+          }
+
+        } else {
+
+          for (j = 0; j <= (i >> 1); j++) {
+
+            tmp[off + (j << 1)] = hex_table_low[repl[j] >> 4];
+            tmp[off + (j << 1) + 1] = hex_table_low[repl[j] % 16];
+
+          }
+
+        }
+
+        memcpy(buf + idx, tmp, i + 1 + off);
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+        // fprintf(stderr, "RTN ATTEMPT fromhex %u result %u\n", fromhex,
+        // *status);
+        memcpy(buf + idx + i, save + i, i + 1 + off);
+
+      }
+
+      if (xor > i) {
+
+        for (j = 0; j <= i; j++)
+          buf[idx + j] = repl[j] ^ xor_val[j];
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+        // fprintf(stderr, "RTN ATTEMPT xor %u result %u\n", xor, *status);
+
+      }
+
+      if (arith > i && *status != 1) {
+
+        for (j = 0; j <= i; j++)
+          buf[idx + j] = repl[j] - arith_val[j];
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+        // fprintf(stderr, "RTN ATTEMPT arith %u result %u\n", arith, *status);
+
+      }
+
+      if (toupper > i && *status != 1) {
+
+        for (j = 0; j <= i; j++)
+          buf[idx + j] = repl[j] | 0x20;
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+        // fprintf(stderr, "RTN ATTEMPT toupper %u result %u\n", toupper,
+        // *status);
+
+      }
+
+      if (tolower > i && *status != 1) {
+
+        for (j = 0; j <= i; j++)
+          buf[idx + j] = repl[j] & 0x5f;
+        if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
+        // fprintf(stderr, "RTN ATTEMPT tolower %u result %u\n", tolower,
+        // *status);
+
+      }
+
+  #ifdef CMPLOG_COMBINE
+      if (*status == 1) { memcpy(cbuf + idx, &buf[idx], i + 1); }
+  #endif
+
+      if ((i >= 7 &&
+           (i >= xor && i >= arith && i >= tolower && i >= toupper &&
+            i > tohex && i > (fromhex + from_0 + from_x + from_slash + 1)
+  #ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
+            && i > tob64 + 3 && i > fromb64 + 4
+  #endif
+            )) ||
+          repl[i] != changed_val[i] || *status == 1) {
+
+        break;
+
+      }
+
+    }
+
+    memcpy(&buf[idx], save, i);
 
   }
 
-  memcpy(&buf[idx], save, i);
+#endif
+
   return 0;
 
 }
 
-static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u32 len) {
+static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
+                   u32 len, u8 lvl, struct tainted *taint) {
 
+  struct tainted *   t;
   struct cmp_header *h = &afl->shm.cmp_map->headers[key];
-  u32                i, j, idx;
+  u32                i, j, idx, have_taint = 1, taint_len, loggeds;
+  u8                 status = 0, found_one = 0;
 
-  u32 loggeds = h->hits;
-  if (h->hits > CMP_MAP_RTN_H) { loggeds = CMP_MAP_RTN_H; }
+  if (h->hits > CMP_MAP_RTN_H) {
 
-  u8 status = 0;
-  // opt not in the paper
-  //  u32 fails = 0;
-  u8 found_one = 0;
+    loggeds = CMP_MAP_RTN_H;
 
-  for (i = 0; i < loggeds; ++i) {
+  } else {
 
-    u32 fails = 0;
+    loggeds = h->hits;
+
+  }
+
+  for (i = 0; i < loggeds; ++i) {
 
     struct cmpfn_operands *o =
         &((struct cmpfn_operands *)afl->shm.cmp_map->log[key])[i];
@@ -696,50 +2254,89 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u32 len) {
 
     }
 
-    for (idx = 0; idx < len && fails < 8; ++idx) {
+    /*
+      struct cmp_header *hh = &afl->orig_cmp_map->headers[key];
+    fprintf(stderr, "RTN N hits=%u id=%u shape=%u attr=%u v0=", h->hits, h->id,
+    h->shape, h->attribute); for (j = 0; j < 8; j++) fprintf(stderr, "%02x",
+    o->v0[j]); fprintf(stderr, " v1="); for (j = 0; j < 8; j++) fprintf(stderr,
+    "%02x", o->v1[j]); fprintf(stderr, "\nRTN O hits=%u id=%u shape=%u attr=%u
+    o0=", hh->hits, hh->id, hh->shape, hh->attribute); for (j = 0; j < 8; j++)
+      fprintf(stderr, "%02x", orig_o->v0[j]);
+    fprintf(stderr, " o1=");
+    for (j = 0; j < 8; j++)
+      fprintf(stderr, "%02x", orig_o->v1[j]);
+    fprintf(stderr, "\n");
+    */
+
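+    // seek to the last taint region; the index loop below consumes the
+    // taint list back to front via t->prev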
+    t = taint;
+    while (t->next) {
+
+      t = t->next;
 
-      if (unlikely(rtn_extend_encoding(afl, o->v0, o->v1, orig_o->v0, idx,
-                                       orig_buf, buf, len, &status))) {
+    }
 
-        return 1;
+    for (idx = 0; idx < len; ++idx) {
 
-      }
+      if (have_taint) {
 
-      if (status == 2) {
+        if (!t || idx < t->pos) {
 
-        ++fails;
+          continue;
 
-      } else if (status == 1) {
+        } else {
 
-        break;
+          taint_len = t->pos + t->len - idx;
+
+          if (idx == t->pos + t->len - 1) { t = t->prev; }
+
+        }
+
+      } else {
+
+        taint_len = len - idx;
 
       }
 
-      if (unlikely(rtn_extend_encoding(afl, o->v1, o->v0, orig_o->v1, idx,
-                                       orig_buf, buf, len, &status))) {
+      status = 0;
+
+      if (unlikely(rtn_extend_encoding(afl, o->v0, o->v1, orig_o->v0,
+                                       orig_o->v1, idx, taint_len, orig_buf,
+                                       buf, cbuf, len, lvl, &status))) {
 
         return 1;
 
       }
 
-      if (status == 2) {
+      if (status == 1) {
 
-        ++fails;
+        found_one = 1;
+        break;
 
-      } else if (status == 1) {
+      }
 
+      status = 0;
+
+      if (unlikely(rtn_extend_encoding(afl, o->v1, o->v0, orig_o->v1,
+                                       orig_o->v0, idx, taint_len, orig_buf,
+                                       buf, cbuf, len, lvl, &status))) {
+
+        return 1;
+
+      }
+
+      if (status == 1) {
+
+        found_one = 1;
         break;
 
       }
 
     }
 
-    if (status == 1) { found_one = 1; }
-
     // If failed, add to dictionary
-    if (fails == 8) {
+    if (!found_one && (lvl & LVL1)) {
 
-      if (afl->pass_stats[key].total == 0) {
+      if (unlikely(!afl->pass_stats[key].total)) {
 
         maybe_add_auto(afl, o->v0, SHAPE_BYTES(h->shape));
         maybe_add_auto(afl, o->v1, SHAPE_BYTES(h->shape));
@@ -768,54 +2365,147 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u32 len) {
 ///// Input to State stage
 
 // afl->queue_cur->exec_cksum
-u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len,
-                        u64 exec_cksum) {
+u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) {
 
   u8 r = 1;
-  if (unlikely(!afl->orig_cmp_map)) {
+  if (unlikely(!afl->pass_stats)) {
 
-    afl->orig_cmp_map = ck_alloc_nozero(sizeof(struct cmp_map));
+    afl->pass_stats = ck_alloc(sizeof(struct afl_pass_stat) * CMP_MAP_W);
 
   }
 
-  if (unlikely(!afl->pass_stats)) {
+  struct tainted *taint = NULL;
 
-    afl->pass_stats = ck_alloc(sizeof(struct afl_pass_stat) * CMP_MAP_W);
+  if (!afl->queue_cur->taint || !afl->queue_cur->cmplog_colorinput) {
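+    // colorization replaces input bytes with random values while preserving
+    // the execution path, yielding a taint map of which bytes matter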
+
+    if (unlikely(colorization(afl, buf, len, &taint))) { return 1; }
+
+    // no taint? bail out, but mark the entry as processed so that
+    // colorization is not attempted again
+    if (!taint) {
+
+#ifdef _DEBUG
+      fprintf(stderr, "TAINT FAILED\n");
+#endif
+      afl->queue_cur->colorized = CMPLOG_LVL_MAX;
+      return 0;
+
+    }
+
+#ifdef _DEBUG
+    else if (taint->pos == 0 && taint->len == len) {
+
+      fprintf(stderr, "TAINT FULL\n");
+
+    }
+
+#endif
+
+  } else {
+
+    buf = afl->queue_cur->cmplog_colorinput;
+    taint = afl->queue_cur->taint;
 
   }
 
-  // do it manually, forkserver clear only afl->fsrv.trace_bits
-  memset(afl->shm.cmp_map->headers, 0, sizeof(afl->shm.cmp_map->headers));
+  struct tainted *t = taint;
+
+  while (t) {
+
+#ifdef _DEBUG
+    fprintf(stderr, "T: idx=%u len=%u\n", t->pos, t->len);
+#endif
+    t = t->next;
+
+  }
+
+#if defined(_DEBUG) || defined(CMPLOG_INTROSPECTION)
+  u64 start_time = get_cur_time();
+  u32 cmp_locations = 0;
+#endif
+
+  // Generate the cmplog data
+
+  // manually clear the full cmp_map
+  memset(afl->shm.cmp_map, 0, sizeof(struct cmp_map));
+  if (unlikely(common_fuzz_cmplog_stuff(afl, orig_buf, len))) {
+
+    afl->queue_cur->colorized = CMPLOG_LVL_MAX;
+    while (taint) {
+
+      t = taint->next;
+      ck_free(taint);
+      taint = t;
+
+    }
+
+    return 1;
+
+  }
+
+  if (unlikely(!afl->orig_cmp_map)) {
+
+    afl->orig_cmp_map = ck_alloc_nozero(sizeof(struct cmp_map));
 
-  if (unlikely(common_fuzz_cmplog_stuff(afl, buf, len))) { return 1; }
+  }
 
   memcpy(afl->orig_cmp_map, afl->shm.cmp_map, sizeof(struct cmp_map));
+  memset(afl->shm.cmp_map->headers, 0, sizeof(struct cmp_header) * CMP_MAP_W);
+  if (unlikely(common_fuzz_cmplog_stuff(afl, buf, len))) {
+
+    afl->queue_cur->colorized = CMPLOG_LVL_MAX;
+    while (taint) {
 
-  if (unlikely(colorization(afl, buf, len, exec_cksum))) { return 1; }
+      t = taint->next;
+      ck_free(taint);
+      taint = t;
+
+    }
+
+    return 1;
+
+  }
 
-  // do it manually, forkserver clear only afl->fsrv.trace_bits
-  memset(afl->shm.cmp_map->headers, 0, sizeof(afl->shm.cmp_map->headers));
+#ifdef _DEBUG
+  dump("ORIG", orig_buf, len);
+  dump("NEW ", buf, len);
+#endif
 
-  if (unlikely(common_fuzz_cmplog_stuff(afl, buf, len))) { return 1; }
+  // Start insertion loop
 
   u64 orig_hit_cnt, new_hit_cnt;
   u64 orig_execs = afl->fsrv.total_execs;
   orig_hit_cnt = afl->queued_paths + afl->unique_crashes;
+  u64 screen_update = 100000 / afl->queue_cur->exec_us,
+      execs = afl->fsrv.total_execs;
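+  // screen_update: refresh the UI after roughly 100 ms worth of executions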
 
   afl->stage_name = "input-to-state";
   afl->stage_short = "its";
   afl->stage_max = 0;
   afl->stage_cur = 0;
 
+  u32 lvl = (afl->queue_cur->colorized ? 0 : LVL1) +
+            (afl->cmplog_lvl == CMPLOG_LVL_MAX ? LVL3 : 0);
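+  // LVL1 is set on the first cmplog pass over a queue entry; LVL3 adds the
+  // expensive transform and arithmetic solvers at the maximum cmplog level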
+
+#ifdef CMPLOG_COMBINE
+  u8 *cbuf = afl_realloc((void **)&afl->in_scratch_buf, len + 128);
+  memcpy(cbuf, orig_buf, len);
+  u8 *virgin_backup = afl_realloc((void **)&afl->ex_buf, afl->shm.map_size);
+  memcpy(virgin_backup, afl->virgin_bits, afl->shm.map_size);
+#else
+  u8 *cbuf = NULL;
+#endif
+
   u32 k;
   for (k = 0; k < CMP_MAP_W; ++k) {
 
     if (!afl->shm.cmp_map->headers[k].hits) { continue; }
 
-    if (afl->pass_stats[k].total &&
-        (rand_below(afl, afl->pass_stats[k].total) >=
-             afl->pass_stats[k].faileds ||
-         afl->pass_stats[k].total == 0xff)) {
+    if (afl->pass_stats[k].faileds >= CMPLOG_FAIL_MAX ||
+        afl->pass_stats[k].total >= CMPLOG_FAIL_MAX) {
+
+#ifdef _DEBUG
+      fprintf(stderr, "DISABLED %u\n", k);
+#endif
 
       afl->shm.cmp_map->headers[k].hits = 0;  // ignore this cmp
 
@@ -839,13 +2529,37 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len,
 
     if (!afl->shm.cmp_map->headers[k].hits) { continue; }
 
+#if defined(_DEBUG) || defined(CMPLOG_INTROSPECTION)
+    ++cmp_locations;
+#endif
+
     if (afl->shm.cmp_map->headers[k].type == CMP_TYPE_INS) {
 
-      if (unlikely(cmp_fuzz(afl, k, orig_buf, buf, len))) { goto exit_its; }
+      if (unlikely(cmp_fuzz(afl, k, orig_buf, buf, cbuf, len, lvl, taint))) {
 
-    } else {
+        goto exit_its;
 
-      if (unlikely(rtn_fuzz(afl, k, orig_buf, buf, len))) { goto exit_its; }
+      }
+
+    } else if ((lvl & LVL1)
+
+#ifdef CMPLOG_SOLVE_TRANSFORM
+               || (lvl & LVL3)
+#endif
+    ) {
+
+      if (unlikely(rtn_fuzz(afl, k, orig_buf, buf, cbuf, len, lvl, taint))) {
+
+        goto exit_its;
+
+      }
+
+    }
+
+    if (afl->fsrv.total_execs - execs > screen_update) {
+
+      execs = afl->fsrv.total_execs;
+      show_stats(afl);
 
     }
 
@@ -854,11 +2568,122 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len,
   r = 0;
 
 exit_its:
+
+  if (afl->cmplog_lvl == CMPLOG_LVL_MAX) {
+
+    afl->queue_cur->colorized = CMPLOG_LVL_MAX;
+
+    ck_free(afl->queue_cur->cmplog_colorinput);
+    while (taint) {
+
+      t = taint->next;
+      ck_free(taint);
+      taint = t;
+
+    }
+
+    afl->queue_cur->taint = NULL;
+
+  } else {
+
+    afl->queue_cur->colorized = LVL2;
+
+    if (!afl->queue_cur->taint) { afl->queue_cur->taint = taint; }
+
+    if (!afl->queue_cur->cmplog_colorinput) {
+
+      afl->queue_cur->cmplog_colorinput = ck_alloc_nozero(len);
+      memcpy(afl->queue_cur->cmplog_colorinput, buf, len);
+      memcpy(buf, orig_buf, len);
+
+    }
+
+  }
+
+#ifdef CMPLOG_COMBINE
+  if (afl->queued_paths + afl->unique_crashes > orig_hit_cnt + 1) {
+
+    // copy the current virgin bits so we can recover the information
+    u8 *virgin_save = afl_realloc((void **)&afl->eff_buf, afl->shm.map_size);
+    memcpy(virgin_save, afl->virgin_bits, afl->shm.map_size);
+    // reset virgin bits to the backup taken before the redqueen stage
+    memcpy(afl->virgin_bits, virgin_backup, afl->shm.map_size);
+
+    u8 status = 0;
+    its_fuzz(afl, cbuf, len, &status);
+
+  // now combine with the saved virgin bits
+  #ifdef WORD_SIZE_64
+    u64 *v = (u64 *)afl->virgin_bits;
+    u64 *s = (u64 *)virgin_save;
+    u32  i;
+    for (i = 0; i < (afl->shm.map_size >> 3); i++) {
+
+      v[i] &= s[i];
+
+    }
+
+  #else
+    u32 *v = (u32 *)afl->virgin_bits;
+    u32 *s = (u32 *)virgin_save;
+    u32 i;
+    for (i = 0; i < (afl->shm.map_size >> 2); i++) {
+
+      v[i] &= s[i];
+
+    }
+
+  #endif
+
+  #ifdef _DEBUG
+    dump("COMB", cbuf, len);
+    if (status == 1) {
+
+      fprintf(stderr, "NEW CMPLOG_COMBINED\n");
+
+    } else {
+
+      fprintf(stderr, "NO new combined\n");
+
+    }
+
+  #endif
+
+  }
+
+#endif
+
   new_hit_cnt = afl->queued_paths + afl->unique_crashes;
   afl->stage_finds[STAGE_ITS] += new_hit_cnt - orig_hit_cnt;
   afl->stage_cycles[STAGE_ITS] += afl->fsrv.total_execs - orig_execs;
 
-  memcpy(buf, orig_buf, len);
+#if defined(_DEBUG) || defined(CMPLOG_INTROSPECTION)
+  FILE *f = stderr;
+  #ifndef _DEBUG
+  if (afl->not_on_tty) {
+
+    char fn[4096];
+    snprintf(fn, sizeof(fn), "%s/introspection_cmplog.txt", afl->out_dir);
+    f = fopen(fn, "a");
+
+  }
+
+  #endif
+
+  if (f) {
+
+    fprintf(f,
+            "Cmplog: fname=%s len=%u ms=%llu result=%u finds=%llu entries=%u\n",
+            afl->queue_cur->fname, len, get_cur_time() - start_time, r,
+            new_hit_cnt - orig_hit_cnt, cmp_locations);
+
+  #ifndef _DEBUG
+    if (afl->not_on_tty) { fclose(f); }
+  #endif
+
+  }
+
+#endif
 
   return r;
 
diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c
index 17c305ed..97cb7415 100644
--- a/src/afl-fuzz-run.c
+++ b/src/afl-fuzz-run.c
@@ -627,9 +627,8 @@ void sync_fuzzers(afl_state_t *afl) {
     }
 
     if (m >= n) { goto close_sync; }  // nothing new
-    o = n - 1;
 
-    while (o >= m) {
+    for (o = m; o < n; o++) {
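+      // process the newly seen sync cases in ascending order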
 
       s32         fd;
       struct stat st;
@@ -637,7 +636,6 @@ void sync_fuzzers(afl_state_t *afl) {
       snprintf(path, sizeof(path), "%s/%s", qd_path, namelist[o]->d_name);
       afl->syncing_case = next_min_accept;
       next_min_accept++;
-      o--;
 
       /* Allow this to fail in case the other fuzzer is resuming or so... */
 
diff --git a/src/afl-fuzz-state.c b/src/afl-fuzz-state.c
index 60c9684c..5040e3ef 100644
--- a/src/afl-fuzz-state.c
+++ b/src/afl-fuzz-state.c
@@ -102,6 +102,7 @@ void afl_state_init(afl_state_t *afl, uint32_t map_size) {
   afl->stats_update_freq = 1;
   afl->stats_avg_exec = 0;
   afl->skip_deterministic = 1;
+  afl->cmplog_lvl = 1;
 #ifndef NO_SPLICING
   afl->use_splicing = 1;
 #endif
@@ -235,6 +236,13 @@ void read_afl_environment(afl_state_t *afl, char **envp) {
             afl->afl_env.afl_custom_mutator_only =
                 get_afl_env(afl_environment_variables[i]) ? 1 : 0;
 
+          } else if (!strncmp(env, "AFL_CMPLOG_ONLY_NEW",
+
+                              afl_environment_variable_len)) {
+
+            afl->afl_env.afl_cmplog_only_new =
+                get_afl_env(afl_environment_variables[i]) ? 1 : 0;
+
           } else if (!strncmp(env, "AFL_NO_UI", afl_environment_variable_len)) {
 
             afl->afl_env.afl_no_ui =
diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c
index e67bace9..66efeb20 100644
--- a/src/afl-fuzz-stats.c
+++ b/src/afl-fuzz-stats.c
@@ -60,8 +60,10 @@ void write_setup_file(afl_state_t *afl, u32 argc, char **argv) {
     if (i) fprintf(f, " ");
 #ifdef __ANDROID__
     if (memchr(argv[i], '\'', sizeof(argv[i]))) {
+
 #else
     if (index(argv[i], '\'')) {
+
 #endif
 
       fprintf(f, "'");
@@ -87,6 +89,100 @@ void write_setup_file(afl_state_t *afl, u32 argc, char **argv) {
 
 }
 
+/* Load some of the existing stats file when resuming. */
+void load_stats_file(afl_state_t *afl) {
+
+  FILE *f;
+  u8    buf[MAX_LINE];
+  u8 *  lptr;
+  u8    fn[PATH_MAX];
+  u32   lineno = 0;
+  snprintf(fn, PATH_MAX, "%s/fuzzer_stats", afl->out_dir);
+  f = fopen(fn, "r");
+  if (!f) {
+
+    WARNF("Unable to load stats file '%s'", fn);
+    return;
+
+  }
+
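+  // parsing relies on the fixed line order and padded key names emitted by
+  // write_stats_file(); lines that do not match are simply skipped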
+  while ((lptr = fgets(buf, MAX_LINE, f))) {
+
+    lineno++;
+    u8 *lstartptr = lptr;
+    u8 *rptr = lptr + strlen(lptr) - 1;
+    u8  keystring[MAX_LINE];
+    while (*lptr != ':' && lptr < rptr) {
+
+      lptr++;
+
+    }
+
+    if (*lptr == '\n' || !*lptr) {
+
+      WARNF("Unable to read line %u of stats file", lineno);
+      continue;
+
+    }
+
+    if (*lptr == ':') {
+
+      *lptr = 0;
+      strcpy(keystring, lstartptr);
+      lptr++;
+      char *nptr;
+      switch (lineno) {
+
+        case 3:
+          if (!strcmp(keystring, "run_time          "))
+            afl->prev_run_time = 1000 * strtoull(lptr, &nptr, 10);
+          break;
+        case 5:
+          if (!strcmp(keystring, "cycles_done       "))
+            afl->queue_cycle =
+                strtoull(lptr, &nptr, 10) ? strtoull(lptr, &nptr, 10) + 1 : 0;
+          break;
+        case 7:
+          if (!strcmp(keystring, "execs_done        "))
+            afl->fsrv.total_execs = strtoull(lptr, &nptr, 10);
+          break;
+        case 10:
+          if (!strcmp(keystring, "paths_total       "))
+            afl->queued_paths = strtoul(lptr, &nptr, 10);
+          break;
+        case 12:
+          if (!strcmp(keystring, "paths_found       "))
+            afl->queued_discovered = strtoul(lptr, &nptr, 10);
+          break;
+        case 13:
+          if (!strcmp(keystring, "paths_imported    "))
+            afl->queued_imported = strtoul(lptr, &nptr, 10);
+          break;
+        case 14:
+          if (!strcmp(keystring, "max_depth         "))
+            afl->max_depth = strtoul(lptr, &nptr, 10);
+          break;
+        case 21:
+          if (!strcmp(keystring, "unique_crashes    "))
+            afl->unique_crashes = strtoull(lptr, &nptr, 10);
+          break;
+        case 22:
+          if (!strcmp(keystring, "unique_hangs      "))
+            afl->unique_hangs = strtoull(lptr, &nptr, 10);
+          break;
+        default:
+          break;
+
+      }
+
+    }
+
+  }
+
+  return;
+
+}
+
 /* Update stats file for unattended monitoring. */
 
 void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
@@ -177,12 +273,13 @@ void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
           "\n"
           "target_mode       : %s%s%s%s%s%s%s%s%s\n"
           "command_line      : %s\n",
-          afl->start_time / 1000, cur_time / 1000,
-          (cur_time - afl->start_time) / 1000, (u32)getpid(),
-          afl->queue_cycle ? (afl->queue_cycle - 1) : 0, afl->cycles_wo_finds,
-          afl->fsrv.total_execs,
+          (afl->start_time - afl->prev_run_time) / 1000, cur_time / 1000,
+          (afl->prev_run_time + cur_time - afl->start_time) / 1000,
+          (u32)getpid(), afl->queue_cycle ? (afl->queue_cycle - 1) : 0,
+          afl->cycles_wo_finds, afl->fsrv.total_execs,
           afl->fsrv.total_execs /
-              ((double)(get_cur_time() - afl->start_time) / 1000),
+              ((double)(afl->prev_run_time + get_cur_time() - afl->start_time) /
+               1000),
           afl->last_avg_execs_saved, afl->queued_paths, afl->queued_favored,
           afl->queued_discovered, afl->queued_imported, afl->max_depth,
           afl->current_entry, afl->pending_favored, afl->pending_not_fuzzed,
@@ -377,8 +474,8 @@ void show_stats(afl_state_t *afl) {
 
     if (likely(cur_ms != afl->start_time)) {
 
-      afl->stats_avg_exec =
-          ((double)afl->fsrv.total_execs) * 1000 / (cur_ms - afl->start_time);
+      afl->stats_avg_exec = ((double)afl->fsrv.total_execs) * 1000 /
+                            (afl->prev_run_time + cur_ms - afl->start_time);
 
     }
 
@@ -590,7 +687,7 @@ void show_stats(afl_state_t *afl) {
 
   }
 
-  u_stringify_time_diff(time_tmp, cur_ms, afl->start_time);
+  u_stringify_time_diff(time_tmp, afl->prev_run_time + cur_ms, afl->start_time);
   SAYF(bV bSTOP "        run time : " cRST "%-33s " bSTG bV bSTOP
                 "  cycles done : %s%-5s " bSTG              bV "\n",
        time_tmp, tmp, u_stringify_int(IB(0), afl->queue_cycle - 1));
@@ -1012,8 +1109,8 @@ void show_stats(afl_state_t *afl) {
 
 void show_init_stats(afl_state_t *afl) {
 
-  struct queue_entry *q = afl->queue;
-  u32                 min_bits = 0, max_bits = 0, max_len = 0, count = 0;
+  struct queue_entry *q;
+  u32                 min_bits = 0, max_bits = 0, max_len = 0, count = 0, i;
   u64                 min_us = 0, max_us = 0;
   u64                 avg_us = 0;
 
@@ -1026,7 +1123,10 @@ void show_init_stats(afl_state_t *afl) {
 
   }
 
-  while (q) {
+  for (i = 0; i < afl->queued_paths; i++) {
+
+    q = afl->queue_buf[i];
+    if (unlikely(q->disabled)) { continue; }
 
     if (!min_us || q->exec_us < min_us) { min_us = q->exec_us; }
     if (q->exec_us > max_us) { max_us = q->exec_us; }
@@ -1037,7 +1137,6 @@ void show_init_stats(afl_state_t *afl) {
     if (q->len > max_len) { max_len = q->len; }
 
     ++count;
-    q = q->next;
 
   }
 
diff --git a/src/afl-fuzz-statsd.c b/src/afl-fuzz-statsd.c
index 69cafd90..461bbbf6 100644
--- a/src/afl-fuzz-statsd.c
+++ b/src/afl-fuzz-statsd.c
@@ -1,3 +1,8 @@
+/*
+ * This implements rpc.statsd support, see docs/rpc_statsd.md
+ *
+ */
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/socket.h>
@@ -226,37 +231,39 @@ int statsd_format_metric(afl_state_t *afl, char *buff, size_t bufflen) {
   */
   if (afl->statsd_metric_format_type == STATSD_TAGS_TYPE_SUFFIX) {
 
-    snprintf(buff, bufflen, afl->statsd_metric_format,
-             afl->queue_cycle ? (afl->queue_cycle - 1) : 0, tags,
-             afl->cycles_wo_finds, tags, afl->fsrv.total_execs, tags,
-             afl->fsrv.total_execs /
-                 ((double)(get_cur_time() - afl->start_time) / 1000),
-             tags, afl->queued_paths, tags, afl->queued_favored, tags,
-             afl->queued_discovered, tags, afl->queued_imported, tags,
-             afl->max_depth, tags, afl->current_entry, tags,
-             afl->pending_favored, tags, afl->pending_not_fuzzed, tags,
-             afl->queued_variable, tags, afl->unique_crashes, tags,
-             afl->unique_hangs, tags, afl->total_crashes, tags,
-             afl->slowest_exec_ms, tags,
-             count_non_255_bytes(afl, afl->virgin_bits), tags,
-             afl->var_byte_count, tags, afl->expand_havoc, tags);
+    snprintf(
+        buff, bufflen, afl->statsd_metric_format,
+        afl->queue_cycle ? (afl->queue_cycle - 1) : 0, tags,
+        afl->cycles_wo_finds, tags, afl->fsrv.total_execs, tags,
+        afl->fsrv.total_execs /
+            ((double)(get_cur_time() + afl->prev_run_time - afl->start_time) /
+             1000),
+        tags, afl->queued_paths, tags, afl->queued_favored, tags,
+        afl->queued_discovered, tags, afl->queued_imported, tags,
+        afl->max_depth, tags, afl->current_entry, tags, afl->pending_favored,
+        tags, afl->pending_not_fuzzed, tags, afl->queued_variable, tags,
+        afl->unique_crashes, tags, afl->unique_hangs, tags, afl->total_crashes,
+        tags, afl->slowest_exec_ms, tags,
+        count_non_255_bytes(afl, afl->virgin_bits), tags, afl->var_byte_count,
+        tags, afl->expand_havoc, tags);
 
   } else if (afl->statsd_metric_format_type == STATSD_TAGS_TYPE_MID) {
 
-    snprintf(buff, bufflen, afl->statsd_metric_format, tags,
-             afl->queue_cycle ? (afl->queue_cycle - 1) : 0, tags,
-             afl->cycles_wo_finds, tags, afl->fsrv.total_execs, tags,
-             afl->fsrv.total_execs /
-                 ((double)(get_cur_time() - afl->start_time) / 1000),
-             tags, afl->queued_paths, tags, afl->queued_favored, tags,
-             afl->queued_discovered, tags, afl->queued_imported, tags,
-             afl->max_depth, tags, afl->current_entry, tags,
-             afl->pending_favored, tags, afl->pending_not_fuzzed, tags,
-             afl->queued_variable, tags, afl->unique_crashes, tags,
-             afl->unique_hangs, tags, afl->total_crashes, tags,
-             afl->slowest_exec_ms, tags,
-             count_non_255_bytes(afl, afl->virgin_bits), tags,
-             afl->var_byte_count, tags, afl->expand_havoc);
+    snprintf(
+        buff, bufflen, afl->statsd_metric_format, tags,
+        afl->queue_cycle ? (afl->queue_cycle - 1) : 0, tags,
+        afl->cycles_wo_finds, tags, afl->fsrv.total_execs, tags,
+        afl->fsrv.total_execs /
+            ((double)(get_cur_time() + afl->prev_run_time - afl->start_time) /
+             1000),
+        tags, afl->queued_paths, tags, afl->queued_favored, tags,
+        afl->queued_discovered, tags, afl->queued_imported, tags,
+        afl->max_depth, tags, afl->current_entry, tags, afl->pending_favored,
+        tags, afl->pending_not_fuzzed, tags, afl->queued_variable, tags,
+        afl->unique_crashes, tags, afl->unique_hangs, tags, afl->total_crashes,
+        tags, afl->slowest_exec_ms, tags,
+        count_non_255_bytes(afl, afl->virgin_bits), tags, afl->var_byte_count,
+        tags, afl->expand_havoc);
 
   }
 
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index 7facf261..8eb3625b 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -77,13 +77,8 @@ static void at_exit() {
   }
 
   int kill_signal = SIGKILL;
-
   /* AFL_KILL_SIGNAL should already be a valid int at this point */
-  if (getenv("AFL_KILL_SIGNAL")) {
-
-    kill_signal = atoi(getenv("AFL_KILL_SIGNAL"));
-
-  }
+  if ((ptr = getenv("AFL_KILL_SIGNAL"))) { kill_signal = atoi(ptr); }
 
   if (pid1 > 0) { kill(pid1, kill_signal); }
   if (pid2 > 0) { kill(pid2, kill_signal); }
@@ -103,13 +98,14 @@ static void usage(u8 *argv0, int more_help) {
 
       "Execution control settings:\n"
       "  -p schedule   - power schedules compute a seed's performance score:\n"
-      "                  <fast(default), rare, exploit, seek, mmopt, coe, "
-      "explore,\n"
-      "                  lin, quad> -- see docs/power_schedules.md\n"
+      "                  fast(default), explore, exploit, seek, rare, mmopt, "
+      "coe, lin\n"
+      "                  quad -- see docs/power_schedules.md\n"
       "  -f file       - location read by the fuzzed program (default: stdin "
       "or @@)\n"
       "  -t msec       - timeout for each run (auto-scaled, 50-%u ms)\n"
-      "  -m megs       - memory limit for child process (%u MB, 0 = no limit)\n"
+      "  -m megs       - memory limit for child process (%u MB, 0 = no limit "
+      "[default])\n"
       "  -Q            - use binary-only instrumentation (QEMU mode)\n"
       "  -U            - use unicorn-based instrumentation (Unicorn mode)\n"
       "  -W            - use qemu-based instrumentation with Wine (Wine "
@@ -125,7 +121,10 @@ static void usage(u8 *argv0, int more_help) {
       "                  See docs/README.MOpt.md\n"
       "  -c program    - enable CmpLog by specifying a binary compiled for "
       "it.\n"
-      "                  if using QEMU, just use -c 0.\n\n"
+      "                  if using QEMU, just use -c 0.\n"
+      "  -l cmplog_level - set the complexity/intensivity of CmpLog.\n"
+      "                  Values: 1 (basic), 2 (larger files) and 3 "
+      "(transform)\n\n"
 
       "Fuzzing behavior settings:\n"
       "  -Z            - sequential queue selection instead of weighted "
@@ -146,7 +145,8 @@ static void usage(u8 *argv0, int more_help) {
 
       "Other stuff:\n"
       "  -M/-S id      - distributed mode (see docs/parallel_fuzzing.md)\n"
-      "                  -M auto-sets -D and -Z (use -d to disable -D)\n"
+      "                  -M auto-sets -D, -Z (use -d to disable -D) and no "
+      "trimming\n"
       "  -F path       - sync to a foreign fuzzer queue directory (requires "
       "-M, can\n"
       "                  be specified up to %u times)\n"
@@ -182,6 +182,7 @@ static void usage(u8 *argv0, int more_help) {
       "AFL_AUTORESUME: resume fuzzing if directory specified by -o already exists\n"
       "AFL_BENCH_JUST_ONE: run the target just once\n"
       "AFL_BENCH_UNTIL_CRASH: exit soon when the first crashing input has been found\n"
+      "AFL_CMPLOG_ONLY_NEW: do not run cmplog on initial testcases (good for resumes!)\n"
       "AFL_CRASH_EXITCODE: optional child exit code to be interpreted as crash\n"
       "AFL_CUSTOM_MUTATOR_LIBRARY: lib with afl_custom_fuzz() to mutate inputs\n"
       "AFL_CUSTOM_MUTATOR_ONLY: avoid AFL++'s internal mutators\n"
@@ -197,6 +198,7 @@ static void usage(u8 *argv0, int more_help) {
       "AFL_FORKSRV_INIT_TMOUT: time spent waiting for forkserver during startup (in milliseconds)\n"
       "AFL_HANG_TMOUT: override timeout value (in milliseconds)\n"
       "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES: don't warn about core dump handlers\n"
+      "AFL_IGNORE_UNKNOWN_ENVS: don't warn on unknown env vars\n"
       "AFL_IMPORT_FIRST: sync and import test cases from other fuzzer instances first\n"
       "AFL_KILL_SIGNAL: Signal ID delivered to child processes on timeout, etc. (default: SIGKILL)\n"
       "AFL_MAP_SIZE: the shared memory size for that target. must be >= the size\n"
@@ -340,7 +342,7 @@ int main(int argc, char **argv_orig, char **envp) {
   afl_state_init(afl, map_size);
   afl->debug = debug;
   afl_fsrv_init(&afl->fsrv);
-
+  if (debug) { afl->fsrv.debug = true; }
   read_afl_environment(afl, envp);
   if (afl->shm.map_size) { afl->fsrv.map_size = afl->shm.map_size; }
   exit_1 = !!afl->afl_env.afl_bench_just_one;
@@ -357,7 +359,7 @@ int main(int argc, char **argv_orig, char **envp) {
 
   while ((opt = getopt(
               argc, argv,
-              "+b:c:i:I:o:f:F:m:t:T:dDnCB:S:M:x:QNUWe:p:s:V:E:L:hRP:Z")) > 0) {
+              "+b:B:c:CdDe:E:hi:I:f:F:l:L:m:M:nNo:p:RQs:S:t:T:UV:Wx:Z")) > 0) {
 
     switch (opt) {
 
@@ -501,6 +503,7 @@ int main(int argc, char **argv_orig, char **envp) {
         afl->sync_id = ck_strdup(optarg);
         afl->skip_deterministic = 0;  // force deterministic fuzzing
         afl->old_seed_selection = 1;  // force old queue walking seed selection
+        afl->disable_trim = 1;        // disable trimming
 
         if ((c = strchr(afl->sync_id, ':'))) {
 
@@ -550,14 +553,33 @@ int main(int argc, char **argv_orig, char **envp) {
 
       case 'F':                                         /* foreign sync dir */
 
-        if (!afl->is_main_node)
+        if (!optarg) { FATAL("Missing path for -F"); }
+        if (!afl->is_main_node) {
+
           FATAL(
               "Option -F can only be specified after the -M option for the "
               "main fuzzer of a fuzzing campaign");
-        if (afl->foreign_sync_cnt >= FOREIGN_SYNCS_MAX)
+
+        }
+
+        if (afl->foreign_sync_cnt >= FOREIGN_SYNCS_MAX) {
+
           FATAL("Maximum %u entried of -F option can be specified",
                 FOREIGN_SYNCS_MAX);
+
+        }
+
         afl->foreign_syncs[afl->foreign_sync_cnt].dir = optarg;
+        while (afl->foreign_syncs[afl->foreign_sync_cnt]
+                   .dir[strlen(afl->foreign_syncs[afl->foreign_sync_cnt].dir) -
+                        1] == '/') {
+
+          afl->foreign_syncs[afl->foreign_sync_cnt]
+              .dir[strlen(afl->foreign_syncs[afl->foreign_sync_cnt].dir) - 1] =
+              0;
+
+        }
+
         afl->foreign_sync_cnt++;
         break;
 
@@ -585,7 +607,8 @@ int main(int argc, char **argv_orig, char **envp) {
 
         if (afl->timeout_given) { FATAL("Multiple -t options not supported"); }
 
-        if (!optarg || sscanf(optarg, "%u%c", &afl->fsrv.exec_tmout, &suffix) < 1 ||
+        if (!optarg ||
+            sscanf(optarg, "%u%c", &afl->fsrv.exec_tmout, &suffix) < 1 ||
             optarg[0] == '-') {
 
           FATAL("Bad syntax used for -t");
@@ -688,7 +711,6 @@ int main(int argc, char **argv_orig, char **envp) {
         if (afl->in_bitmap) { FATAL("Multiple -B options not supported"); }
 
         afl->in_bitmap = optarg;
-        read_bitmap(afl->in_bitmap, afl->virgin_bits, afl->fsrv.map_size);
         break;
 
       case 'C':                                               /* crash mode */
@@ -767,7 +789,8 @@ int main(int argc, char **argv_orig, char **envp) {
       case 'V': {
 
         afl->most_time_key = 1;
-        if (!optarg || sscanf(optarg, "%llu", &afl->most_time) < 1 || optarg[0] == '-') {
+        if (!optarg || sscanf(optarg, "%llu", &afl->most_time) < 1 ||
+            optarg[0] == '-') {
 
           FATAL("Bad syntax used for -V");
 
@@ -778,7 +801,8 @@ int main(int argc, char **argv_orig, char **envp) {
       case 'E': {
 
         afl->most_execs_key = 1;
-        if (!optarg || sscanf(optarg, "%llu", &afl->most_execs) < 1 || optarg[0] == '-') {
+        if (!optarg || sscanf(optarg, "%llu", &afl->most_execs) < 1 ||
+            optarg[0] == '-') {
 
           FATAL("Bad syntax used for -E");
 
@@ -786,6 +810,26 @@ int main(int argc, char **argv_orig, char **envp) {
 
       } break;
 
+      case 'l': {
+
+        if (optarg) { afl->cmplog_lvl = atoi(optarg); }
+        if (afl->cmplog_lvl < 1 || afl->cmplog_lvl > CMPLOG_LVL_MAX) {
+
+          FATAL(
+              "Bad complog level value, accepted values are 1 (default), 2 and "
+              "%u.",
+              CMPLOG_LVL_MAX);
+
+        }
+
+        if (afl->cmplog_lvl == CMPLOG_LVL_MAX) {
+
+          afl->cmplog_max_filesize = MAX_FILE;
+
+        }
+
+      } break;
+
       case 'L': {                                              /* MOpt mode */
 
         if (afl->limit_time_sig) { FATAL("Multiple -L options not supported"); }
@@ -952,6 +996,32 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
+  if (afl->fsrv.qemu_mode && getenv("AFL_USE_QASAN")) {
+
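+    // prepend libqasan to AFL_PRELOAD so the QEMU child loads it first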
+    u8 *preload = getenv("AFL_PRELOAD");
+    u8 *libqasan = get_libqasan_path(argv_orig[0]);
+
+    if (!preload) {
+
+      setenv("AFL_PRELOAD", libqasan, 0);
+
+    } else {
+
+      u8 *result = ck_alloc(strlen(libqasan) + strlen(preload) + 2);
+      strcpy(result, libqasan);
+      strcat(result, " ");
+      strcat(result, preload);
+
+      setenv("AFL_PRELOAD", result, 1);
+      ck_free(result);
+
+    }
+
+    afl->afl_env.afl_preload = (u8 *)getenv("AFL_PRELOAD");
+    ck_free(libqasan);
+
+  }
+
   if (afl->fsrv.mem_limit && afl->shm.cmplog_mode) afl->fsrv.mem_limit += 260;
 
   OKF("afl++ is maintained by Marc \"van Hauser\" Heuse, Heiko \"hexcoder\" "
@@ -1075,6 +1145,8 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
+  if (afl->shm.cmplog_mode) { OKF("CmpLog level: %u", afl->cmplog_lvl); }
+
   /* Dynamically allocate memory for AFLFast schedules */
   if (afl->schedule >= FAST && afl->schedule <= RARE) {
 
@@ -1305,13 +1377,6 @@ int main(int argc, char **argv_orig, char **envp) {
   set_scheduler_mode(SCHEDULER_MODE_LOW_LATENCY);
   #endif
 
-  afl->fsrv.trace_bits =
-      afl_shm_init(&afl->shm, afl->fsrv.map_size, afl->non_instrumented_mode);
-
-  if (!afl->in_bitmap) { memset(afl->virgin_bits, 255, afl->fsrv.map_size); }
-  memset(afl->virgin_tmout, 255, afl->fsrv.map_size);
-  memset(afl->virgin_crash, 255, afl->fsrv.map_size);
-
   init_count_class16();
 
   if (afl->is_main_node && check_main_node_exists(afl) == 1) {
@@ -1478,6 +1543,56 @@ int main(int argc, char **argv_orig, char **envp) {
   }
 
   afl->argv = use_argv;
+  afl->fsrv.trace_bits =
+      afl_shm_init(&afl->shm, afl->fsrv.map_size, afl->non_instrumented_mode);
+
+  if (!afl->non_instrumented_mode && !afl->fsrv.qemu_mode &&
+      !afl->unicorn_mode) {
+
+    afl->fsrv.map_size = 4194304;  // dummy temporary value
+    setenv("AFL_MAP_SIZE", "4194304", 1);
+
+    u32 new_map_size = afl_fsrv_get_mapsize(
+        &afl->fsrv, afl->argv, &afl->stop_soon, afl->afl_env.afl_debug_child);
+
+    if (new_map_size && new_map_size != 4194304) {
+
+      // only reinitialize when it makes sense
+      if (map_size < new_map_size ||
+          (new_map_size > map_size && new_map_size - map_size > MAP_SIZE)) {
+
+        OKF("Re-initializing maps to %u bytes", new_map_size);
+
+        afl->virgin_bits = ck_realloc(afl->virgin_bits, new_map_size);
+        afl->virgin_tmout = ck_realloc(afl->virgin_tmout, new_map_size);
+        afl->virgin_crash = ck_realloc(afl->virgin_crash, new_map_size);
+        afl->var_bytes = ck_realloc(afl->var_bytes, new_map_size);
+        afl->top_rated =
+            ck_realloc(afl->top_rated, new_map_size * sizeof(void *));
+        afl->clean_trace = ck_realloc(afl->clean_trace, new_map_size);
+        afl->clean_trace_custom =
+            ck_realloc(afl->clean_trace_custom, new_map_size);
+        afl->first_trace = ck_realloc(afl->first_trace, new_map_size);
+        afl->map_tmp_buf = ck_realloc(afl->map_tmp_buf, new_map_size);
+
+        afl_fsrv_kill(&afl->fsrv);
+        afl_shm_deinit(&afl->shm);
+        afl->fsrv.map_size = new_map_size;
+        afl->fsrv.trace_bits =
+            afl_shm_init(&afl->shm, new_map_size, afl->non_instrumented_mode);
+        setenv("AFL_NO_AUTODICT", "1", 1);  // loaded already
+        afl_fsrv_start(&afl->fsrv, afl->argv, &afl->stop_soon,
+                       afl->afl_env.afl_debug_child);
+
+      }
+
+      map_size = new_map_size;
+
+    }
+
+    afl->fsrv.map_size = map_size;
+
+  }
 
   if (afl->cmplog_binary) {
 
@@ -1488,52 +1603,74 @@ int main(int argc, char **argv_orig, char **envp) {
     afl->cmplog_fsrv.qemu_mode = afl->fsrv.qemu_mode;
     afl->cmplog_fsrv.cmplog_binary = afl->cmplog_binary;
     afl->cmplog_fsrv.init_child_func = cmplog_exec_child;
-    afl_fsrv_start(&afl->cmplog_fsrv, afl->argv, &afl->stop_soon,
-                   afl->afl_env.afl_debug_child);
-    OKF("Cmplog forkserver successfully started");
 
-  }
+    afl->cmplog_fsrv.map_size = 4194304;
 
-  perform_dry_run(afl);
+    u32 new_map_size =
+        afl_fsrv_get_mapsize(&afl->cmplog_fsrv, afl->argv, &afl->stop_soon,
+                             afl->afl_env.afl_debug_child);
 
-  /*
-    if (!user_set_cache && afl->q_testcase_max_cache_size) {
+    if (new_map_size && new_map_size != 4194304) {
 
-      / * The user defined not a fixed number of entries for the cache.
-         Hence we autodetect a good value. After the dry run inputs are
-         trimmed and we know the average and max size of the input seeds.
-         We use this information to set a fitting size to max entries
-         based on the cache size. * /
+      // only reinitialize when it needs to be larger
+      if (map_size < new_map_size) {
 
-      struct queue_entry *q = afl->queue;
-      u64                 size = 0, count = 0, avg = 0, max = 0;
+        OKF("Re-initializing maps to %u bytes due to cmplog", new_map_size);
 
-      while (q) {
+        afl->virgin_bits = ck_realloc(afl->virgin_bits, new_map_size);
+        afl->virgin_tmout = ck_realloc(afl->virgin_tmout, new_map_size);
+        afl->virgin_crash = ck_realloc(afl->virgin_crash, new_map_size);
+        afl->var_bytes = ck_realloc(afl->var_bytes, new_map_size);
+        afl->top_rated =
+            ck_realloc(afl->top_rated, new_map_size * sizeof(void *));
+        afl->clean_trace = ck_realloc(afl->clean_trace, new_map_size);
+        afl->clean_trace_custom =
+            ck_realloc(afl->clean_trace_custom, new_map_size);
+        afl->first_trace = ck_realloc(afl->first_trace, new_map_size);
+        afl->map_tmp_buf = ck_realloc(afl->map_tmp_buf, new_map_size);
 
-        ++count;
-        size += q->len;
-        if (max < q->len) { max = q->len; }
-        q = q->next;
+        afl_fsrv_kill(&afl->fsrv);
+        afl_fsrv_kill(&afl->cmplog_fsrv);
+        afl_shm_deinit(&afl->shm);
+        afl->cmplog_fsrv.map_size = new_map_size;  // non-cmplog stays the same
 
-      }
+        afl->fsrv.trace_bits =
+            afl_shm_init(&afl->shm, new_map_size, afl->non_instrumented_mode);
+        setenv("AFL_NO_AUTODICT", "1", 1);  // loaded already
+        afl_fsrv_start(&afl->fsrv, afl->argv, &afl->stop_soon,
+                       afl->afl_env.afl_debug_child);
 
-      if (count) {
+        afl_fsrv_start(&afl->cmplog_fsrv, afl->argv, &afl->stop_soon,
+                       afl->afl_env.afl_debug_child);
 
-        avg = size / count;
-        avg = ((avg + max) / 2) + 1;
+        map_size = new_map_size;
 
       }
 
-      if (avg < 10240) { avg = 10240; }
+    }
 
-      afl->q_testcase_max_cache_entries = afl->q_testcase_max_cache_size / avg;
+    afl->cmplog_fsrv.map_size = map_size;
 
-      if (afl->q_testcase_max_cache_entries > 32768)
-        afl->q_testcase_max_cache_entries = 32768;
+    OKF("Cmplog forkserver successfully started");
 
-    }
+  }
+
+  // now that the correct bitmap size is known, read the -B bitmap option
+  // (if given) and initialize the virgin maps
+  if (afl->in_bitmap) {
 
-  */
+    read_bitmap(afl->in_bitmap, afl->virgin_bits, afl->fsrv.map_size);
+
+  } else {
+
+    memset(afl->virgin_bits, 255, map_size);
+
+  }
+
+  memset(afl->virgin_tmout, 255, map_size);
+  memset(afl->virgin_crash, 255, map_size);
+
+  perform_dry_run(afl);
 
   if (afl->q_testcase_max_cache_entries) {
 
@@ -1555,6 +1692,8 @@ int main(int argc, char **argv_orig, char **envp) {
 
   if (unlikely(afl->old_seed_selection)) seek_to = find_start_position(afl);
 
+  afl->start_time = get_cur_time();
+  if (afl->in_place_resume || afl->afl_env.afl_autoresume) load_stats_file(afl);
   write_stats_file(afl, 0, 0, 0);
   maybe_update_plot_file(afl, 0, 0);
   save_auto(afl);
@@ -1600,13 +1739,19 @@ int main(int argc, char **argv_orig, char **envp) {
                  (afl->old_seed_selection && !afl->queue_cur))) {
 
       ++afl->queue_cycle;
-      runs_in_current_cycle = 0;
+      runs_in_current_cycle = (u32)-1;
       afl->cur_skipped_paths = 0;
 
       if (unlikely(afl->old_seed_selection)) {
 
         afl->current_entry = 0;
-        afl->queue_cur = afl->queue;
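+        // skip any disabled entries at the head of the queue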
+        while (unlikely(afl->queue_buf[afl->current_entry]->disabled)) {
+
+          ++afl->current_entry;
+
+        }
+
+        afl->queue_cur = afl->queue_buf[afl->current_entry];
 
         if (unlikely(seek_to)) {
 
@@ -1634,6 +1779,14 @@ int main(int argc, char **argv_orig, char **envp) {
         if (afl->use_splicing) {
 
           ++afl->cycles_wo_finds;
+
+          if (unlikely(afl->shm.cmplog_mode &&
+                       afl->cmplog_max_filesize < MAX_FILE)) {
+
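+            // a cycle ended without finds: raise the cmplog size cap 16-fold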
+            afl->cmplog_max_filesize <<= 4;
+
+          }
+
           switch (afl->expand_havoc) {
 
             case 0:
@@ -1642,6 +1795,7 @@ int main(int argc, char **argv_orig, char **envp) {
               break;
             case 1:
               // add MOpt mutator
+              /*
               if (afl->limit_time_sig == 0 && !afl->custom_only &&
                   !afl->python_only) {
 
@@ -1650,10 +1804,11 @@ int main(int argc, char **argv_orig, char **envp) {
 
               }
 
+              */
               afl->expand_havoc = 2;
+              if (afl->cmplog_lvl && afl->cmplog_lvl < 2) afl->cmplog_lvl = 2;
               break;
             case 2:
-              // if (!have_p) afl->schedule = EXPLOIT;
               // increase havoc mutations per fuzz attempt
               afl->havoc_stack_pow2++;
               afl->expand_havoc = 3;
@@ -1664,11 +1819,14 @@ int main(int argc, char **argv_orig, char **envp) {
               afl->expand_havoc = 4;
               break;
             case 4:
-              // if not in sync mode, enable deterministic mode?
-              // if (!afl->sync_id) afl->skip_deterministic = 0;
               afl->expand_havoc = 5;
+              if (afl->cmplog_lvl && afl->cmplog_lvl < 3) afl->cmplog_lvl = 3;
               break;
             case 5:
+              // if not in sync mode, enable deterministic mode?
+              // if (!afl->sync_id) afl->skip_deterministic = 0;
+              afl->expand_havoc = 6;
+            case 6:
               // nothing else currently
               break;
 
@@ -1726,12 +1884,14 @@ int main(int argc, char **argv_orig, char **envp) {
 
         }
 
-        struct queue_entry *q = afl->queue;
         // we must recalculate the scores of all queue entries
-        while (q) {
+        for (i = 0; i < (s32)afl->queued_paths; i++) {
 
-          update_bitmap_score(afl, q);
-          q = q->next;
+          if (likely(!afl->queue_buf[i]->disabled)) {
+
+            update_bitmap_score(afl, afl->queue_buf[i]);
+
+          }
 
         }
 
@@ -1773,8 +1933,15 @@ int main(int argc, char **argv_orig, char **envp) {
 
       if (unlikely(afl->old_seed_selection)) {
 
-        afl->queue_cur = afl->queue_cur->next;
-        ++afl->current_entry;
+        while (++afl->current_entry < afl->queued_paths &&
+               afl->queue_buf[afl->current_entry]->disabled)
+          ;
+        if (unlikely(afl->current_entry >= afl->queued_paths ||
+                     afl->queue_buf[afl->current_entry] == NULL ||
+                     afl->queue_buf[afl->current_entry]->disabled))
+          afl->queue_cur = NULL;
+        else
+          afl->queue_cur = afl->queue_buf[afl->current_entry];
 
       }
 
diff --git a/src/afl-ld-lto.c b/src/afl-ld-lto.c
index 0671d1c4..0a978653 100644
--- a/src/afl-ld-lto.c
+++ b/src/afl-ld-lto.c
@@ -45,6 +45,11 @@
 
 #include <dirent.h>
 
+#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || \
+    defined(__DragonFly__)
+  #include <limits.h>
+#endif
+
 #ifdef __APPLE__
   #include <sys/syslimits.h>
 #endif
@@ -78,7 +83,7 @@ static void edit_params(int argc, char **argv) {
 
   if (!passthrough) {
 
-    for (i = 1; i < argc; i++) {
+    for (i = 1; i < (u32)argc; i++) {
 
       if (strstr(argv[i], "/afl-llvm-rt-lto.o") != NULL) rt_lto_present = 1;
       if (strstr(argv[i], "/afl-llvm-rt.o") != NULL) rt_present = 1;
@@ -86,7 +91,7 @@ static void edit_params(int argc, char **argv) {
 
     }
 
-    for (i = 1; i < argc && !gold_pos; i++) {
+    for (i = 1; i < (u32)argc && !gold_pos; i++) {
 
       if (strcmp(argv[i], "-plugin") == 0) {
 
@@ -95,7 +100,9 @@ static void edit_params(int argc, char **argv) {
           if (strcasestr(argv[i], "LLVMgold.so") != NULL)
             gold_present = gold_pos = i + 1;
 
-        } else if (i < argc && strcasestr(argv[i + 1], "LLVMgold.so") != NULL) {
+        } else if (i < (u32)argc &&
+
+                   strcasestr(argv[i + 1], "LLVMgold.so") != NULL) {
 
           gold_present = gold_pos = i + 2;
 
@@ -107,7 +114,7 @@ static void edit_params(int argc, char **argv) {
 
     if (!gold_pos) {
 
-      for (i = 1; i + 1 < argc && !gold_pos; i++) {
+      for (i = 1; i + 1 < (u32)argc && !gold_pos; i++) {
 
         if (argv[i][0] != '-') {
 
@@ -193,7 +200,7 @@ static void edit_params(int argc, char **argv) {
         gold_present ? "true" : "false", inst_present ? "true" : "false",
         rt_present ? "true" : "false", rt_lto_present ? "true" : "false");
 
-  for (i = 1; i < argc; i++) {
+  for (i = 1; i < (u32)argc; i++) {
 
     if (ld_param_cnt >= MAX_PARAM_COUNT)
       FATAL(
@@ -252,7 +259,7 @@ static void edit_params(int argc, char **argv) {
 
 int main(int argc, char **argv) {
 
-  s32 pid, i, status;
+  s32  pid, i, status;
   char thecwd[PATH_MAX];
 
   if (getenv("AFL_LD_CALLER") != NULL) {
@@ -319,7 +326,7 @@ int main(int argc, char **argv) {
   if (debug) {
 
     DEBUGF("cd \"%s\";", thecwd);
-    for (i = 0; i < ld_param_cnt; i++)
+    for (i = 0; i < (s32)ld_param_cnt; i++)
       SAYF(" \"%s\"", ld_params[i]);
     SAYF("\n");
 
diff --git a/src/afl-sharedmem.c b/src/afl-sharedmem.c
index fe641d0d..3241a130 100644
--- a/src/afl-sharedmem.c
+++ b/src/afl-sharedmem.c
@@ -66,9 +66,17 @@ static list_t shm_list = {.element_prealloc_count = 0};
 
 void afl_shm_deinit(sharedmem_t *shm) {
 
-  if (shm == NULL) return;
-
+  if (shm == NULL) { return; }
   list_remove(&shm_list, shm);
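+  // drop the env var advertising this mapping so children cannot re-attach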
+  if (shm->shmemfuzz_mode) {
+
+    unsetenv(SHM_FUZZ_ENV_VAR);
+
+  } else {
+
+    unsetenv(SHM_ENV_VAR);
+
+  }
 
 #ifdef USEMMAP
   if (shm->map != NULL) {
@@ -94,6 +102,8 @@ void afl_shm_deinit(sharedmem_t *shm) {
 
   if (shm->cmplog_mode) {
 
+    unsetenv(CMPLOG_SHM_ENV_VAR);
+
     if (shm->cmp_map != NULL) {
 
       munmap(shm->cmp_map, shm->map_size);
diff --git a/src/afl-showmap.c b/src/afl-showmap.c
index 6d95fc1d..b40527d3 100644
--- a/src/afl-showmap.c
+++ b/src/afl-showmap.c
@@ -39,6 +39,7 @@
 #include "sharedmem.h"
 #include "forkserver.h"
 #include "common.h"
+#include "hash.h"
 
 #include <stdio.h>
 #include <unistd.h>
@@ -83,7 +84,10 @@ static u8 quiet_mode,                  /* Hide non-essential messages?      */
     binary_mode,                       /* Write output as a binary map      */
     keep_cores,                        /* Allow coredumps?                  */
     remove_shm = 1,                    /* remove shmem?                     */
-    collect_coverage;                  /* collect coverage                  */
+    collect_coverage,                  /* collect coverage                  */
+    have_coverage,                     /* have coverage?                    */
+    no_classify,                       /* do not classify counts            */
+    debug;                             /* debug mode                        */
 
 static volatile u8 stop_soon,          /* Ctrl-C pressed?                   */
     child_crashed;                     /* Child crashed?                    */
@@ -314,7 +318,18 @@ static void showmap_run_target_forkserver(afl_forkserver_t *fsrv, u8 *mem,
 
   }
 
-  classify_counts(fsrv);
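+  // a first map byte of 1 signals that the target's instrumentation ran;
+  // clear it so it is not counted as a coverage tuple below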
+  if (fsrv->trace_bits[0] == 1) {
+
+    fsrv->trace_bits[0] = 0;
+    have_coverage = 1;
+
+  } else {
+
+    have_coverage = 0;
+
+  }
+
+  if (!no_classify) { classify_counts(fsrv); }
 
   if (!quiet_mode) { SAYF(cRST "-- Program output ends --\n"); }
 
@@ -487,7 +502,18 @@ static void showmap_run_target(afl_forkserver_t *fsrv, char **argv) {
 
   }
 
-  classify_counts(fsrv);
+  if (fsrv->trace_bits[0] == 1) {
+
+    fsrv->trace_bits[0] = 0;
+    have_coverage = 1;
+
+  } else {
+
+    have_coverage = 0;
+
+  }
+
+  if (!no_classify) { classify_counts(fsrv); }
 
   if (!quiet_mode) { SAYF(cRST "-- Program output ends --\n"); }
 
@@ -677,6 +703,7 @@ static void usage(u8 *argv0) {
       "  -q            - sink program's output and don't show messages\n"
       "  -e            - show edge coverage only, ignore hit counts\n"
       "  -r            - show real tuple values instead of AFL filter values\n"
+      "  -s            - do not classify the map\n"
       "  -c            - allow core dumps\n\n"
 
       "This tool displays raw tuple data captured by AFL instrumentation.\n"
@@ -717,6 +744,7 @@ int main(int argc, char **argv_orig, char **envp) {
   char **argv = argv_cpy_dup(argc, argv_orig);
 
   afl_forkserver_t fsrv_var = {0};
+  if (getenv("AFL_DEBUG")) { debug = 1; }
   fsrv = &fsrv_var;
   afl_fsrv_init(fsrv);
   map_size = get_map_size();
@@ -726,10 +754,14 @@ int main(int argc, char **argv_orig, char **envp) {
 
   if (getenv("AFL_QUIET") != NULL) { be_quiet = 1; }
 
-  while ((opt = getopt(argc, argv, "+i:o:f:m:t:A:eqCZQUWbcrh")) > 0) {
+  while ((opt = getopt(argc, argv, "+i:o:f:m:t:A:eqCZQUWbcrsh")) > 0) {
 
     switch (opt) {
 
+      case 's':
+        no_classify = 1;
+        break;
+
       case 'C':
         collect_coverage = 1;
         quiet_mode = 1;
@@ -913,6 +945,31 @@ int main(int argc, char **argv_orig, char **envp) {
 
   if (optind == argc || !out_file) { usage(argv[0]); }
 
+  if (fsrv->qemu_mode && getenv("AFL_USE_QASAN")) {
+
+    u8 *preload = getenv("AFL_PRELOAD");
+    u8 *libqasan = get_libqasan_path(argv_orig[0]);
+
+    if (!preload) {
+
+      setenv("AFL_PRELOAD", libqasan, 0);
+
+    } else {
+
+      u8 *result = ck_alloc(strlen(libqasan) + strlen(preload) + 2);
+      strcpy(result, libqasan);
+      strcat(result, " ");
+      strcat(result, preload);
+
+      setenv("AFL_PRELOAD", result, 1);
+      ck_free(result);
+
+    }
+
+    ck_free(libqasan);
+
+  }
+
   if (in_dir) {
 
     if (!out_file && !collect_coverage)
@@ -936,14 +993,16 @@ int main(int argc, char **argv_orig, char **envp) {
 
   //  if (afl->shmem_testcase_mode) { setup_testcase_shmem(afl); }
 
+  setenv("AFL_NO_AUTODICT", "1", 1);
+
   /* initialize cmplog_mode */
   shm.cmplog_mode = 0;
-  fsrv->trace_bits = afl_shm_init(&shm, map_size, 0);
   setup_signal_handlers();
 
   set_up_environment(fsrv);
 
   fsrv->target_path = find_binary(argv[optind]);
+  fsrv->trace_bits = afl_shm_init(&shm, map_size, 0);
 
   if (!quiet_mode) {
 
@@ -954,7 +1013,6 @@ int main(int argc, char **argv_orig, char **envp) {
 
   if (in_dir) {
 
-    if (at_file) { PFATAL("Options -A and -i are mutually exclusive"); }
     detect_file_args(argv + optind, "", &fsrv->use_stdin);
 
   } else {
@@ -996,6 +1054,7 @@ int main(int argc, char **argv_orig, char **envp) {
   /* initialize cmplog_mode */
   shm_fuzz->cmplog_mode = 0;
   u8 *map = afl_shm_init(shm_fuzz, MAX_FILE + sizeof(u32), 1);
+  shm_fuzz->shmemfuzz_mode = 1;
   if (!map) { FATAL("BUG: Zero return from afl_shm_init."); }
 #ifdef USEMMAP
   setenv(SHM_FUZZ_ENV_VAR, shm_fuzz->g_shm_file_path, 1);
@@ -1008,6 +1067,43 @@ int main(int argc, char **argv_orig, char **envp) {
   fsrv->shmem_fuzz_len = (u32 *)map;
   fsrv->shmem_fuzz = map + sizeof(u32);
 
+  if (!fsrv->qemu_mode && !unicorn_mode) {
+
+    u32 save_be_quiet = be_quiet;
+    be_quiet = !debug;
+    fsrv->map_size = 4194304;  // dummy temporary value
+    u32 new_map_size =
+        afl_fsrv_get_mapsize(fsrv, use_argv, &stop_soon,
+                             (get_afl_env("AFL_DEBUG_CHILD") ||
+                              get_afl_env("AFL_DEBUG_CHILD_OUTPUT"))
+                                 ? 1
+                                 : 0);
+    be_quiet = save_be_quiet;
+
+    if (new_map_size) {
+
+      // only reinitialize when it makes sense
+      if (map_size < new_map_size ||
+          (new_map_size > map_size && new_map_size - map_size > MAP_SIZE)) {
+
+        if (!be_quiet)
+          ACTF("Acquired new map size for target: %u bytes\n", new_map_size);
+
+        afl_shm_deinit(&shm);
+        afl_fsrv_kill(fsrv);
+        fsrv->map_size = new_map_size;
+        fsrv->trace_bits = afl_shm_init(&shm, new_map_size, 0);
+
+      }
+
+      map_size = new_map_size;
+
+    }
+
+    fsrv->map_size = map_size;
+
+  }
+
   if (in_dir) {
 
     DIR *          dir_in, *dir_out = NULL;
@@ -1072,8 +1168,9 @@ int main(int argc, char **argv_orig, char **envp) {
 
     }
 
-    stdin_file =
-        alloc_printf("%s/.afl-showmap-temp-%u", use_dir, (u32)getpid());
+    stdin_file = at_file ? strdup(at_file)
+                         : (char *)alloc_printf("%s/.afl-showmap-temp-%u",
+                                                use_dir, (u32)getpid());
     unlink(stdin_file);
     atexit(at_exit_handler);
     fsrv->out_file = stdin_file;
@@ -1210,12 +1307,18 @@ int main(int argc, char **argv_orig, char **envp) {
 
     showmap_run_target(fsrv, use_argv);
     tcnt = write_results_to_file(fsrv, out_file);
+    if (!quiet_mode) {
+
+      OKF("Hash of coverage map: %llx",
+          hash64(fsrv->trace_bits, fsrv->map_size, HASH_CONST));
+
+    }
 
   }
 
   if (!quiet_mode || collect_coverage) {
 
-    if (!tcnt) { FATAL("No instrumentation detected" cRST); }
+    if (!tcnt && !have_coverage) { FATAL("No instrumentation detected" cRST); }
     OKF("Captured %u tuples (highest value %u, total values %llu) in "
         "'%s'." cRST,
         tcnt, highest, total, out_file);
diff --git a/src/afl-tmin.c b/src/afl-tmin.c
index 5fd60cd2..15336959 100644
--- a/src/afl-tmin.c
+++ b/src/afl-tmin.c
@@ -79,7 +79,8 @@ static u8 crash_mode,                  /* Crash-centric mode?               */
     edges_only,                        /* Ignore hit counts?                */
     exact_mode,                        /* Require path match for crashes?   */
     remove_out_file,                   /* remove out_file on exit?          */
-    remove_shm = 1;                    /* remove shmem on exit?             */
+    remove_shm = 1,                    /* remove shmem on exit?             */
+    debug;                             /* debug mode                        */
 
 static volatile u8 stop_soon;          /* Ctrl-C pressed?                   */
 
@@ -878,6 +879,7 @@ int main(int argc, char **argv_orig, char **envp) {
   char **argv = argv_cpy_dup(argc, argv_orig);
 
   afl_forkserver_t fsrv_var = {0};
+  if (getenv("AFL_DEBUG")) { debug = 1; }
   fsrv = &fsrv_var;
   afl_fsrv_init(fsrv);
   map_size = get_map_size();
@@ -1074,10 +1076,35 @@ int main(int argc, char **argv_orig, char **envp) {
   if (optind == argc || !in_file || !output_file) { usage(argv[0]); }
 
   check_environment_vars(envp);
+  setenv("AFL_NO_AUTODICT", "1", 1);
+
+  if (fsrv->qemu_mode && getenv("AFL_USE_QASAN")) {
+
+    u8 *preload = getenv("AFL_PRELOAD");
+    u8 *libqasan = get_libqasan_path(argv_orig[0]);
+
+    if (!preload) {
+
+      setenv("AFL_PRELOAD", libqasan, 0);
+
+    } else {
+
+      u8 *result = ck_alloc(strlen(libqasan) + strlen(preload) + 2);
+      strcpy(result, libqasan);
+      strcat(result, " ");
+      strcat(result, preload);
+
+      setenv("AFL_PRELOAD", result, 1);
+      ck_free(result);
+
+    }
+
+    ck_free(libqasan);
+
+  }
 
   /* initialize cmplog_mode */
   shm.cmplog_mode = 0;
-  fsrv->trace_bits = afl_shm_init(&shm, map_size, 0);
 
   atexit(at_exit_handler);
   setup_signal_handlers();
@@ -1085,6 +1112,7 @@ int main(int argc, char **argv_orig, char **envp) {
   set_up_environment(fsrv);
 
   fsrv->target_path = find_binary(argv[optind]);
+  fsrv->trace_bits = afl_shm_init(&shm, map_size, 0);
   detect_file_args(argv + optind, out_file, &fsrv->use_stdin);
 
   if (fsrv->qemu_mode) {
@@ -1156,6 +1184,7 @@ int main(int argc, char **argv_orig, char **envp) {
   /* initialize cmplog_mode */
   shm_fuzz->cmplog_mode = 0;
   u8 *map = afl_shm_init(shm_fuzz, MAX_FILE + sizeof(u32), 1);
+  shm_fuzz->shmemfuzz_mode = 1;
   if (!map) { FATAL("BUG: Zero return from afl_shm_init."); }
 #ifdef USEMMAP
   setenv(SHM_FUZZ_ENV_VAR, shm_fuzz->g_shm_file_path, 1);
@@ -1170,11 +1199,51 @@ int main(int argc, char **argv_orig, char **envp) {
 
   read_initial_file();
 
-  afl_fsrv_start(
-      fsrv, use_argv, &stop_soon,
-      (get_afl_env("AFL_DEBUG_CHILD") || get_afl_env("AFL_DEBUG_CHILD_OUTPUT"))
-          ? 1
-          : 0);
+  if (!fsrv->qemu_mode && !unicorn_mode) {
+
+    fsrv->map_size = 4194304;  // dummy temporary value
+    u32 new_map_size =
+        afl_fsrv_get_mapsize(fsrv, use_argv, &stop_soon,
+                             (get_afl_env("AFL_DEBUG_CHILD") ||
+                              get_afl_env("AFL_DEBUG_CHILD_OUTPUT"))
+                                 ? 1
+                                 : 0);
+
+    if (new_map_size) {
+
+      if (map_size < new_map_size ||
+          (new_map_size > map_size && new_map_size - map_size > MAP_SIZE)) {
+
+        if (!be_quiet)
+          ACTF("Acquired new map size for target: %u bytes\n", new_map_size);
+
+        afl_shm_deinit(&shm);
+        afl_fsrv_kill(fsrv);
+        fsrv->map_size = new_map_size;
+        fsrv->trace_bits = afl_shm_init(&shm, new_map_size, 0);
+        afl_fsrv_start(fsrv, use_argv, &stop_soon,
+                       (get_afl_env("AFL_DEBUG_CHILD") ||
+                        get_afl_env("AFL_DEBUG_CHILD_OUTPUT"))
+                           ? 1
+                           : 0);
+
+      }
+
+      map_size = new_map_size;
+
+    }
+
+    fsrv->map_size = map_size;
+
+  } else {
+
+    afl_fsrv_start(fsrv, use_argv, &stop_soon,
+                   (get_afl_env("AFL_DEBUG_CHILD") ||
+                    get_afl_env("AFL_DEBUG_CHILD_OUTPUT"))
+                       ? 1
+                       : 0);
+
+  }
 
   if (fsrv->support_shmem_fuzz && !fsrv->use_shmem_fuzz)
     shm_fuzz = deinit_shmem(fsrv, shm_fuzz);
diff --git a/test-instr.c b/test-instr.c
index 84ac0036..00799103 100644
--- a/test-instr.c
+++ b/test-instr.c
@@ -32,7 +32,8 @@ int main(int argc, char **argv) {
 
   } else {
 
-    if (argc >= 3 && strcmp(argv[1], "-f") == 0)
+    if (argc >= 3 && strcmp(argv[1], "-f") == 0) {
+
       if ((fd = open(argv[2], O_RDONLY)) < 0) {
 
         fprintf(stderr, "Error: unable to open %s\n", argv[2]);
@@ -40,6 +41,8 @@ int main(int argc, char **argv) {
 
       }
 
+    }
+
     if (read(fd, buf, sizeof(buf)) < 1) {
 
       printf("Hum?\n");
diff --git a/test/test-basic.sh b/test/test-basic.sh
index 8296b6cc..b4bb9df2 100755
--- a/test/test-basic.sh
+++ b/test/test-basic.sh
@@ -7,12 +7,12 @@ AFL_GCC=afl-gcc
 $ECHO "$BLUE[*] Testing: ${AFL_GCC}, afl-showmap, afl-fuzz, afl-cmin and afl-tmin"
 test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc" -o "$SYS" = "i386" && {
  test -e ../${AFL_GCC} -a -e ../afl-showmap -a -e ../afl-fuzz && {
-  ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1
+  ../${AFL_GCC} -o test-instr.plain -O0 ../test-instr.c > /dev/null 2>&1
   AFL_HARDEN=1 ../${AFL_GCC} -o test-compcov.harden test-compcov.c > /dev/null 2>&1
   test -e test-instr.plain && {
     $ECHO "$GREEN[+] ${AFL_GCC} compilation succeeded"
-    echo 0 | ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.0 -r -- ./test-instr.plain > /dev/null 2>&1
-    ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.1 -r -- ./test-instr.plain < /dev/null > /dev/null 2>&1
+    echo 0 | AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.0 -r -- ./test-instr.plain > /dev/null 2>&1
+    AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.1 -r -- ./test-instr.plain < /dev/null > /dev/null 2>&1
     test -e test-instr.plain.0 -a -e test-instr.plain.1 && {
       diff test-instr.plain.0 test-instr.plain.1 > /dev/null 2>&1 && {
         $ECHO "$RED[!] ${AFL_GCC} instrumentation should be different on different input but is not"
@@ -26,20 +26,20 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
     }
     rm -f test-instr.plain.0 test-instr.plain.1
     SKIP=
-    TUPLES=`echo 1|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
-    test "$TUPLES" -gt 2 -a "$TUPLES" -lt 12 && {
+    TUPLES=`echo 1|AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
+    test "$TUPLES" -gt 1 -a "$TUPLES" -lt 12 && {
       $ECHO "$GREEN[+] ${AFL_GCC} run reported $TUPLES instrumented locations which is fine"
     } || {
       $ECHO "$RED[!] ${AFL_GCC} instrumentation produces weird numbers: $TUPLES"
       CODE=1
     }
-    test "$TUPLES" -lt 4 && SKIP=1
+    test "$TUPLES" -lt 3 && SKIP=1
     true  # this is needed because of the test above
   } || {
     $ECHO "$RED[!] ${AFL_GCC} failed"
     echo CUT------------------------------------------------------------------CUT
     uname -a
-    ../${AFL_GCC} -o test-instr.plain ../test-instr.c
+    ../${AFL_GCC} -o test-instr.plain -O0 ../test-instr.c
     echo CUT------------------------------------------------------------------CUT
     CODE=1
   }
@@ -128,12 +128,12 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
  $ECHO "$BLUE[*] Testing: ${AFL_GCC}, afl-showmap, afl-fuzz, afl-cmin and afl-tmin"
  SKIP=
  test -e ../${AFL_GCC} -a -e ../afl-showmap -a -e ../afl-fuzz && {
-  ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1
+  ../${AFL_GCC} -o test-instr.plain -O0 ../test-instr.c > /dev/null 2>&1
   AFL_HARDEN=1 ../${AFL_GCC} -o test-compcov.harden test-compcov.c > /dev/null 2>&1
   test -e test-instr.plain && {
     $ECHO "$GREEN[+] ${AFL_GCC} compilation succeeded"
-    echo 0 | ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.0 -r -- ./test-instr.plain > /dev/null 2>&1
-    ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.1 -r -- ./test-instr.plain < /dev/null > /dev/null 2>&1
+    echo 0 | AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.0 -r -- ./test-instr.plain > /dev/null 2>&1
+    AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.1 -r -- ./test-instr.plain < /dev/null > /dev/null 2>&1
     test -e test-instr.plain.0 -a -e test-instr.plain.1 && {
       diff test-instr.plain.0 test-instr.plain.1 > /dev/null 2>&1 && {
         $ECHO "$RED[!] ${AFL_GCC} instrumentation should be different on different input but is not"
@@ -146,14 +146,14 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
       CODE=1
     }
     rm -f test-instr.plain.0 test-instr.plain.1
-    TUPLES=`echo 1|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
-    test "$TUPLES" -gt 2 -a "$TUPLES" -lt 12 && {
+    TUPLES=`echo 1|AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
+    test "$TUPLES" -gt 1 -a "$TUPLES" -lt 12 && {
       $ECHO "$GREEN[+] ${AFL_GCC} run reported $TUPLES instrumented locations which is fine"
     } || {
       $ECHO "$RED[!] ${AFL_GCC} instrumentation produces weird numbers: $TUPLES"
       CODE=1
     }
-    test "$TUPLES" -lt 4 && SKIP=1
+    test "$TUPLES" -lt 3 && SKIP=1
     true  # this is needed because of the test above
   } || {
     $ECHO "$RED[!] ${AFL_GCC} failed"
diff --git a/test/test-gcc-plugin.sh b/test/test-gcc-plugin.sh
index 9fe63ea3..4c36b6c9 100755
--- a/test/test-gcc-plugin.sh
+++ b/test/test-gcc-plugin.sh
@@ -10,23 +10,23 @@ test -e ../afl-gcc-fast -a -e ../afl-compiler-rt.o && {
   AFL_HARDEN=1 ../afl-gcc-fast -o test-compcov.harden.gccpi test-compcov.c > /dev/null 2>&1
   test -e test-instr.plain.gccpi && {
     $ECHO "$GREEN[+] gcc_plugin compilation succeeded"
-    echo 0 | ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.0 -r -- ./test-instr.plain.gccpi > /dev/null 2>&1
-    ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.1 -r -- ./test-instr.plain.gccpi < /dev/null > /dev/null 2>&1
+    echo 0 | AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.0 -r -- ./test-instr.plain.gccpi > /dev/null 2>&1
+    AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.1 -r -- ./test-instr.plain.gccpi < /dev/null > /dev/null 2>&1
     test -e test-instr.plain.0 -a -e test-instr.plain.1 && {
       diff test-instr.plain.0 test-instr.plain.1 > /dev/null 2>&1 && {
         $ECHO "$RED[!] gcc_plugin instrumentation should be different on different input but is not"
         CODE=1
       } || {
         $ECHO "$GREEN[+] gcc_plugin instrumentation present and working correctly"
-        TUPLES=`echo 0|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain.gccpi 2>&1 | grep Captur | awk '{print$3}'`
-        test "$TUPLES" -gt 3 -a "$TUPLES" -lt 9 && {
+        TUPLES=`echo 0|AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain.gccpi 2>&1 | grep Captur | awk '{print$3}'`
+        test "$TUPLES" -gt 1 -a "$TUPLES" -lt 9 && {
           $ECHO "$GREEN[+] gcc_plugin run reported $TUPLES instrumented locations which is fine"
         } || {
           $ECHO "$RED[!] gcc_plugin instrumentation produces a weird numbers: $TUPLES"
           $ECHO "$YELLOW[-] this is a known issue in gcc, not afl++. It is not flagged as an error because travis builds would all fail otherwise :-("
           #CODE=1
         }
-        test "$TUPLES" -lt 4 && SKIP=1
+        test "$TUPLES" -lt 2 && SKIP=1
         true
       }
     } || {
@@ -86,21 +86,21 @@ test -e ../afl-gcc-fast -a -e ../afl-compiler-rt.o && {
   # now for the special gcc_plugin things
   echo foobar.c > instrumentlist.txt
   AFL_GCC_INSTRUMENT_FILE=instrumentlist.txt ../afl-gcc-fast -o test-compcov test-compcov.c > /dev/null 2>&1
-  test -e test-compcov && test_compcov_binary_functionality ./test-compcov && {
-    echo 1 | ../afl-showmap -m ${MEM_LIMIT} -o - -r -- ./test-compcov 2>&1 | grep -q "Captured 1 tuples" && {
+  test -x test-compcov && test_compcov_binary_functionality ./test-compcov && {
+    echo 1 | AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o - -r -- ./test-compcov 2>&1 | grep -q "Captured 0 tuples" && {
       $ECHO "$GREEN[+] gcc_plugin instrumentlist feature works correctly"
     } || {
       $ECHO "$RED[!] gcc_plugin instrumentlist feature failed"
       CODE=1
     }
   } || {
-    $ECHO "$RED[!] gcc_plugin instrumentlist feature compilation failed"
+    $ECHO "$RED[!] gcc_plugin instrumentlist feature compilation failed."
     CODE=1
   }
   rm -f test-compcov test.out instrumentlist.txt
   ../afl-gcc-fast -o test-persistent ../utils/persistent_mode/persistent_demo.c > /dev/null 2>&1
   test -e test-persistent && {
-    echo foo | ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -q -r ./test-persistent && {
+    echo foo | AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -q -r ./test-persistent && {
       $ECHO "$GREEN[+] gcc_plugin persistent mode feature works correctly"
     } || {
       $ECHO "$RED[!] gcc_plugin persistent mode feature failed to work"
diff --git a/test/test-libextensions.sh b/test/test-libextensions.sh
index 905a4cbc..40a898c8 100755
--- a/test/test-libextensions.sh
+++ b/test/test-libextensions.sh
@@ -38,14 +38,4 @@ test -e ../libdislocator.so && {
 }
 rm -f test-compcov
 
-test -z "$AFL_CC" && {
-  if type gcc >/dev/null; then
-    export AFL_CC=gcc
-  else
-    if type clang >/dev/null; then
-      export AFL_CC=clang
-    fi
-  fi
-}
-
 . ./test-post.sh
diff --git a/test/test-llvm-lto.sh b/test/test-llvm-lto.sh
index d0b8f8fc..3e762acf 100755
--- a/test/test-llvm-lto.sh
+++ b/test/test-llvm-lto.sh
@@ -16,16 +16,16 @@ test -e ../afl-clang-lto -a -e ../afl-llvm-lto-instrumentation.so && {
   ../afl-clang-lto -o test-instr.plain ../test-instr.c > /dev/null 2>&1
   test -e test-instr.plain && {
     $ECHO "$GREEN[+] llvm_mode LTO compilation succeeded"
-    echo 0 | ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.0 -r -- ./test-instr.plain > /dev/null 2>&1
-    ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.1 -r -- ./test-instr.plain < /dev/null > /dev/null 2>&1
+    echo 0 | AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.0 -r -- ./test-instr.plain > /dev/null 2>&1
+    AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.1 -r -- ./test-instr.plain < /dev/null > /dev/null 2>&1
     test -e test-instr.plain.0 -a -e test-instr.plain.1 && {
       diff -q test-instr.plain.0 test-instr.plain.1 > /dev/null 2>&1 && {
         $ECHO "$RED[!] llvm_mode LTO instrumentation should be different on different input but is not"
         CODE=1
       } || {
         $ECHO "$GREEN[+] llvm_mode LTO instrumentation present and working correctly"
-        TUPLES=`echo 0|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
-        test "$TUPLES" -gt 3 -a "$TUPLES" -lt 7 && {
+        TUPLES=`echo 0|AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
+        test "$TUPLES" -gt 2 -a "$TUPLES" -lt 7 && {
           $ECHO "$GREEN[+] llvm_mode LTO run reported $TUPLES instrumented locations which is fine"
         } || {
           $ECHO "$RED[!] llvm_mode LTO instrumentation produces weird numbers: $TUPLES"
@@ -59,7 +59,7 @@ test -e ../afl-clang-lto -a -e ../afl-llvm-lto-instrumentation.so && {
   rm -f test-compcov test.out instrumentlist.txt
   ../afl-clang-lto -o test-persistent ../utils/persistent_mode/persistent_demo.c > /dev/null 2>&1
   test -e test-persistent && {
-    echo foo | ../afl-showmap -m none -o /dev/null -q -r ./test-persistent && {
+    echo foo | AFL_QUIET=1 ../afl-showmap -m none -o /dev/null -q -r ./test-persistent && {
       $ECHO "$GREEN[+] llvm_mode LTO persistent mode feature works correctly"
     } || {
       $ECHO "$RED[!] llvm_mode LTO persistent mode feature failed to work"
diff --git a/test/test-llvm.sh b/test/test-llvm.sh
index e5005d72..156b8920 100755
--- a/test/test-llvm.sh
+++ b/test/test-llvm.sh
@@ -16,22 +16,22 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
   AFL_HARDEN=1 ../afl-clang-fast -o test-compcov.harden test-compcov.c > /dev/null 2>&1
   test -e test-instr.plain && {
     $ECHO "$GREEN[+] llvm_mode compilation succeeded"
-    echo 0 | ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.0 -r -- ./test-instr.plain > /dev/null 2>&1
-    ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.1 -r -- ./test-instr.plain < /dev/null > /dev/null 2>&1
+    echo 0 | AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.0 -r -- ./test-instr.plain > /dev/null 2>&1
+    AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-instr.plain.1 -r -- ./test-instr.plain < /dev/null > /dev/null 2>&1
     test -e test-instr.plain.0 -a -e test-instr.plain.1 && {
       diff test-instr.plain.0 test-instr.plain.1 > /dev/null 2>&1 && {
         $ECHO "$RED[!] llvm_mode instrumentation should be different on different input but is not"
         CODE=1
       } || {
         $ECHO "$GREEN[+] llvm_mode instrumentation present and working correctly"
-        TUPLES=`echo 0|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
-        test "$TUPLES" -gt 3 -a "$TUPLES" -lt 8 && {
+        TUPLES=`echo 0|AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
+        test "$TUPLES" -gt 2 -a "$TUPLES" -lt 8 && {
           $ECHO "$GREEN[+] llvm_mode run reported $TUPLES instrumented locations which is fine"
         } || {
           $ECHO "$RED[!] llvm_mode instrumentation produces weird numbers: $TUPLES"
           CODE=1
         }
-        test "$TUPLES" -lt 4 && SKIP=1
+        test "$TUPLES" -lt 3 && SKIP=1
         true
       }
     } || {
@@ -128,8 +128,8 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
   test -e ../libLLVMInsTrim.so && {
     AFL_LLVM_INSTRUMENT=CFG AFL_LLVM_INSTRIM_LOOPHEAD=1 ../afl-clang-fast -o test-instr.instrim ../test-instr.c > /dev/null 2>test.out
     test -e test-instr.instrim && {
-      TUPLES=`echo 0|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.instrim 2>&1 | grep Captur | awk '{print$3}'`
-      test "$TUPLES" -gt 2 -a "$TUPLES" -lt 5 && {
+      TUPLES=`echo 0|AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.instrim 2>&1 | grep Captur | awk '{print$3}'`
+      test "$TUPLES" -gt 1 -a "$TUPLES" -lt 5 && {
         $ECHO "$GREEN[+] llvm_mode InsTrim reported $TUPLES instrumented locations which is fine"
       } || {
         $ECHO "$RED[!] llvm_mode InsTrim instrumentation produces weird numbers: $TUPLES"
@@ -216,7 +216,7 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
   rm -rf errors test-cmplog in core.*
   ../afl-clang-fast -o test-persistent ../utils/persistent_mode/persistent_demo.c > /dev/null 2>&1
   test -e test-persistent && {
-    echo foo | ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -q -r ./test-persistent && {
+    echo foo | AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -q -r ./test-persistent && {
       $ECHO "$GREEN[+] llvm_mode persistent mode feature works correctly"
     } || {
       $ECHO "$RED[!] llvm_mode persistent mode feature failed to work"
diff --git a/test/test-qemu-mode.sh b/test/test-qemu-mode.sh
index 73b39a43..0cd6ef40 100755
--- a/test/test-qemu-mode.sh
+++ b/test/test-qemu-mode.sh
@@ -3,6 +3,16 @@
 . ./test-pre.sh
 
 $ECHO "$BLUE[*] Testing: qemu_mode"
+test -z "$AFL_CC" && {
+  if type gcc >/dev/null; then
+    export AFL_CC=gcc
+  else
+    if type clang >/dev/null; then
+      export AFL_CC=clang
+    fi
+  fi
+}
+
 test -e ../afl-qemu-trace && {
   cc -pie -fPIE -o test-instr ../test-instr.c
   cc -o test-compcov test-compcov.c
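The AFL_CC fallback dropped from test-libextensions.sh above reappears here, ahead of the QEMU checks. A more compact sketch of the same fallback (untested; unlike the original it leaves AFL_CC empty rather than unset when neither compiler exists):

    : "${AFL_CC:=$(command -v gcc || command -v clang)}"
    export AFL_CC
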
diff --git a/unicorn_mode/UNICORNAFL_VERSION b/unicorn_mode/UNICORNAFL_VERSION
index 2dbc30b8..d9ae5590 100644
--- a/unicorn_mode/UNICORNAFL_VERSION
+++ b/unicorn_mode/UNICORNAFL_VERSION
@@ -1 +1 @@
-83d1b426
+fb2fc9f2
diff --git a/unicorn_mode/build_unicorn_support.sh b/unicorn_mode/build_unicorn_support.sh
index 8f6ceab7..f1d028f8 100755
--- a/unicorn_mode/build_unicorn_support.sh
+++ b/unicorn_mode/build_unicorn_support.sh
@@ -147,6 +147,8 @@ if [ "$PREREQ_NOTFOUND" = "1" ]; then
   exit 1
 fi
 
+unset CFLAGS
+
 echo "[+] All checks passed!"
 
 echo "[*] Making sure unicornafl is checked out"
@@ -177,8 +179,9 @@ echo "[*] Checking out $UNICORNAFL_VERSION"
 sh -c 'git stash && git stash drop' 1>/dev/null 2>/dev/null
 git checkout "$UNICORNAFL_VERSION" || exit 1
 
-echo "[*] making sure config.h matches"
-cp "../../config.h" "." || exit 1
+echo "[*] making sure afl++ header files match"
+cp "../../include/config.h" "." || exit 1
+cp "../../include/types.h" "." || exit 1
 
 echo "[*] Configuring Unicorn build..."
 
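Unsetting CFLAGS prevents host-specific flags from leaking into the unicornafl build, and types.h is now copied alongside config.h, presumably because the pinned unicornafl revision includes both afl++ headers. A quick way to confirm the copies stayed in sync, run from unicorn_mode/unicornafl where the script performs the cp:

    diff -q config.h ../../include/config.h && diff -q types.h ../../include/types.h
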
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py b/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py
index 8c8f9641..1ac4c9f3 100644
--- a/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py
@@ -45,30 +45,31 @@ MAX_SEG_SIZE = 128 * 1024 * 1024
 INDEX_FILE_NAME = "_index.json"
 
 
-#----------------------
-#---- Helper Functions
+# ----------------------
+# ---- Helper Functions
+
 
 def map_arch():
-    arch = get_arch() # from GEF
-    if 'x86_64' in arch or 'x86-64' in arch:
+    arch = get_arch()  # from GEF
+    if "x86_64" in arch or "x86-64" in arch:
         return "x64"
-    elif 'x86' in arch or 'i386' in arch:
+    elif "x86" in arch or "i386" in arch:
         return "x86"
-    elif 'aarch64' in arch or 'arm64' in arch:
+    elif "aarch64" in arch or "arm64" in arch:
         return "arm64le"
-    elif 'aarch64_be' in arch:
+    elif "aarch64_be" in arch:
         return "arm64be"
-    elif 'armeb' in arch:
+    elif "armeb" in arch:
         # check for THUMB mode
-        cpsr = get_register('$cpsr')
-        if (cpsr & (1 << 5)):
+        cpsr = get_register("$cpsr")
+        if cpsr & (1 << 5):
             return "armbethumb"
         else:
             return "armbe"
-    elif 'arm' in arch:
+    elif "arm" in arch:
         # check for THUMB mode
-        cpsr = get_register('$cpsr')
-        if (cpsr & (1 << 5)):
+        cpsr = get_register("$cpsr")
+        if cpsr & (1 << 5):
             return "armlethumb"
         else:
             return "armle"
@@ -76,8 +77,9 @@ def map_arch():
         return ""
 
 
-#-----------------------
-#---- Dumping functions
+# -----------------------
+# ---- Dumping functions
+
 
 def dump_arch_info():
     arch_info = {}
@@ -89,7 +91,7 @@ def dump_regs():
     reg_state = {}
     for reg in current_arch.all_registers:
         reg_val = get_register(reg)
-        reg_state[reg.strip().strip('$')] = reg_val
+        reg_state[reg.strip().strip("$")] = reg_val
 
     return reg_state
 
@@ -108,47 +110,76 @@ def dump_process_memory(output_dir):
         if entry.page_start == entry.page_end:
             continue
 
-        seg_info = {'start': entry.page_start, 'end': entry.page_end, 'name': entry.path, 'permissions': {
-            "r": entry.is_readable() > 0,
-            "w": entry.is_writable() > 0,
-            "x": entry.is_executable() > 0
-        }, 'content_file': ''}
+        seg_info = {
+            "start": entry.page_start,
+            "end": entry.page_end,
+            "name": entry.path,
+            "permissions": {
+                "r": entry.is_readable() > 0,
+                "w": entry.is_writable() > 0,
+                "x": entry.is_executable() > 0,
+            },
+            "content_file": "",
+        }
 
         # "(deleted)" may or may not be valid, but don't push it.
-        if entry.is_readable() and not '(deleted)' in entry.path:
+        if entry.is_readable() and not "(deleted)" in entry.path:
             try:
                 # Compress and dump the content to a file
                 seg_content = read_memory(entry.page_start, entry.size)
-                if(seg_content == None):
-                    print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.page_start, entry.path))
+                if seg_content == None:
+                    print(
+                        "Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(
+                            entry.page_start, entry.path
+                        )
+                    )
                 else:
-                    print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.page_start, len(seg_content), entry.path, repr(seg_info['permissions'])))
+                    print(
+                        "Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(
+                            entry.page_start,
+                            len(seg_content),
+                            entry.path,
+                            repr(seg_info["permissions"]),
+                        )
+                    )
                     compressed_seg_content = zlib.compress(seg_content)
                     md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
                     seg_info["content_file"] = md5_sum
 
                     # Write the compressed contents to disk
-                    out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                    out_file = open(os.path.join(output_dir, md5_sum), "wb")
                     out_file.write(compressed_seg_content)
                     out_file.close()
 
             except:
-                print("Exception reading segment ({}): {}".format(entry.path, sys.exc_info()[0]))
+                print(
+                    "Exception reading segment ({}): {}".format(
+                        entry.path, sys.exc_info()[0]
+                    )
+                )
         else:
-            print("Skipping segment {0}@0x{1:016x}".format(entry.path, entry.page_start))
+            print(
+                "Skipping segment {0}@0x{1:016x}".format(entry.path, entry.page_start)
+            )
 
         # Add the segment to the list
         final_segment_list.append(seg_info)
 
-
     return final_segment_list
 
-#---------------------------------------------
-#---- ARM Extention (dump floating point regs)
+
+# ---------------------------------------------
+# ---- ARM Extension (dump floating point regs)
+
 
 def dump_float(rge=32):
     reg_convert = ""
-    if map_arch() == "armbe" or map_arch() == "armle" or map_arch() == "armbethumb" or map_arch() == "armbethumb":
+    if (
+        map_arch() == "armbe"
+        or map_arch() == "armle"
+        or map_arch() == "armbethumb"
+        or map_arch() == "armbethumb"
+    ):
         reg_state = {}
         for reg_num in range(32):
             value = gdb.selected_frame().read_register("d" + str(reg_num))
@@ -158,8 +189,10 @@ def dump_float(rge=32):
 
         return reg_state
 
-#----------
-#---- Main
+
+# ----------
+# ---- Main
+
 
 def main():
     print("----- Unicorn Context Dumper -----")
@@ -175,7 +208,9 @@ def main():
     try:
 
         # Create the output directory
-        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(
+            "%Y%m%d_%H%M%S"
+        )
         output_path = "UnicornContext_" + timestamp
         if not os.path.exists(output_path):
             os.makedirs(output_path)
@@ -190,7 +225,7 @@ def main():
         }
 
         # Write the index file
-        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), "w")
         index_file.write(json.dumps(context, indent=4))
         index_file.close()
         print("Done.")
@@ -198,5 +233,6 @@ def main():
     except Exception as e:
         print("!!! ERROR:\n\t{}".format(repr(e)))
 
+
 if __name__ == "__main__":
     main()
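The gdb dumper changes are black-style reformatting only (double quotes, wrapped calls, two blank lines between defs); it still writes UnicornContext_<timestamp>/_index.json plus one zlib-compressed, md5-named .bin file per readable segment. Typical use, assuming GEF is already loaded (the script relies on GEF's get_arch and current_arch):

    gdb -q ./target
    (gdb) run                 # break wherever the context should be captured
    (gdb) source unicorn_mode/helper_scripts/unicorn_dumper_gdb.py
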
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_ida.py b/unicorn_mode/helper_scripts/unicorn_dumper_ida.py
index 3f955a5c..fa29fb90 100644
--- a/unicorn_mode/helper_scripts/unicorn_dumper_ida.py
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_ida.py
@@ -31,8 +31,9 @@ MAX_SEG_SIZE = 128 * 1024 * 1024
 # Name of the index file
 INDEX_FILE_NAME = "_index.json"
 
-#----------------------
-#---- Helper Functions
+# ----------------------
+# ---- Helper Functions
+
 
 def get_arch():
     if ph.id == PLFM_386 and ph.flag & PR_USE64:
@@ -52,6 +53,7 @@ def get_arch():
     else:
         return ""
 
+
 def get_register_list(arch):
     if arch == "arm64le" or arch == "arm64be":
         arch = "arm64"
@@ -59,84 +61,174 @@ def get_register_list(arch):
         arch = "arm"
 
     registers = {
-        "x64" : [
-            "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp",
-            "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
-            "rip", "rsp", "efl",
-            "cs", "ds", "es", "fs", "gs", "ss",
+        "x64": [
+            "rax",
+            "rbx",
+            "rcx",
+            "rdx",
+            "rsi",
+            "rdi",
+            "rbp",
+            "rsp",
+            "r8",
+            "r9",
+            "r10",
+            "r11",
+            "r12",
+            "r13",
+            "r14",
+            "r15",
+            "rip",
+            "rsp",
+            "efl",
+            "cs",
+            "ds",
+            "es",
+            "fs",
+            "gs",
+            "ss",
+        ],
+        "x86": [
+            "eax",
+            "ebx",
+            "ecx",
+            "edx",
+            "esi",
+            "edi",
+            "ebp",
+            "esp",
+            "eip",
+            "esp",
+            "efl",
+            "cs",
+            "ds",
+            "es",
+            "fs",
+            "gs",
+            "ss",
         ],
-        "x86" : [
-            "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
-            "eip", "esp", "efl", 
-            "cs", "ds", "es", "fs", "gs", "ss",
-        ],        
-        "arm" : [
-            "R0", "R1", "R2",  "R3",  "R4",  "R5", "R6", "R7",  
-            "R8", "R9", "R10", "R11", "R12", "PC", "SP", "LR",  
+        "arm": [
+            "R0",
+            "R1",
+            "R2",
+            "R3",
+            "R4",
+            "R5",
+            "R6",
+            "R7",
+            "R8",
+            "R9",
+            "R10",
+            "R11",
+            "R12",
+            "PC",
+            "SP",
+            "LR",
             "PSR",
         ],
-        "arm64" : [
-            "X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",  
-            "X8", "X9", "X10", "X11", "X12", "X13", "X14", 
-            "X15", "X16", "X17", "X18", "X19", "X20", "X21", 
-            "X22", "X23", "X24", "X25", "X26", "X27", "X28", 
-            "PC", "SP", "FP", "LR", "CPSR"
+        "arm64": [
+            "X0",
+            "X1",
+            "X2",
+            "X3",
+            "X4",
+            "X5",
+            "X6",
+            "X7",
+            "X8",
+            "X9",
+            "X10",
+            "X11",
+            "X12",
+            "X13",
+            "X14",
+            "X15",
+            "X16",
+            "X17",
+            "X18",
+            "X19",
+            "X20",
+            "X21",
+            "X22",
+            "X23",
+            "X24",
+            "X25",
+            "X26",
+            "X27",
+            "X28",
+            "PC",
+            "SP",
+            "FP",
+            "LR",
+            "CPSR"
             #    "NZCV",
-        ]
+        ],
     }
-    return registers[arch]  
+    return registers[arch]
+
+
+# -----------------------
+# ---- Dumping functions
 
-#-----------------------
-#---- Dumping functions
 
 def dump_arch_info():
     arch_info = {}
     arch_info["arch"] = get_arch()
     return arch_info
 
+
 def dump_regs():
     reg_state = {}
     for reg in get_register_list(get_arch()):
         reg_state[reg] = GetRegValue(reg)
     return reg_state
 
+
 def dump_process_memory(output_dir):
     # Segment information dictionary
     segment_list = []
-    
+
     # Loop over the segments, fill in the info dictionary
     for seg_ea in Segments():
         seg_start = SegStart(seg_ea)
         seg_end = SegEnd(seg_ea)
         seg_size = seg_end - seg_start
-		
+
         seg_info = {}
-        seg_info["name"]  = SegName(seg_ea)
+        seg_info["name"] = SegName(seg_ea)
         seg_info["start"] = seg_start
-        seg_info["end"]   = seg_end
-        
+        seg_info["end"] = seg_end
+
         perms = getseg(seg_ea).perm
         seg_info["permissions"] = {
-            "r": False if (perms & SEGPERM_READ)  == 0 else True,
+            "r": False if (perms & SEGPERM_READ) == 0 else True,
             "w": False if (perms & SEGPERM_WRITE) == 0 else True,
-            "x": False if (perms & SEGPERM_EXEC)  == 0 else True,
+            "x": False if (perms & SEGPERM_EXEC) == 0 else True,
         }
 
         if (perms & SEGPERM_READ) and seg_size <= MAX_SEG_SIZE and isLoaded(seg_start):
             try:
                 # Compress and dump the content to a file
                 seg_content = get_many_bytes(seg_start, seg_end - seg_start)
-                if(seg_content == None):
-                    print("Segment empty: {0}@0x{1:016x} (size:UNKNOWN)".format(SegName(seg_ea), seg_ea))
+                if seg_content == None:
+                    print(
+                        "Segment empty: {0}@0x{1:016x} (size:UNKNOWN)".format(
+                            SegName(seg_ea), seg_ea
+                        )
+                    )
                     seg_info["content_file"] = ""
                 else:
-                    print("Dumping segment {0}@0x{1:016x} (size:{2})".format(SegName(seg_ea), seg_ea, len(seg_content)))
+                    print(
+                        "Dumping segment {0}@0x{1:016x} (size:{2})".format(
+                            SegName(seg_ea), seg_ea, len(seg_content)
+                        )
+                    )
                     compressed_seg_content = zlib.compress(seg_content)
                     md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
                     seg_info["content_file"] = md5_sum
-                    
+
                     # Write the compressed contents to disk
-                    out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                    out_file = open(os.path.join(output_dir, md5_sum), "wb")
                     out_file.write(compressed_seg_content)
                     out_file.close()
             except:
@@ -145,12 +237,13 @@ def dump_process_memory(output_dir):
         else:
             print("Skipping segment {0}@0x{1:016x}".format(SegName(seg_ea), seg_ea))
             seg_info["content_file"] = ""
-            
+
         # Add the segment to the list
-        segment_list.append(seg_info)     
-   
+        segment_list.append(seg_info)
+
     return segment_list
 
+
 """
     TODO: FINISH IMPORT DUMPING
 def import_callback(ea, name, ord):
@@ -169,41 +262,47 @@ def dump_imports():
     
     return import_dict
 """
- 
-#----------
-#---- Main    
-    
+
+# ----------
+# ---- Main
+
+
 def main():
 
     try:
         print("----- Unicorn Context Dumper -----")
         print("You must be actively debugging before running this!")
-        print("If it fails, double check that you are actively debugging before running.")
+        print(
+            "If it fails, double check that you are actively debugging before running."
+        )
 
         # Create the output directory
-        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(
+            "%Y%m%d_%H%M%S"
+        )
         output_path = os.path.dirname(os.path.abspath(GetIdbPath()))
         output_path = os.path.join(output_path, "UnicornContext_" + timestamp)
         if not os.path.exists(output_path):
             os.makedirs(output_path)
         print("Process context will be output to {}".format(output_path))
-            
+
         # Get the context
         context = {
             "arch": dump_arch_info(),
-            "regs": dump_regs(), 
+            "regs": dump_regs(),
             "segments": dump_process_memory(output_path),
-            #"imports": dump_imports(),
+            # "imports": dump_imports(),
         }
 
         # Write the index file
-        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), "w")
         index_file.write(json.dumps(context, indent=4))
-        index_file.close()    
+        index_file.close()
         print("Done.")
-        
+
     except Exception, e:
         print("!!! ERROR:\n\t{}".format(str(e)))
-        
+
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
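The IDA dumper gets the same pure reformat and regains its missing trailing newline. It is meant to be run from a live IDA debugging session via File > Script file; a headless run would look roughly like the following, though the exact batch flags are an assumption:

    # hypothetical batch invocation; interactive use while debugging is the intended path
    idat64 -A -S"unicorn_dumper_ida.py" target.idb
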
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_lldb.py b/unicorn_mode/helper_scripts/unicorn_dumper_lldb.py
index 3c019d77..179d062a 100644
--- a/unicorn_mode/helper_scripts/unicorn_dumper_lldb.py
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_lldb.py
@@ -50,10 +50,11 @@ UNICORN_PAGE_SIZE = 0x1000
 
 # Alignment functions to align all memory segments to Unicorn page boundaries (4KB pages only)
 ALIGN_PAGE_DOWN = lambda x: x & ~(UNICORN_PAGE_SIZE - 1)
-ALIGN_PAGE_UP   = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE-1)
+ALIGN_PAGE_UP = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE - 1)
+
+# ----------------------
+# ---- Helper Functions
 
-#----------------------
-#---- Helper Functions
 
 def overlap_alignments(segments, memory):
     final_list = []
@@ -61,33 +62,40 @@ def overlap_alignments(segments, memory):
     curr_end_addr = 0
     curr_node = None
     current_segment = None
-    sorted_segments = sorted(segments, key=lambda k: (k['start'], k['end']))
+    sorted_segments = sorted(segments, key=lambda k: (k["start"], k["end"]))
     if curr_seg_idx < len(sorted_segments):
         current_segment = sorted_segments[curr_seg_idx]
-    for mem in sorted(memory, key=lambda k: (k['start'], -k['end'])):
+    for mem in sorted(memory, key=lambda k: (k["start"], -k["end"])):
         if curr_node is None:
-            if current_segment is not None and current_segment['start'] == mem['start']:
+            if current_segment is not None and current_segment["start"] == mem["start"]:
                 curr_node = deepcopy(current_segment)
-                curr_node['permissions'] = mem['permissions']
+                curr_node["permissions"] = mem["permissions"]
             else:
                 curr_node = deepcopy(mem)
 
-            curr_end_addr = curr_node['end']
-
-        while curr_end_addr <= mem['end']:
-            if curr_node['end'] == mem['end']:
-                if current_segment is not None and current_segment['start'] > curr_node['start'] and current_segment['start'] < curr_node['end']:
-                    curr_node['end'] = current_segment['start']
-                    if(curr_node['end'] > curr_node['start']):
+            curr_end_addr = curr_node["end"]
+
+        while curr_end_addr <= mem["end"]:
+            if curr_node["end"] == mem["end"]:
+                if (
+                    current_segment is not None
+                    and current_segment["start"] > curr_node["start"]
+                    and current_segment["start"] < curr_node["end"]
+                ):
+                    curr_node["end"] = current_segment["start"]
+                    if curr_node["end"] > curr_node["start"]:
                         final_list.append(curr_node)
                     curr_node = deepcopy(current_segment)
-                    curr_node['permissions'] = mem['permissions']
-                    curr_end_addr = curr_node['end']
+                    curr_node["permissions"] = mem["permissions"]
+                    curr_end_addr = curr_node["end"]
                 else:
-                    if(curr_node['end'] > curr_node['start']):
+                    if curr_node["end"] > curr_node["start"]:
                         final_list.append(curr_node)
                     # if curr_node is a segment
-                    if current_segment is not None and current_segment['end'] == mem['end']:
+                    if (
+                        current_segment is not None
+                        and current_segment["end"] == mem["end"]
+                    ):
                         curr_seg_idx += 1
                         if curr_seg_idx < len(sorted_segments):
                             current_segment = sorted_segments[curr_seg_idx]
@@ -98,50 +106,56 @@ def overlap_alignments(segments, memory):
                     break
             # could only be a segment
             else:
-                if curr_node['end'] < mem['end']:
+                if curr_node["end"] < mem["end"]:
                     # check for remaining segments and valid segments
-                    if(curr_node['end'] > curr_node['start']):
+                    if curr_node["end"] > curr_node["start"]:
                         final_list.append(curr_node)
-          
+
                     curr_seg_idx += 1
                     if curr_seg_idx < len(sorted_segments):
                         current_segment = sorted_segments[curr_seg_idx]
                     else:
                         current_segment = None
-                        
-                    if current_segment is not None and current_segment['start'] <= curr_end_addr and current_segment['start'] < mem['end']:
+
+                    if (
+                        current_segment is not None
+                        and current_segment["start"] <= curr_end_addr
+                        and current_segment["start"] < mem["end"]
+                    ):
                         curr_node = deepcopy(current_segment)
-                        curr_node['permissions'] = mem['permissions']
+                        curr_node["permissions"] = mem["permissions"]
                     else:
                         # no more segments
                         curr_node = deepcopy(mem)
-                        
-                    curr_node['start'] = curr_end_addr
-                    curr_end_addr = curr_node['end']
 
-    return final_list    
+                    curr_node["start"] = curr_end_addr
+                    curr_end_addr = curr_node["end"]
+
+    return final_list
+
 
 # https://github.com/llvm-mirror/llvm/blob/master/include/llvm/ADT/Triple.h
 def get_arch():
-    arch, arch_vendor, arch_os = lldb.target.GetTriple().split('-')
-    if arch == 'x86_64':
+    arch, arch_vendor, arch_os = lldb.target.GetTriple().split("-")
+    if arch == "x86_64":
         return "x64"
-    elif arch == 'x86' or arch == 'i386':
+    elif arch == "x86" or arch == "i386":
         return "x86"
-    elif arch == 'aarch64' or arch == 'arm64':
+    elif arch == "aarch64" or arch == "arm64":
         return "arm64le"
-    elif arch == 'aarch64_be':
+    elif arch == "aarch64_be":
         return "arm64be"
-    elif arch == 'armeb':
+    elif arch == "armeb":
         return "armbe"
-    elif arch == 'arm':
+    elif arch == "arm":
         return "armle"
     else:
         return ""
 
 
-#-----------------------
-#---- Dumping functions
+# -----------------------
+# ---- Dumping functions
+
 
 def dump_arch_info():
     arch_info = {}
@@ -152,56 +166,64 @@ def dump_arch_info():
 def dump_regs():
     reg_state = {}
     for reg_list in lldb.frame.GetRegisters():
-        if 'general purpose registers' in reg_list.GetName().lower():
+        if "general purpose registers" in reg_list.GetName().lower():
             for reg in reg_list:
                 reg_state[reg.GetName()] = int(reg.GetValue(), 16)
     return reg_state
 
+
 def get_section_info(sec):
-    name = sec.name if sec.name is not None else ''
+    name = sec.name if sec.name is not None else ""
     if sec.GetParent().name is not None:
-        name = sec.GetParent().name + '.' + sec.name
+        name = sec.GetParent().name + "." + sec.name
 
     module_name = sec.addr.module.file.GetFilename()
-    module_name = module_name if module_name is not None else ''
-    long_name = module_name + '.' + name
-    
+    module_name = module_name if module_name is not None else ""
+    long_name = module_name + "." + name
+
     return sec.addr.load_addr, (sec.addr.load_addr + sec.size), sec.size, long_name
- 
+
 
 def dump_process_memory(output_dir):
     # Segment information dictionary
     raw_segment_list = []
     raw_memory_list = []
-    
+
     # 1st pass:
     # Loop over the segments, fill in the segment info dictionary
     for module in lldb.target.module_iter():
         for seg_ea in module.section_iter():
-            seg_info = {'module': module.file.GetFilename() }
-            seg_info['start'], seg_info['end'], seg_size, seg_info['name'] = get_section_info(seg_ea)
+            seg_info = {"module": module.file.GetFilename()}
+            (
+                seg_info["start"],
+                seg_info["end"],
+                seg_size,
+                seg_info["name"],
+            ) = get_section_info(seg_ea)
             # TODO: Ugly hack for -1 LONG address on 32-bit
-            if seg_info['start'] >= sys.maxint or seg_size <= 0:
-                print "Throwing away page: {}".format(seg_info['name'])     
+            if seg_info["start"] >= sys.maxint or seg_size <= 0:
+                print "Throwing away page: {}".format(seg_info["name"])
                 continue
 
             # Page-align segment
-            seg_info['start'] = ALIGN_PAGE_DOWN(seg_info['start'])
-            seg_info['end'] = ALIGN_PAGE_UP(seg_info['end'])
-            print("Appending: {}".format(seg_info['name']))
+            seg_info["start"] = ALIGN_PAGE_DOWN(seg_info["start"])
+            seg_info["end"] = ALIGN_PAGE_UP(seg_info["end"])
+            print ("Appending: {}".format(seg_info["name"]))
             raw_segment_list.append(seg_info)
 
     # Add the stack memory region (just hardcode 0x1000 around the current SP)
     sp = lldb.frame.GetSP()
     start_sp = ALIGN_PAGE_DOWN(sp)
-    raw_segment_list.append({'start': start_sp, 'end': start_sp + 0x1000, 'name': 'STACK'})
+    raw_segment_list.append(
+        {"start": start_sp, "end": start_sp + 0x1000, "name": "STACK"}
+    )
 
     # Write the original memory to file for debugging
-    index_file = open(os.path.join(output_dir, DEBUG_MEM_FILE_NAME), 'w')
+    index_file = open(os.path.join(output_dir, DEBUG_MEM_FILE_NAME), "w")
     index_file.write(json.dumps(raw_segment_list, indent=4))
-    index_file.close()    
+    index_file.close()
 
-    # Loop over raw memory regions 
+    # Loop over raw memory regions
     mem_info = lldb.SBMemoryRegionInfo()
     start_addr = -1
     next_region_addr = 0
@@ -218,15 +240,20 @@ def dump_process_memory(output_dir):
         end_addr = mem_info.GetRegionEnd()
 
         # Unknown region name
-        region_name = 'UNKNOWN'
+        region_name = "UNKNOWN"
 
         # Ignore regions that aren't even mapped
         if mem_info.IsMapped() and mem_info.IsReadable():
-            mem_info_obj = {'start': start_addr, 'end': end_addr, 'name': region_name, 'permissions': {
-                "r": mem_info.IsReadable(),
-                "w": mem_info.IsWritable(),
-                "x": mem_info.IsExecutable()
-            }}
+            mem_info_obj = {
+                "start": start_addr,
+                "end": end_addr,
+                "name": region_name,
+                "permissions": {
+                    "r": mem_info.IsReadable(),
+                    "w": mem_info.IsWritable(),
+                    "x": mem_info.IsExecutable(),
+                },
+            }
 
             raw_memory_list.append(mem_info_obj)
 
@@ -234,65 +261,89 @@ def dump_process_memory(output_dir):
 
     for seg_info in final_segment_list:
         try:
-            seg_info['content_file'] = ''
-            start_addr = seg_info['start']
-            end_addr = seg_info['end']
-            region_name = seg_info['name']
+            seg_info["content_file"] = ""
+            start_addr = seg_info["start"]
+            end_addr = seg_info["end"]
+            region_name = seg_info["name"]
             # Compress and dump the content to a file
             err = lldb.SBError()
-            seg_content = lldb.process.ReadMemory(start_addr, end_addr - start_addr, err)
-            if(seg_content == None):
-                print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(start_addr, region_name))
-                seg_info['content_file'] = ''
+            seg_content = lldb.process.ReadMemory(
+                start_addr, end_addr - start_addr, err
+            )
+            if seg_content == None:
+                print (
+                    "Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(
+                        start_addr, region_name
+                    )
+                )
+                seg_info["content_file"] = ""
             else:
-                print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(start_addr, len(seg_content), region_name, repr(seg_info['permissions'])))
+                print (
+                    "Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(
+                        start_addr,
+                        len(seg_content),
+                        region_name,
+                        repr(seg_info["permissions"]),
+                    )
+                )
                 compressed_seg_content = zlib.compress(seg_content)
                 md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
-                seg_info['content_file'] = md5_sum
-                
+                seg_info["content_file"] = md5_sum
+
                 # Write the compressed contents to disk
-                out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                out_file = open(os.path.join(output_dir, md5_sum), "wb")
                 out_file.write(compressed_seg_content)
                 out_file.close()
-    
+
         except:
-            print("Exception reading segment ({}): {}".format(region_name, sys.exc_info()[0]))
-            
+            print (
+                "Exception reading segment ({}): {}".format(
+                    region_name, sys.exc_info()[0]
+                )
+            )
+
     return final_segment_list
 
-#----------
-#---- Main    
-    
+
+# ----------
+# ---- Main
+
+
 def main():
 
     try:
-        print("----- Unicorn Context Dumper -----")
-        print("You must be actively debugging before running this!")
-        print("If it fails, double check that you are actively debugging before running.")
-        
+        print ("----- Unicorn Context Dumper -----")
+        print ("You must be actively debugging before running this!")
+        print (
+            "If it fails, double check that you are actively debugging before running."
+        )
+
         # Create the output directory
-        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(
+            "%Y%m%d_%H%M%S"
+        )
         output_path = "UnicornContext_" + timestamp
         if not os.path.exists(output_path):
             os.makedirs(output_path)
-        print("Process context will be output to {}".format(output_path))
-            
+        print ("Process context will be output to {}".format(output_path))
+
         # Get the context
         context = {
             "arch": dump_arch_info(),
-            "regs": dump_regs(), 
+            "regs": dump_regs(),
             "segments": dump_process_memory(output_path),
         }
-    
+
         # Write the index file
-        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), "w")
         index_file.write(json.dumps(context, indent=4))
-        index_file.close()    
-        print("Done.")
-        
+        index_file.close()
+        print ("Done.")
+
     except Exception, e:
-        print("!!! ERROR:\n\t{}".format(repr(e)))
-        
+        print ("!!! ERROR:\n\t{}".format(repr(e)))
+
+
 if __name__ == "__main__":
     main()
 elif lldb.debugger:
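The lldb dumper keeps its Python 2 constructs (print statements, "except Exception, e:") through the reformat, so it still targets the Python interpreter embedded in older lldb builds. Typical use from an active session, paths assumed:

    lldb ./target
    (lldb) process launch --stop-at-entry
    (lldb) command script import unicorn_mode/helper_scripts/unicorn_dumper_lldb.py
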
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py b/unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py
index dc56b2aa..eccbc8bf 100644
--- a/unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py
@@ -59,45 +59,47 @@ MAX_SEG_SIZE = 128 * 1024 * 1024
 # Name of the index file
 INDEX_FILE_NAME = "_index.json"
 
-#----------------------
-#---- Helper Functions
+# ----------------------
+# ---- Helper Functions
+
 
 def map_arch():
-    arch = pwndbg.arch.current # from PWNDBG
-    if 'x86_64' in arch or 'x86-64' in arch:
+    arch = pwndbg.arch.current  # from PWNDBG
+    if "x86_64" in arch or "x86-64" in arch:
         return "x64"
-    elif 'x86' in arch or 'i386' in arch:
+    elif "x86" in arch or "i386" in arch:
         return "x86"
-    elif 'aarch64' in arch or 'arm64' in arch:
+    elif "aarch64" in arch or "arm64" in arch:
         return "arm64le"
-    elif 'aarch64_be' in arch:
+    elif "aarch64_be" in arch:
         return "arm64be"
-    elif 'arm' in arch:
-        cpsr = pwndbg.regs['cpsr']
-        # check endianess 
-        if pwndbg.arch.endian == 'big':
+    elif "arm" in arch:
+        cpsr = pwndbg.regs["cpsr"]
+        # check endianness
+        if pwndbg.arch.endian == "big":
             # check for THUMB mode
-            if (cpsr & (1 << 5)):
+            if cpsr & (1 << 5):
                 return "armbethumb"
             else:
                 return "armbe"
         else:
             # check for THUMB mode
-            if (cpsr & (1 << 5)):
+            if cpsr & (1 << 5):
                 return "armlethumb"
             else:
                 return "armle"
-    elif 'mips' in arch:
-        if pwndbg.arch.endian == 'little':
-            return 'mipsel'
+    elif "mips" in arch:
+        if pwndbg.arch.endian == "little":
+            return "mipsel"
         else:
-            return 'mips'
+            return "mips"
     else:
         return ""
 
 
-#-----------------------
-#---- Dumping functions
+# -----------------------
+# ---- Dumping functions
+
 
 def dump_arch_info():
     arch_info = {}
@@ -110,26 +112,26 @@ def dump_regs():
     for reg in pwndbg.regs.all:
         reg_val = pwndbg.regs[reg]
         # current dumper script looks for register values to be hex strings
-#         reg_str = "0x{:08x}".format(reg_val)
-#         if "64" in get_arch():
-#             reg_str = "0x{:016x}".format(reg_val)
-#         reg_state[reg.strip().strip('$')] = reg_str
-        reg_state[reg.strip().strip('$')] = reg_val
+        #         reg_str = "0x{:08x}".format(reg_val)
+        #         if "64" in get_arch():
+        #             reg_str = "0x{:016x}".format(reg_val)
+        #         reg_state[reg.strip().strip('$')] = reg_str
+        reg_state[reg.strip().strip("$")] = reg_val
     return reg_state
 
 
 def dump_process_memory(output_dir):
     # Segment information dictionary
     final_segment_list = []
-    
+
     # PWNDBG:
     vmmap = pwndbg.vmmap.get()
-    
+
     # Pointer to end of last dumped memory segment
-    segment_last_addr = 0x0;
+    segment_last_addr = 0x0
 
     start = None
-    end   = None
+    end = None
 
     if not vmmap:
         print("No address mapping information found")
@@ -141,86 +143,107 @@ def dump_process_memory(output_dir):
             continue
 
         start = entry.start
-        end   = entry.end
+        end = entry.end
 
-        if (segment_last_addr > entry.start): # indicates overlap
-            if (segment_last_addr > entry.end): # indicates complete overlap, so we skip the segment entirely
+        if segment_last_addr > entry.start:  # indicates overlap
+            if (
+                segment_last_addr > entry.end
+            ):  # indicates complete overlap, so we skip the segment entirely
                 continue
-            else:            
+            else:
                 start = segment_last_addr
-            
-        
-        seg_info = {'start': start, 'end': end, 'name': entry.objfile, 'permissions': {
-            "r": entry.read,
-            "w": entry.write,
-            "x": entry.execute
-        }, 'content_file': ''}
+
+        seg_info = {
+            "start": start,
+            "end": end,
+            "name": entry.objfile,
+            "permissions": {"r": entry.read, "w": entry.write, "x": entry.execute},
+            "content_file": "",
+        }
 
         # "(deleted)" may or may not be valid, but don't push it.
-        if entry.read and not '(deleted)' in entry.objfile:
+        if entry.read and not "(deleted)" in entry.objfile:
             try:
                 # Compress and dump the content to a file
                 seg_content = pwndbg.memory.read(start, end - start)
-                if(seg_content == None):
-                    print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.start, entry.objfile))
+                if seg_content == None:
+                    print(
+                        "Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(
+                            entry.start, entry.objfile
+                        )
+                    )
                 else:
-                    print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.start, len(seg_content), entry.objfile, repr(seg_info['permissions'])))
+                    print(
+                        "Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(
+                            entry.start,
+                            len(seg_content),
+                            entry.objfile,
+                            repr(seg_info["permissions"]),
+                        )
+                    )
                     compressed_seg_content = zlib.compress(str(seg_content))
                     md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
                     seg_info["content_file"] = md5_sum
-                    
+
                     # Write the compressed contents to disk
-                    out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                    out_file = open(os.path.join(output_dir, md5_sum), "wb")
                     out_file.write(compressed_seg_content)
                     out_file.close()
 
             except Exception as e:
                 traceback.print_exc()
-                print("Exception reading segment ({}): {}".format(entry.objfile, sys.exc_info()[0]))
+                print(
+                    "Exception reading segment ({}): {}".format(
+                        entry.objfile, sys.exc_info()[0]
+                    )
+                )
         else:
             print("Skipping segment {0}@0x{1:016x}".format(entry.objfile, entry.start))
-        
+
         segment_last_addr = end
 
         # Add the segment to the list
         final_segment_list.append(seg_info)
 
-            
     return final_segment_list
 
-#----------
-#---- Main    
-    
+
+# ----------
+# ---- Main
+
+
 def main():
     print("----- Unicorn Context Dumper -----")
     print("You must be actively debugging before running this!")
     print("If it fails, double check that you are actively debugging before running.")
-    
+
     try:
 
         # Create the output directory
-        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(
+            "%Y%m%d_%H%M%S"
+        )
         output_path = "UnicornContext_" + timestamp
         if not os.path.exists(output_path):
             os.makedirs(output_path)
         print("Process context will be output to {}".format(output_path))
-            
+
         # Get the context
         context = {
             "arch": dump_arch_info(),
-            "regs": dump_regs(), 
+            "regs": dump_regs(),
             "segments": dump_process_memory(output_path),
         }
 
         # Write the index file
-        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), "w")
         index_file.write(json.dumps(context, indent=4))
-        index_file.close()    
+        index_file.close()
         print("Done.")
-        
+
     except Exception as e:
         print("!!! ERROR:\n\t{}".format(repr(e)))
-        
+
+
 if __name__ == "__main__" and pwndbg_loaded:
     main()
-    
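The pwndbg dumper follows the same reformat; the pwndbg_loaded guard on main() means it only fires when sourced into a gdb session that already initialized pwndbg:

    gdb -q ./target           # with pwndbg loaded via ~/.gdbinit
    (gdb) run
    (gdb) source unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py
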
diff --git a/unicorn_mode/samples/compcov_x64/compcov_test_harness.py b/unicorn_mode/samples/compcov_x64/compcov_test_harness.py
index b9ebb61d..f0749d1b 100644
--- a/unicorn_mode/samples/compcov_x64/compcov_test_harness.py
+++ b/unicorn_mode/samples/compcov_x64/compcov_test_harness.py
@@ -22,48 +22,81 @@ from unicornafl import *
 from unicornafl.x86_const import *
 
 # Path to the file containing the binary to emulate
-BINARY_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'compcov_target.bin')
+BINARY_FILE = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "compcov_target.bin"
+)
 
 # Memory map for the code to be tested
-CODE_ADDRESS  = 0x00100000  # Arbitrary address where code to test will be loaded
+CODE_ADDRESS = 0x00100000  # Arbitrary address where code to test will be loaded
 CODE_SIZE_MAX = 0x00010000  # Max size for the code (64kb)
 STACK_ADDRESS = 0x00200000  # Address of the stack (arbitrarily chosen)
-STACK_SIZE	  = 0x00010000  # Size of the stack (arbitrarily chosen)
-DATA_ADDRESS  = 0x00300000  # Address where mutated data will be placed
+STACK_SIZE = 0x00010000  # Size of the stack (arbitrarily chosen)
+DATA_ADDRESS = 0x00300000  # Address where mutated data will be placed
 DATA_SIZE_MAX = 0x00010000  # Maximum allowable size of mutated data
 
 try:
     # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
     from capstone import *
+
     cs = Cs(CS_ARCH_X86, CS_MODE_64)
+
     def unicorn_debug_instruction(uc, address, size, user_data):
         mem = uc.mem_read(address, size)
-        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
+        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(
+            bytes(mem), size
+        ):
             print("    Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
+
+
 except ImportError:
+
     def unicorn_debug_instruction(uc, address, size, user_data):
         print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
 
+
 def unicorn_debug_block(uc, address, size, user_data):
     print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
 
+
 def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE:
-        print("        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
         print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))
 
+
 def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE_UNMAPPED:
-        print("        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
-        print("        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))
+        print(
+            "        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size)
+        )
+
 
 def main():
 
     parser = argparse.ArgumentParser(description="Test harness for compcov_target.bin")
-    parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input to load")
-    parser.add_argument('-t', '--trace', default=False, action="store_true", help="Enables debug tracing")
+    parser.add_argument(
+        "input_file",
+        type=str,
+        help="Path to the file containing the mutated input to load",
+    )
+    parser.add_argument(
+        "-t",
+        "--trace",
+        default=False,
+        action="store_true",
+        help="Enables debug tracing",
+    )
     args = parser.parse_args()
 
     # Instantiate an x86_64 Unicorn Engine instance
@@ -73,13 +106,16 @@ def main():
         uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
         uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
         uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
-        uc.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, unicorn_debug_mem_invalid_access)
+        uc.hook_add(
+            UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID,
+            unicorn_debug_mem_invalid_access,
+        )
 
-    #---------------------------------------------------
+    # ---------------------------------------------------
     # Load the binary to emulate and map it into memory
 
     print("Loading data input from {}".format(args.input_file))
-    binary_file = open(BINARY_FILE, 'rb')
+    binary_file = open(BINARY_FILE, "rb")
     binary_code = binary_file.read()
     binary_file.close()
 
@@ -93,11 +129,11 @@ def main():
     uc.mem_write(CODE_ADDRESS, binary_code)
 
     # Set the program counter to the start of the code
-    start_address = CODE_ADDRESS          # Address of entry point of main()
-    end_address   = CODE_ADDRESS + 0x55   # Address of last instruction in main()
+    start_address = CODE_ADDRESS  # Address of entry point of main()
+    end_address = CODE_ADDRESS + 0x55  # Address of last instruction in main()
     uc.reg_write(UC_X86_REG_RIP, start_address)
 
-    #-----------------
+    # -----------------
     # Setup the stack
 
     uc.mem_map(STACK_ADDRESS, STACK_SIZE)
@@ -106,8 +142,7 @@ def main():
     # Mapping a location to write our buffer to
     uc.mem_map(DATA_ADDRESS, DATA_SIZE_MAX)
 
-
-    #-----------------------------------------------
+    # -----------------------------------------------
     # Load the mutated input and map it into memory
 
     def place_input_callback(uc, input, _, data):
@@ -121,7 +156,7 @@ def main():
         # Write the mutated command into the data buffer
         uc.mem_write(DATA_ADDRESS, input)
 
-    #------------------------------------------------------------
+    # ------------------------------------------------------------
     # Emulate the code, allowing it to process the mutated input
 
     print("Starting the AFL fuzz")
@@ -129,8 +164,9 @@ def main():
         input_file=args.input_file,
         place_input_callback=place_input_callback,
         exits=[end_address],
-        persistent_iters=1
+        persistent_iters=1,
     )
 
+
 if __name__ == "__main__":
     main()
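The compcov harness changes are formatting only; the afl_fuzz call (place_input_callback, exits at end_address, persistent_iters=1) is unchanged. The harness is driven by afl-fuzz in unicorn mode; a typical invocation, with the input directory name being an assumption:

    cd unicorn_mode/samples/compcov_x64
    ../../../afl-fuzz -U -m none -i ./sample_inputs -o ./output -- python3 compcov_test_harness.py @@
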
diff --git a/unicorn_mode/samples/simple/simple_test_harness.py b/unicorn_mode/samples/simple/simple_test_harness.py
index f4002ca8..cd04ad3a 100644
--- a/unicorn_mode/samples/simple/simple_test_harness.py
+++ b/unicorn_mode/samples/simple/simple_test_harness.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 """ 
    Simple test harness for AFL's Unicorn Mode.
 
@@ -22,48 +22,81 @@ from unicornafl import *
 from unicornafl.mips_const import *
 
 # Path to the file containing the binary to emulate
-BINARY_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'simple_target.bin')
+BINARY_FILE = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "simple_target.bin"
+)
 
 # Memory map for the code to be tested
-CODE_ADDRESS  = 0x00100000  # Arbitrary address where code to test will be loaded
+CODE_ADDRESS = 0x00100000  # Arbitrary address where code to test will be loaded
 CODE_SIZE_MAX = 0x00010000  # Max size for the code (64kb)
 STACK_ADDRESS = 0x00200000  # Address of the stack (arbitrarily chosen)
-STACK_SIZE	  = 0x00010000  # Size of the stack (arbitrarily chosen)
-DATA_ADDRESS  = 0x00300000  # Address where mutated data will be placed
+STACK_SIZE = 0x00010000  # Size of the stack (arbitrarily chosen)
+DATA_ADDRESS = 0x00300000  # Address where mutated data will be placed
 DATA_SIZE_MAX = 0x00010000  # Maximum allowable size of mutated data
 
 try:
     # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
     from capstone import *
+
     cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
+
     def unicorn_debug_instruction(uc, address, size, user_data):
         mem = uc.mem_read(address, size)
-        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
+        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(
+            bytes(mem), size
+        ):
             print("    Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
+
+
 except ImportError:
+
     def unicorn_debug_instruction(uc, address, size, user_data):
-        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))    
+        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
+
 
 def unicorn_debug_block(uc, address, size, user_data):
     print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
-    
+
+
 def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE:
-        print("        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
-        print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))    
+        print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))
+
 
 def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE_UNMAPPED:
-        print("        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
-        print("        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))   
+        print(
+            "        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size)
+        )
+
 
 def main():
 
     parser = argparse.ArgumentParser(description="Test harness for simple_target.bin")
-    parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input to load")
-    parser.add_argument('-t', '--trace', default=False, action="store_true", help="Enables debug tracing")
+    parser.add_argument(
+        "input_file",
+        type=str,
+        help="Path to the file containing the mutated input to load",
+    )
+    parser.add_argument(
+        "-t",
+        "--trace",
+        default=False,
+        action="store_true",
+        help="Enables debug tracing",
+    )
     args = parser.parse_args()
 
     # Instantiate a MIPS32 big endian Unicorn Engine instance
@@ -73,13 +106,16 @@ def main():
         uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
         uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
         uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
-        uc.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, unicorn_debug_mem_invalid_access)
+        uc.hook_add(
+            UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID,
+            unicorn_debug_mem_invalid_access,
+        )
 
-    #---------------------------------------------------
+    # ---------------------------------------------------
     # Load the binary to emulate and map it into memory
 
     print("Loading data input from {}".format(args.input_file))
-    binary_file = open(BINARY_FILE, 'rb')
+    binary_file = open(BINARY_FILE, "rb")
     binary_code = binary_file.read()
     binary_file.close()
 
@@ -93,11 +129,11 @@ def main():
     uc.mem_write(CODE_ADDRESS, binary_code)
 
     # Set the program counter to the start of the code
-    start_address = CODE_ADDRESS          # Address of entry point of main()
-    end_address   = CODE_ADDRESS + 0xf4   # Address of last instruction in main()
+    start_address = CODE_ADDRESS  # Address of entry point of main()
+    end_address = CODE_ADDRESS + 0xF4  # Address of last instruction in main()
     uc.reg_write(UC_MIPS_REG_PC, start_address)
 
-    #-----------------
+    # -----------------
     # Setup the stack
 
     uc.mem_map(STACK_ADDRESS, STACK_SIZE)
@@ -106,14 +142,14 @@ def main():
     # reserve some space for data
     uc.mem_map(DATA_ADDRESS, DATA_SIZE_MAX)
 
-    #-----------------------------------------------------
+    # -----------------------------------------------------
     # Set up a callback to place input data (do little work here, it's called for every single iteration)
     # We did not pass in any data and don't use persistent mode, so we can ignore these params.
     # Be sure to check out the docstrings for the uc.afl_* functions.
     def place_input_callback(uc, input, persistent_round, data):
         # Apply constraints to the mutated input
         if len(input) > DATA_SIZE_MAX:
-            #print("Test input is too long (> {} bytes)")
+            # print("Test input is too long (> {} bytes)")
             return False
 
         # Write the mutated command into the data buffer
@@ -122,5 +158,6 @@ def main():
     # Start the fuzzer.
     uc.afl_fuzz(args.input_file, place_input_callback, [end_address])
 
+
 if __name__ == "__main__":
     main()
diff --git a/unicorn_mode/samples/simple/simple_test_harness_alt.py b/unicorn_mode/samples/simple/simple_test_harness_alt.py
index 9c3dbc93..3249b13d 100644
--- a/unicorn_mode/samples/simple/simple_test_harness_alt.py
+++ b/unicorn_mode/samples/simple/simple_test_harness_alt.py
@@ -25,50 +25,79 @@ from unicornafl import *
 from unicornafl.mips_const import *
 
 # Path to the file containing the binary to emulate
-BINARY_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'simple_target.bin')
+BINARY_FILE = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "simple_target.bin"
+)
 
 # Memory map for the code to be tested
-CODE_ADDRESS  = 0x00100000  # Arbitrary address where code to test will be loaded
+CODE_ADDRESS = 0x00100000  # Arbitrary address where code to test will be loaded
 CODE_SIZE_MAX = 0x00010000  # Max size for the code (64kb)
 STACK_ADDRESS = 0x00200000  # Address of the stack (arbitrarily chosen)
-STACK_SIZE	  = 0x00010000  # Size of the stack (arbitrarily chosen)
-DATA_ADDRESS  = 0x00300000  # Address where mutated data will be placed
+STACK_SIZE = 0x00010000  # Size of the stack (arbitrarily chosen)
+DATA_ADDRESS = 0x00300000  # Address where mutated data will be placed
 DATA_SIZE_MAX = 0x00010000  # Maximum allowable size of mutated data
 
 try:
     # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
     from capstone import *
+
     cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
+
     def unicorn_debug_instruction(uc, address, size, user_data):
         mem = uc.mem_read(address, size)
-        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
+        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(
+            bytes(mem), size
+        ):
             print("    Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
+
+
 except ImportError:
+
     def unicorn_debug_instruction(uc, address, size, user_data):
-        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))    
+        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
+
 
 def unicorn_debug_block(uc, address, size, user_data):
     print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
-    
+
+
 def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE:
-        print("        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
-        print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))    
+        print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))
+
 
 def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE_UNMAPPED:
-        print("        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
-        print("        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))   
+        print(
+            "        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size)
+        )
+
 
 def force_crash(uc_error):
     # This function should be called to indicate to AFL that a crash occurred during emulation.
     # Pass in the exception received from Uc.emu_start()
     mem_errors = [
-        UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED,
-        UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED,
-        UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED,
+        UC_ERR_READ_UNMAPPED,
+        UC_ERR_READ_PROT,
+        UC_ERR_READ_UNALIGNED,
+        UC_ERR_WRITE_UNMAPPED,
+        UC_ERR_WRITE_PROT,
+        UC_ERR_WRITE_UNALIGNED,
+        UC_ERR_FETCH_UNMAPPED,
+        UC_ERR_FETCH_PROT,
+        UC_ERR_FETCH_UNALIGNED,
     ]
     if uc_error.errno in mem_errors:
         # Memory error - throw SIGSEGV
@@ -80,11 +109,22 @@ def force_crash(uc_error):
         # Not sure what happened - throw SIGABRT
         os.kill(os.getpid(), signal.SIGABRT)
 
+
 def main():
 
     parser = argparse.ArgumentParser(description="Test harness for simple_target.bin")
-    parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input to load")
-    parser.add_argument('-d', '--debug', default=False, action="store_true", help="Enables debug tracing")
+    parser.add_argument(
+        "input_file",
+        type=str,
+        help="Path to the file containing the mutated input to load",
+    )
+    parser.add_argument(
+        "-d",
+        "--debug",
+        default=False,
+        action="store_true",
+        help="Enables debug tracing",
+    )
     args = parser.parse_args()
 
     # Instantiate a MIPS32 big endian Unicorn Engine instance
@@ -94,13 +134,16 @@ def main():
         uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
         uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
         uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
-        uc.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, unicorn_debug_mem_invalid_access)
+        uc.hook_add(
+            UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID,
+            unicorn_debug_mem_invalid_access,
+        )
 
-    #---------------------------------------------------
+    # ---------------------------------------------------
     # Load the binary to emulate and map it into memory
 
     print("Loading data input from {}".format(args.input_file))
-    binary_file = open(BINARY_FILE, 'rb')
+    binary_file = open(BINARY_FILE, "rb")
     binary_code = binary_file.read()
     binary_file.close()
 
@@ -114,11 +157,11 @@ def main():
     uc.mem_write(CODE_ADDRESS, binary_code)
 
     # Set the program counter to the start of the code
-    start_address = CODE_ADDRESS          # Address of entry point of main()
-    end_address   = CODE_ADDRESS + 0xf4   # Address of last instruction in main()
+    start_address = CODE_ADDRESS  # Address of entry point of main()
+    end_address = CODE_ADDRESS + 0xF4  # Address of last instruction in main()
     uc.reg_write(UC_MIPS_REG_PC, start_address)
 
-    #-----------------
+    # -----------------
     # Setup the stack
 
     uc.mem_map(STACK_ADDRESS, STACK_SIZE)
@@ -127,10 +170,10 @@ def main():
     # reserve some space for data
     uc.mem_map(DATA_ADDRESS, DATA_SIZE_MAX)
 
-    #-----------------------------------------------------
+    # -----------------------------------------------------
     #   Kick off AFL's fork server
-    #   THIS MUST BE DONE BEFORE LOADING USER DATA! 
-    #   If this isn't done every single run, the AFL fork server 
+    #   THIS MUST BE DONE BEFORE LOADING USER DATA!
+    #   If this isn't done every single run, the AFL fork server
     #   will not be started appropriately and you'll get erratic results!
 
     print("Starting the AFL forkserver")
@@ -142,12 +185,12 @@ def main():
     else:
         out = lambda x, y: print(x.format(y))
 
-    #-----------------------------------------------
+    # -----------------------------------------------
     # Load the mutated input and map it into memory
 
     # Load the mutated input from disk
     out("Loading data input from {}", args.input_file)
-    input_file = open(args.input_file, 'rb')
+    input_file = open(args.input_file, "rb")
     input = input_file.read()
     input_file.close()
 
@@ -159,7 +202,7 @@ def main():
     # Write the mutated command into the data buffer
     uc.mem_write(DATA_ADDRESS, input)
 
-    #------------------------------------------------------------
+    # ------------------------------------------------------------
     # Emulate the code, allowing it to process the mutated input
 
     out("Executing until a crash or execution reaches 0x{0:016x}", end_address)
@@ -175,5 +218,6 @@ def main():
     # UC_AFL_RET_FINISHED = 3
     out("Done. AFL Mode is {}", afl_mode)
 
+
 if __name__ == "__main__":
     main()
diff --git a/unicorn_mode/samples/speedtest/.gitignore b/unicorn_mode/samples/speedtest/.gitignore
new file mode 100644
index 00000000..78310c60
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/.gitignore
@@ -0,0 +1,6 @@
+output
+harness
+harness-debug
+target
+target.o
+target.offsets.*
diff --git a/unicorn_mode/samples/speedtest/Makefile b/unicorn_mode/samples/speedtest/Makefile
new file mode 100644
index 00000000..23f5cb07
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/Makefile
@@ -0,0 +1,17 @@
+CFLAGS += -Wall -Werror -Wextra -Wpedantic -Og -g -fPIE
+
+.PHONY: all clean
+
+all: target target.offsets.main
+
+clean:
+	rm -rf *.o target target.offsets.*
+
+target.o: target.c
+	${CC} ${CFLAGS} -c target.c -o $@
+
+target: target.o
+	${CC} ${CFLAGS} target.o -o $@
+
+target.offsets.main: target
+	./get_offsets.py
\ No newline at end of file
diff --git a/unicorn_mode/samples/speedtest/README.md b/unicorn_mode/samples/speedtest/README.md
new file mode 100644
index 00000000..3c1184a2
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/README.md
@@ -0,0 +1,65 @@
+# Speedtest
+
+This is a simple sample harness for a non-crashing file,
+to show the raw speed of C, Rust, and Python harnesses.
+
+## Compiling...
+
+Make sure you have built unicornafl first (`../../build_unicorn_support.sh`).
+Then, follow these individual steps:
+
+### Rust
+
+```bash
+cd rust
+cargo build --release
+../../../../afl-fuzz -i ../sample_inputs -o out -- ./target/release/unicornafl_harness @@
+```
+
+### C
+
+```bash
+cd c
+make
+../../../../afl-fuzz -i ../sample_inputs -o out -- ./harness @@
+```
+
+### Python
+
+```bash
+cd python
+../../../../afl-fuzz -i ../sample_inputs -o out -U -- python3 ./harness.py @@
+```
+
+## Results
+
+TODO: add results here.
+
+
+## Compiling target.c
+
+You normally don't need to compile target.c by hand: running `make` in this
+folder builds the `target` binary (and the `target.offsets.*` files) for you.
+This section documents how the binary is built in case you want to rebuild it
+for any reason.
+
+The binary is built as a position-independent x86_64 executable with
+`-Og -g -fPIE` (see the Makefile in this folder).
+
+The harnesses then load the binary and execute its main function directly.
+
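+If you prefer to build it manually, the Makefile's recipe boils down to
+roughly this (assuming a gcc-compatible compiler):
+
+```bash
+cc -Wall -Werror -Wextra -Wpedantic -Og -g -fPIE -c target.c -o target.o
+cc -Og -g -fPIE target.o -o target
+./get_offsets.py  # regenerate the target.offsets.* files afterwards
+```
+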
+## Addresses for the harness:
+To find the address (in hex) of main, run:
+```bash
+objdump -M intel -D target | grep '<main>:' | cut -d" " -f1
+```
+To find all call sites to magicfn, run:
+```bash
+objdump -M intel -D target | grep '<magicfn>$' | cut -d":" -f1
+```
+For malloc callsites:
+```bash
+objdump -M intel -D target | grep '<malloc@plt>$' | cut -d":" -f1
+```
+And free callsites:
+```bash
+objdump -M intel -D target | grep '<free@plt>$' | cut -d":" -f1
+```
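+
+The `get_offsets.py` script in this folder runs these objdump queries for you
+and writes the results to the `target.offsets.*` files that the harnesses read.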
diff --git a/unicorn_mode/samples/speedtest/c/Makefile b/unicorn_mode/samples/speedtest/c/Makefile
new file mode 100644
index 00000000..ce784d4f
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/c/Makefile
@@ -0,0 +1,54 @@
+# UnicornAFL Usage
+# Original Unicorn Example Makefile by Nguyen Anh Quynh <aquynh@gmail.com>, 2015
+# Adapted for AFL++ by domenukk <domenukk@gmail.com>, 2020
+.POSIX:
+UNAME_S =$(shell uname -s)# GNU make
+UNAME_S:sh=uname -s       # BSD make
+_UNIQ=_QINU_
+
+LIBDIR = ../../../unicornafl
+BIN_EXT =
+AR_EXT = a
+
+# Verbose output?
+V ?= 0
+
+CFLAGS += -Wall -Werror -Wextra -Wno-unused-parameter -I../../../unicornafl/include
+
+LDFLAGS += -L$(LIBDIR) -lpthread -lm
+
+_LRT = $(_UNIQ)$(UNAME_S:Linux=)
+__LRT = $(_LRT:$(_UNIQ)=-lrt)
+LRT = $(__LRT:$(_UNIQ)=)
+
+LDFLAGS += $(LRT)
+
+_CC = $(_UNIQ)$(CROSS)
+__CC = $(_CC:$(_UNIQ)=$(CC))
+MYCC = $(__CC:$(_UNIQ)$(CROSS)=$(CROSS)gcc)
+
+.PHONY: all clean
+
+all: fuzz
+
+clean:
+	rm -rf *.o harness harness-debug
+
+harness.o: harness.c ../../../unicornafl/include/unicorn/*.h
+	${MYCC} ${CFLAGS} -O3 -c harness.c -o $@
+
+harness-debug.o: harness.c ../../../unicornafl/include/unicorn/*.h
+	${MYCC} ${CFLAGS} -fsanitize=address -g -Og -c harness.c -o $@
+
+harness: harness.o
+	${MYCC} -L${LIBDIR} harness.o ../../../unicornafl/libunicornafl.a $(LDFLAGS) -o $@
+
+harness-debug: harness-debug.o
+	${MYCC} -fsanitize=address -g -Og -L${LIBDIR} harness-debug.o ../../../unicornafl/libunicornafl.a $(LDFLAGS) -o harness-debug
+
+../target:
+	$(MAKE) -C ..
+
+fuzz: ../target harness
+	rm -rf ./output
+	SKIP_BINCHECK=1 ../../../../afl-fuzz -s 1 -i ../sample_inputs -o ./output -- ./harness @@
diff --git a/unicorn_mode/samples/speedtest/c/harness.c b/unicorn_mode/samples/speedtest/c/harness.c
new file mode 100644
index 00000000..e8de3d80
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/c/harness.c
@@ -0,0 +1,390 @@
+/*
+   Simple test harness for AFL++'s unicornafl c mode.
+
+   This loads the ../target binary into
+   Unicorn's memory map for emulation, places the specified input into
+   argv[1], sets up argv and argc, and executes 'main()'.
+   If run inside AFL, afl_fuzz automatically does the "right thing"
+
+   Run under AFL as follows:
+
+   $ cd <afl_path>/unicorn_mode/samples/speedtest/c/
+   $ make
+   $ ../../../../afl-fuzz -m none -i ../sample_inputs -o out -- ./harness @@
+*/
+
+// This is not your everyday Unicorn.
+#define UNICORN_AFL
+
+#include <string.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include <unicorn/unicorn.h>
+
+// Path to the file containing the binary to emulate
+#define BINARY_FILE ("../target")
+
+// Memory map for the code to be tested
+// Arbitrary address where code to test will be loaded
+static const int64_t BASE_ADDRESS = 0x0;
+// Max size for the code (64kb)
+static const int64_t CODE_SIZE_MAX = 0x00010000;
+// Location where the input will be placed (make sure the emulated program knows this somehow, too ;) )
+static const int64_t INPUT_ADDRESS = 0x00100000;
+// Maximum size for our input
+static const int64_t INPUT_MAX = 0x00100000;
+// Where our pseudo-heap is at
+static const int64_t HEAP_ADDRESS = 0x00200000;
+// Maximum allowable size for the heap
+static const int64_t HEAP_SIZE_MAX = 0x000F0000;
+// Address of the stack (Some random address again)
+static const int64_t STACK_ADDRESS = 0x00400000;
+// Size of the stack (arbitrarily chosen, just make it big enough)
+static const int64_t STACK_SIZE = 0x000F0000;
+
+// Alignment for unicorn mappings (seems to be needed)
+static const int64_t ALIGNMENT = 0x1000;
+
+static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
+    printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size);
+}
+
+static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
+    printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size);
+}
+
+/* Unicorn page needs to be 0x1000 aligned, apparently */
+static uint64_t pad(uint64_t size) {
+    if (size % ALIGNMENT == 0) { return size; }
+    return ((size / ALIGNMENT) + 1) * ALIGNMENT;
+} 
+
+/* returns the filesize in bytes, -1 or error. */
+static off_t afl_mmap_file(char *filename, char **buf_ptr) {
+
+    off_t ret = -1;
+
+    int fd = open(filename, O_RDONLY);
+
+    struct stat st = {0};
+    if (fstat(fd, &st)) goto exit;
+
+    off_t in_len = st.st_size;
+    if (in_len == -1) {
+        /* This can only ever happen on 32 bit if the file is exactly 4gb. */
+        fprintf(stderr, "Filesize of %s too large\n", filename);
+        goto exit;
+    }
+
+    *buf_ptr = mmap(0, in_len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+
+    if (*buf_ptr != MAP_FAILED) ret = in_len;
+
+exit:
+    close(fd);
+    return ret;
+
+}
+
+/* Place the input at the right spot inside unicorn.
+   This code path is *HOT*, do as little work as possible! */
+static bool place_input_callback(
+    uc_engine *uc, 
+    char *input, 
+    size_t input_len, 
+    uint32_t persistent_round, 
+    void *data
+){
+    // printf("Placing input with len %ld to %x\n", input_len, DATA_ADDRESS);
+    if (input_len == 0 || input_len >= INPUT_MAX) {
+        // Test input too short or too long, ignore this testcase
+        return false;
+    }
+
+    // We need a valid c string, make sure it never goes out of bounds.
+    input[input_len-1] = '\0';
+
+    // Write the testcase to unicorn.
+    uc_mem_write(uc, INPUT_ADDRESS, input, input_len);
+
+    return true;
+}
+
+// exit in case the unicorn-internal mmap fails.
+static void mem_map_checked(uc_engine *uc, uint64_t addr, size_t size, uint32_t mode) {
+    size = pad(size);
+    //printf("SIZE %llx, align: %llx\n", size, ALIGNMENT);
+    uc_err err = uc_mem_map(uc, addr, size, mode);
+    if (err != UC_ERR_OK) {
+        printf("Error mapping %ld bytes at 0x%lx: %s (mode: %d)\n", size, addr, uc_strerror(err), mode);
+        exit(1);
+    }
+}
+
+// Reads up to max_count hex addresses (one per line) into the given array, returns the count.
+ssize_t read_all_addrs(char *path, uint64_t *addrs, size_t max_count) {
+
+    FILE *f = fopen(path, "r"); 
+    if (!f) {
+        perror("fopen");
+        fprintf(stderr, "Could not read %s, make sure you ran ./get_offsets.py\n", path);
+        exit(-1);
+    }
+    for (size_t i = 0; i < max_count; i++) {
+        bool end = false;
+        if(fscanf(f, "%lx", &addrs[i]) == EOF) {
+            end = true;
+            i--;
+        } else if (fgetc(f) == EOF) {
+            end = true;
+        }
+        if (end) {
+            printf("Set %ld addrs for %s\n", i + 1, path);
+            fclose(f);
+            return i + 1;
+        }
+    }
+    return max_count;
+}
+
+// Read all addresses from the given file, and set a hook for them.
+void set_all_hooks(uc_engine *uc, char *hook_file, void *hook_fn) {
+
+    FILE *f = fopen(hook_file, "r");
+    if (!f) {
+        fprintf(stderr, "Could not read %s, make sure you ran ./get_offsets.py\n", hook_file);
+        exit(-1);
+    }
+    uint64_t hook_addr;
+    for (int hook_count = 0; 1; hook_count++) {
+        if(fscanf(f, "%lx", &hook_addr) == EOF) {
+            printf("Set %d hooks for %s\n", hook_count, hook_file);
+            fclose(f);
+            return;
+        }
+        printf("got new hook addr %lx (count: %d) ohbytw: sizeof %lx\n", hook_addr, hook_count, sizeof(uc_hook));
+        hook_addr += BASE_ADDRESS;
+        // We'll leak these hooks like a good citizen.
+        uc_hook *hook = calloc(1, sizeof(uc_hook));
+        if (!hook) {
+            perror("calloc");
+            exit(-1);
+        }
+        uc_hook_add(uc, hook, UC_HOOK_CODE, hook_fn, NULL, hook_addr, hook_addr);
+        // guzzle up newline
+        if (fgetc(f) == EOF) {
+            printf("Set %d hooks for %s\n", hook_count, hook_file);
+            fclose(f);
+            return;
+        }
+    }
+
+}
+
+// This is a fancy print function that we're just going to skip for fuzzing.
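+// (The hook is registered on the call sites of magicfn, so advancing RIP by
+// the size of the current call instruction steps right over the call.)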
+static void hook_magicfn(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
+    address += size;
+    uc_reg_write(uc, UC_X86_REG_RIP, &address);
+} 
+
+static bool already_allocated = false;
+
+// We use a very simple malloc/free stub here, that only works for exactly one allocation at a time.
+static void hook_malloc(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
+    if (already_allocated) {
+        printf("Double malloc, not supported right now!\n");
+        abort();
+    }
+    // read the first param.
+    uint64_t malloc_size;
+    uc_reg_read(uc, UC_X86_REG_RDI, &malloc_size);
+    if (malloc_size > HEAP_SIZE_MAX) {
+        printf("Tried to allocated %ld bytes, but we only support up to %ld\n", malloc_size, HEAP_SIZE_MAX);
+        abort();
+    }
+    uc_reg_write(uc, UC_X86_REG_RAX, &HEAP_ADDRESS);
+    address += size;
+    uc_reg_write(uc, UC_X86_REG_RIP, &address);
+    already_allocated = true;
+}
+
+// No real free, just set the "used"-flag to false.
+static void hook_free(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
+    if (!already_allocated) {
+        printf("Double free detected. Real bug?\n");
+        abort();
+    }
+    // read the first param.
+    uint64_t free_ptr;
+    uc_reg_read(uc, UC_X86_REG_RDI, &free_ptr);
+    if (free_ptr != HEAP_ADDRESS) {
+        printf("Tried to free wrong mem region: 0x%lx at code loc 0x%lx\n", free_ptr, address);
+        abort();
+    }
+    address += size;
+    uc_reg_write(uc, UC_X86_REG_RIP, &address);
+    already_allocated = false;
+}
+
+int main(int argc, char **argv, char **envp) {
+    if (argc == 1) {
+        printf("Test harness to measure speed against Rust and python. Usage: harness [-t] <inputfile>\n");
+        exit(1);
+    }
+    bool tracing = false;
+    char *filename = argv[1];
+    if (argc > 2 && !strcmp(argv[1], "-t")) {
+        tracing = true;
+        filename = argv[2];
+    }
+
+    uc_engine *uc;
+    uc_err err;
+    uc_hook hooks[2];
+    char *file_contents;
+
+    // Initialize emulator in X86_64 mode
+    err = uc_open(UC_ARCH_X86, UC_MODE_64, &uc);
+    if (err) {
+        printf("Failed on uc_open() with error returned: %u (%s)\n",
+                err, uc_strerror(err));
+        return -1;
+    }
+
+    // If we want tracing output, set the callbacks here
+    if (tracing) {
+        // tracing all basic blocks with customized callback
+        uc_hook_add(uc, &hooks[0], UC_HOOK_BLOCK, hook_block, NULL, 1, 0);
+        uc_hook_add(uc, &hooks[1], UC_HOOK_CODE, hook_code, NULL, 1, 0);
+    }
+
+    printf("The input testcase is set to %s\n", filename);
+
+
+    printf("Loading target from %s\n", BINARY_FILE);
+    off_t len = afl_mmap_file(BINARY_FILE, &file_contents);
+    printf("Binary file size: %lx\n", len);
+    if (len < 0) {
+        perror("Could not read binary to emulate");
+        return -2;
+    }
+    if (len == 0) {
+        fprintf(stderr, "File at '%s' is empty\n", BINARY_FILE);
+        return -3;
+    }
+    if (len > CODE_SIZE_MAX) {
+        fprintf(stderr, "Binary too large, increase CODE_SIZE_MAX\n");
+        return -4;
+    }
+
+    // Map memory.
+    mem_map_checked(uc, BASE_ADDRESS, len, UC_PROT_ALL);
+    fflush(stdout);
+
+    // write machine code to be emulated to memory
+    if (uc_mem_write(uc, BASE_ADDRESS, file_contents, len) != UC_ERR_OK) {
+        puts("Error writing to CODE");
+        exit(-1);
+    }
+
+    // Release copied contents
+    munmap(file_contents, len);
+
+    // Set the program counter to the start of the code
+    FILE *f = fopen("../target.offsets.main", "r");
+    if (!f) {
+        perror("fopen");
+        puts("Could not read offset to main function, make sure you ran ./get_offsets.py");
+        exit(-1);
+    }
+    uint64_t start_address;
+    if(fscanf(f, "%lx", &start_address) == EOF) {
+        puts("Start address not found in target.offests.main");
+        exit(-1);
+    }
+    fclose(f);
+    start_address += BASE_ADDRESS;
+    printf("Execution will start at 0x%lx", start_address);
+    // Set the program counter to the start of the code
+    uc_reg_write(uc, UC_X86_REG_RIP, &start_address); // address of entry point of main()
+
+    // Setup the Stack
+    mem_map_checked(uc, STACK_ADDRESS, STACK_SIZE, UC_PROT_READ | UC_PROT_WRITE);
+    // Setup the stack pointer, but allocate two pointers for the pointers to input
+    uint64_t val = STACK_ADDRESS + STACK_SIZE - 16;
+    //printf("Stack at %lu\n", stack_val);
+    uc_reg_write(uc, UC_X86_REG_RSP, &val);
+
+    // reserve some space for our input data
+    mem_map_checked(uc, INPUT_ADDRESS, INPUT_MAX, UC_PROT_READ);
+
+    // argc = 2
+    val = 2;
+    uc_reg_write(uc, UC_X86_REG_RDI, &val);
+    //RSI points to our little 2 QWORD space at the beginning of the stack...
+    val = STACK_ADDRESS + STACK_SIZE - 16;
+    uc_reg_write(uc, UC_X86_REG_RSI, &val);
+
+    //... which points to the Input. Write the ptr to mem in little endian.
+    uint32_t addr_little = INPUT_ADDRESS;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    // The chances you are on a big_endian system aren't too high, but still...
+    addr_little = __builtin_bswap32(addr_little);
+#endif
+
+    uc_mem_write(uc, STACK_ADDRESS + STACK_SIZE - 16, (char *)&addr_little, 4);
+
+    set_all_hooks(uc, "../target.offsets.malloc", hook_malloc);
+    set_all_hooks(uc, "../target.offsets.magicfn", hook_magicfn);
+    set_all_hooks(uc, "../target.offsets.free", hook_free);
+
+    int exit_count_max = 100;
+    // we don't need more exits for now.
+    uint64_t exits[exit_count_max];
+
+    ssize_t exit_count = read_all_addrs("../target.offsets.main_ends", exits, exit_count_max);
+    if (exit_count < 1) {
+        printf("Could not find exits! aborting.\n");
+        abort();
+    }
+
+    printf("Starting to fuzz. Running from addr %ld to one of these %ld exits:\n", start_address, exit_count);
+    for (ssize_t i = 0; i < exit_count; i++) {
+        printf("    exit %ld: %ld\n", i, exits[i]);
+    }
+
+    fflush(stdout);
+
+    // let's gooo
+    uc_afl_ret afl_ret = uc_afl_fuzz(
+        uc, // The unicorn instance we prepared
+        filename, // Filename of the input to process. In AFL this is usually the '@@' placeholder, outside it's any input file.
+        place_input_callback, // Callback that places the input (automatically loaded from the file at filename) in the unicorn instance
+        exits, // Where to exit (this is an array)
+        exit_count,  // Count of end addresses
+        NULL, // Optional callback to run after each exec
+        false, // true, if the optional callback should be run also for non-crashes
+        1000, // For persistent mode: How many rounds to run
+        NULL // additional data pointer
+    );
+    switch (afl_ret) {
+        case UC_AFL_RET_ERROR:
+            printf("Error starting to fuzz\n");
+            return -3;
+        case UC_AFL_RET_NO_AFL:
+            printf("No AFL attached - we are done with a single run.\n");
+            break;
+        default:
+            break;
+    }
+    return 0;
+}
diff --git a/unicorn_mode/samples/speedtest/get_offsets.py b/unicorn_mode/samples/speedtest/get_offsets.py
new file mode 100644
index 00000000..c9dc76df
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/get_offsets.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+
+"""This simple script uses objdump to parse important addresses from the target"""
+import shlex
+import subprocess
+
+objdump_output = subprocess.check_output(
+    shlex.split("objdump -M intel -D target")
+).decode()
+main_loc = None
+main_ends = []
+main_ended = False
+magicfn_calls = []
+malloc_calls = []
+free_calls = []
+strlen_calls = []
+
+
+def line2addr(line):
+    return "0x" + line.split(":", 1)[0].strip()
+
+
+last_line = None
+for line in objdump_output.split("\n"):
+    line = line.strip()
+
+    def read_addr_if_endswith(findme, list_to):
+        """
+        Look, for example, for the addr like:
+        12a9:       e8 f2 fd ff ff          call   10a0 <free@plt>
+        """
+        if line.endswith(findme):
+            list_to.append(line2addr(line))
+
+    if main_loc is not None and main_ended is False:
+        # We want to know where main ends. An empty line in objdump.
+        if len(line) == 0:
+            main_ends.append(line2addr(last_line))
+            main_ended = True
+        elif "ret" in line:
+            main_ends.append(line2addr(line))
+
+    if "<main>:" in line:
+        if main_loc is not None:
+            raise Exception("Found multiple main functions, odd target!")
+        # main_loc is the label, so it's parsed differently (e.g. `0000000000001220 <main>:`)
+        main_loc = "0x" + line.strip().split(" ", 1)[0].strip()
+    else:
+        for entry in [
+            ("<free@plt>", free_calls),
+            ("<malloc@plt>", malloc_calls),
+            ("<strlen@plt>", strlen_calls),
+            ("<magicfn>", magicfn_calls),
+        ]:
+            read_addr_if_endswith(*entry)
+
+    last_line = line
+
+if main_loc is None:
+    raise Exception(
+        "Could not find main in ./target! Make sure objdump is installed and the target is compiled."
+    )
+
+with open("target.offsets.main", "w") as f:
+    f.write(main_loc)
+with open("target.offsets.main_ends", "w") as f:
+    f.write("\n".join(main_ends))
+with open("target.offsets.magicfn", "w") as f:
+    f.write("\n".join(magicfn_calls))
+with open("target.offsets.malloc", "w") as f:
+    f.write("\n".join(malloc_calls))
+with open("target.offsets.free", "w") as f:
+    f.write("\n".join(free_calls))
+with open("target.offsets.strlen", "w") as f:
+    f.write("\n".join(strlen_calls))
diff --git a/unicorn_mode/samples/speedtest/python/Makefile b/unicorn_mode/samples/speedtest/python/Makefile
new file mode 100644
index 00000000..4282c6cb
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/python/Makefile
@@ -0,0 +1,8 @@
+all: fuzz
+
+../target:
+	$(MAKE) -C ..
+
+fuzz: ../target
+	rm -rf ./output
+	../../../../afl-fuzz -s 1 -U -i ../sample_inputs -o ./output -- python3 harness.py @@
diff --git a/unicorn_mode/samples/speedtest/python/harness.py b/unicorn_mode/samples/speedtest/python/harness.py
new file mode 100644
index 00000000..801ef4d1
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/python/harness.py
@@ -0,0 +1,277 @@
+#!/usr/bin/env python3
+""" 
+    Simple test harness for AFL's Unicorn Mode.
+
+    This loads the speedtest target binary (precompiled X64 code) into
+    Unicorn's memory map for emulation, places the specified input into
+    Argv, and executes main.
+    There should not be any crashes - it's a speedtest against Rust and C.
+
+    Before running this harness, call make in the parent folder.
+
+    Run under AFL as follows:
+
+    $ cd <afl_path>/unicorn_mode/samples/speedtest/python
+    $ ../../../../afl-fuzz -U -i ../sample_inputs -o ./output -- python3 harness.py @@
+"""
+
+import argparse
+import os
+import struct
+
+from unicornafl import *
+from unicornafl.unicorn_const import UC_ARCH_X86, UC_HOOK_CODE, UC_MODE_64
+from unicornafl.x86_const import (
+    UC_X86_REG_RAX,
+    UC_X86_REG_RDI,
+    UC_X86_REG_RIP,
+    UC_X86_REG_RSI,
+    UC_X86_REG_RSP,
+)
+
+# Memory map for the code to be tested
+BASE_ADDRESS = 0x0  # Arbitrary address where the (PIE) target binary will be loaded to
+CODE_SIZE_MAX = 0x00010000  # Max size for the code (64kb)
+INPUT_ADDRESS = 0x00100000  # where we put our stuff
+INPUT_MAX = 0x00100000  # max size for our input
+HEAP_ADDRESS = 0x00200000  # Heap addr
+HEAP_SIZE_MAX = 0x000F0000  # Maximum allowable size for the heap
+STACK_ADDRESS = 0x00400000  # Address of the stack (arbitrarily chosen)
+STACK_SIZE = 0x000F0000  # Size of the stack (arbitrarily chosen)
+
+target_path = os.path.abspath(
+    os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
+)
+target_bin = os.path.join(target_path, "target")
+
+
+def get_offsets_for(name):
+    full_path = os.path.join(target_path, f"target.offsets.{name}")
+    with open(full_path) as f:
+        return [int(x, 16) + BASE_ADDRESS for x in f.readlines()]
+
+
+# Read all offsets from our objdump file
+main_offset = get_offsets_for("main")[0]
+main_ends = get_offsets_for("main_ends")
+malloc_callsites = get_offsets_for("malloc")
+free_callsites = get_offsets_for("free")
+magicfn_callsites = get_offsets_for("magicfn")
+# Joke's on me: strlen got inlined by my compiler
+strlen_callsites = get_offsets_for("strlen")
+
+try:
+    # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
+    from capstone import *
+
+    cs = Cs(CS_ARCH_X86, CS_MODE_64)
+
+    def unicorn_debug_instruction(uc, address, size, user_data):
+        mem = uc.mem_read(address, size)
+        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(
+            bytes(mem), size
+        ):
+            print("    Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
+
+
+except ImportError:
+
+    def unicorn_debug_instruction(uc, address, size, user_data):
+        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
+
+
+def unicorn_debug_block(uc, address, size, user_data):
+    print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
+
+
+def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
+    if access == UC_MEM_WRITE:
+        print(
+            "        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
+    else:
+        print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))
+
+
+def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
+    if access == UC_MEM_WRITE_UNMAPPED:
+        print(
+            "        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
+    else:
+        print(
+            "        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size)
+        )
+
+
+already_allocated = False
+
+
+def malloc_hook(uc, address, size, user_data):
+    """
+    We use a very simple malloc/free stub here, that only works for exactly one allocation at a time.
+    """
+    global already_allocated
+    if already_allocated:
+        print("Double malloc, not supported right now!")
+        os.abort()
+    # read the first param
+    malloc_size = uc.reg_read(UC_X86_REG_RDI)
+    if malloc_size > HEAP_SIZE_MAX:
+        print(
+            f"Tried to allocate {malloc_size} bytes, aint't nobody got space for that! (We may only allocate up to {HEAP_SIZE_MAX})"
+        )
+        os.abort()
+    uc.reg_write(UC_X86_REG_RAX, HEAP_ADDRESS)
+    uc.reg_write(UC_X86_REG_RIP, address + size)
+    already_allocated = True
+
+
+def free_hook(uc, address, size, user_data):
+    """
+    No real free, just set the "used"-flag to false.
+    """
+    global already_allocated
+    if not already_allocated:
+        print("Double free detected. Real bug?")
+        os.abort()
+    # read the first param
+    free_ptr = uc.reg_read(UC_X86_REG_RDI)
+    if free_ptr != HEAP_ADDRESS:
+        print(
+            f"Tried to free wrong mem region: {hex(free_ptr)} at code loc {hex(address)}"
+        )
+        os.abort()
+    uc.reg_write(UC_X86_REG_RIP, address + size)
+    already_allocated = False
+
+
+# def strlen_hook(uc, address, size, user_data):
+#     """
+#     No real strlen, we know the len is == our input.
+#     This completely ignores '\0', but for this target, do we really care?
+#     """
+#     global input_len
+#     print(f"Returning len {input_len}")
+#     uc.reg_write(UC_X86_REG_RAX, input_len)
+#     uc.reg_write(UC_X86_REG_RIP, address + size)
+
+
+def magicfn_hook(uc, address, size, user_data):
+    """
+    This is a fancy print function that we're just going to skip for fuzzing.
+    """
+    uc.reg_write(UC_X86_REG_RIP, address + size)
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Test harness for simple_target.bin")
+    parser.add_argument(
+        "input_file",
+        type=str,
+        help="Path to the file containing the mutated input to load",
+    )
+    parser.add_argument(
+        "-t",
+        "--trace",
+        default=False,
+        action="store_true",
+        help="Enables debug tracing",
+    )
+    args = parser.parse_args()
+
+    # Instantiate an x86_64 Unicorn Engine instance
+    uc = Uc(UC_ARCH_X86, UC_MODE_64)
+
+    if args.trace:
+        uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
+        uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
+        uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
+        uc.hook_add(
+            UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID,
+            unicorn_debug_mem_invalid_access,
+        )
+
+    print("The input testcase is set to {}".format(args.input_file))
+
+    # ---------------------------------------------------
+    # Load the binary to emulate and map it into memory
+    with open(target_bin, "rb") as f:
+        binary_code = f.read()
+
+    # Apply constraints to the mutated input
+    if len(binary_code) > CODE_SIZE_MAX:
+        print("Binary code is too large (> {} bytes)".format(CODE_SIZE_MAX))
+        return
+
+    # Write the binary to its place in mem
+    uc.mem_map(BASE_ADDRESS, CODE_SIZE_MAX)
+    uc.mem_write(BASE_ADDRESS, binary_code)
+
+    # Set the program counter to the start of the code
+    uc.reg_write(UC_X86_REG_RIP, main_offset)
+
+    # Setup the stack.
+    uc.mem_map(STACK_ADDRESS, STACK_SIZE)
+    # Setup the stack pointer, but allocate two pointers for the pointers to input.
+    uc.reg_write(UC_X86_REG_RSP, STACK_ADDRESS + STACK_SIZE - 16)
+
+    # Setup our input space, and push the pointer to it in the function params
+    uc.mem_map(INPUT_ADDRESS, INPUT_MAX)
+    # We have argc = 2
+    uc.reg_write(UC_X86_REG_RDI, 2)
+    # RSI points to our little 2 QWORD space at the beginning of the stack...
+    uc.reg_write(UC_X86_REG_RSI, STACK_ADDRESS + STACK_SIZE - 16)
+    # ... which points to the Input. Write the ptr to mem in little endian.
+    uc.mem_write(STACK_ADDRESS + STACK_SIZE - 16, struct.pack("<Q", INPUT_ADDRESS))
+
+    for addr in malloc_callsites:
+        uc.hook_add(UC_HOOK_CODE, malloc_hook, begin=addr, end=addr)
+
+    for addr in free_callsites:
+        uc.hook_add(UC_HOOK_CODE, free_hook, begin=addr, end=addr)
+
+    if len(strlen_callsites):
+        # strlen got inlined for my compiler.
+        print(
+            "Oops, your compiler emitted strlen as function. You may have to change the harness."
+        )
+    # for addr in strlen_callsites:
+    #     uc.hook_add(UC_HOOK_CODE, strlen_hook, begin=addr, end=addr)
+
+    for addr in magicfn_callsites:
+        uc.hook_add(UC_HOOK_CODE, magicfn_hook, begin=addr, end=addr + 1)
+
+    # -----------------------------------------------------
+    # Set up a callback to place input data (do little work here, it's called for every single iteration! This code is *HOT*)
+    # We did not pass in any data and don't use persistent mode, so we can ignore these params.
+    # Be sure to check out the docstrings for the uc.afl_* functions.
+    def place_input_callback(uc, input, persistent_round, data):
+        # Apply constraints to the mutated input
+        input_len = len(input)
+        # global input_len
+        if input_len > INPUT_MAX:
+            # print("Test input is too long (> {} bytes)")
+            return False
+
+        # print(f"Placing input: {input} in round {persistent_round}")
+
+        # Make sure the string is always 0-terminated (as it would be "in the wild")
+        input[-1] = b"\0"
+
+        # Write the mutated command into the data buffer
+        uc.mem_write(INPUT_ADDRESS, input)
+        # uc.reg_write(UC_X86_REG_RIP, main_offset)
+
+    print(f"Starting to fuzz. Running from addr {main_offset} to one of {main_ends}")
+    # Start the fuzzer.
+    uc.afl_fuzz(args.input_file, place_input_callback, main_ends, persistent_iters=1000)
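+    # persistent_iters=1000 keeps each forked child alive for 1000 testcases
+    # before re-forking, which drastically cuts fork overhead.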
+
+
+if __name__ == "__main__":
+    main()
diff --git a/unicorn_mode/samples/speedtest/rust/.gitignore b/unicorn_mode/samples/speedtest/rust/.gitignore
new file mode 100644
index 00000000..a9d37c56
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/rust/.gitignore
@@ -0,0 +1,2 @@
+target
+Cargo.lock
diff --git a/unicorn_mode/samples/speedtest/rust/Cargo.toml b/unicorn_mode/samples/speedtest/rust/Cargo.toml
new file mode 100644
index 00000000..c19ee0a1
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/rust/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "unicornafl_harness"
+version = "0.1.0"
+authors = ["Dominik Maier <domenukk@gmail.com>"]
+edition = "2018"
+
+[profile.release]
+lto = true
+opt-level = 3
+panic = "abort"
+
+[dependencies]
+unicornafl = { path = "../../../unicornafl/bindings/rust/", version="1.0.0" }
+capstone="0.6.0"
+libc="0.2.66"
\ No newline at end of file
diff --git a/unicorn_mode/samples/speedtest/rust/Makefile b/unicorn_mode/samples/speedtest/rust/Makefile
new file mode 100644
index 00000000..fe18d6ee
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/rust/Makefile
@@ -0,0 +1,17 @@
+all: fuzz
+
+clean:
+	cargo clean
+
+./target/release/unicornafl_harness: ./src/main.rs
+	cargo build --release
+
+./target/debug/unicornafl_harness: ./src/main.rs
+	cargo build
+
+../target:
+	$(MAKE) -C ..
+
+fuzz: ../target ./target/release/unicornafl_harness
+	rm -rf ./output
+	SKIP_BINCHECK=1 ../../../../afl-fuzz -s 1 -i ../sample_inputs -o ./output -- ./target/release/unicornafl_harness @@
diff --git a/unicorn_mode/samples/speedtest/rust/src/main.rs b/unicorn_mode/samples/speedtest/rust/src/main.rs
new file mode 100644
index 00000000..1e35ff0b
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/rust/src/main.rs
@@ -0,0 +1,232 @@
+extern crate capstone;
+extern crate libc;
+
+use core::cell::Cell;
+use std::{
+    env,
+    fs::File,
+    io::{self, Read},
+    process::abort,
+    rc::Rc,
+    str,
+};
+
+use unicornafl::{
+    unicorn_const::{uc_error, Arch, Mode, Permission},
+    RegisterX86::{self, *},
+    Unicorn, UnicornHandle,
+};
+
+const BINARY: &str = &"../target";
+
+// Memory map for the code to be tested
+// Arbitrary address where code to test will be loaded
+const BASE_ADDRESS: u64 = 0x0;
+// Max size for the code (64kb)
+const CODE_SIZE_MAX: u64 = 0x00010000;
+// Location where the input will be placed (make sure the emulated program knows this somehow, too ;) )
+const INPUT_ADDRESS: u64 = 0x00100000;
+// Maximum size for our input
+const INPUT_MAX: u64 = 0x00100000;
+// Where our pseudo-heap is at
+const HEAP_ADDRESS: u64 = 0x00200000;
+// Maximum allowable size for the heap
+const HEAP_SIZE_MAX: u64 = 0x000F0000;
+// Address of the stack (Some random address again)
+const STACK_ADDRESS: u64 = 0x00400000;
+// Size of the stack (arbitrarily chosen, just make it big enough)
+const STACK_SIZE: u64 = 0x000F0000;
+
+fn read_file(filename: &str) -> Result<Vec<u8>, io::Error> {
+    let mut f = File::open(filename)?;
+    let mut buffer = Vec::new();
+    f.read_to_end(&mut buffer)?;
+    Ok(buffer)
+}
+
+/// Our location parser
+fn parse_locs(loc_name: &str) -> Result<Vec<u64>, io::Error> {
+    let contents = &read_file(&format!("../target.offsets.{}", loc_name))?;
+    //println!("Read: {:?}", contents);
+    Ok(str_from_u8_unchecked(&contents)
+        .split("\n")
+        .map(|x| {
+            //println!("Trying to convert {}", &x[2..]);
+            let result = u64::from_str_radix(&x[2..], 16);
+            result.unwrap()
+        })
+        .collect())
+}
+
+// find null terminated string in vec
+pub fn str_from_u8_unchecked(utf8_src: &[u8]) -> &str {
+    let nul_range_end = utf8_src
+        .iter()
+        .position(|&c| c == b'\0')
+        .unwrap_or(utf8_src.len());
+    unsafe { str::from_utf8_unchecked(&utf8_src[0..nul_range_end]) }
+}
+
+fn align(size: u64) -> u64 {
+    const ALIGNMENT: u64 = 0x1000;
+    if size % ALIGNMENT == 0 {
+        size
+    } else {
+        ((size / ALIGNMENT) + 1) * ALIGNMENT
+    }
+}
+
+fn main() {
+    let args: Vec<String> = env::args().collect();
+    if args.len() == 1 {
+        println!("Missing parameter <uclation_input> (@@ for AFL)");
+        return;
+    }
+    let input_file = &args[1];
+    println!("The input testcase is set to {}", input_file);
+    fuzz(input_file).unwrap();
+}
+
+fn fuzz(input_file: &str) -> Result<(), uc_error> {
+    let mut unicorn = Unicorn::new(Arch::X86, Mode::MODE_64, 0)?;
+    let mut uc: UnicornHandle<'_, _> = unicorn.borrow();
+
+    let binary = read_file(BINARY).expect(&format!("Could not read target binary: {}", BINARY));
+    let _aligned_binary_size = align(binary.len() as u64);
+    // Apply constraints to the mutated input
+    if binary.len() as u64 > CODE_SIZE_MAX {
+        println!("Binary code is too large (> {} bytes)", CODE_SIZE_MAX);
+        return Err(uc_error::NOMEM);
+    }
+
+    // Write the binary to its place in mem
+    uc.mem_map(BASE_ADDRESS, CODE_SIZE_MAX as usize, Permission::ALL)?;
+    uc.mem_write(BASE_ADDRESS, &binary)?;
+
+    // Set the program counter to the start of the code
+    let main_locs = parse_locs("main").unwrap();
+    //println!("Entry Point: {:x}", main_locs[0]);
+    uc.reg_write(RegisterX86::RIP as i32, main_locs[0])?;
+
+    // Setup the stack.
+    uc.mem_map(
+        STACK_ADDRESS,
+        STACK_SIZE as usize,
+        Permission::READ | Permission::WRITE,
+    )?;
+    // Setup the stack pointer, but allocate two pointers for the pointers to input.
+    uc.reg_write(RSP as i32, STACK_ADDRESS + STACK_SIZE - 16)?;
+
+    // Setup our input space, and push the pointer to it in the function params
+    uc.mem_map(INPUT_ADDRESS, INPUT_MAX as usize, Permission::READ)?;
+    // We have argc = 2
+    uc.reg_write(RDI as i32, 2)?;
+    // RSI points to our little 2 QWORD space at the beginning of the stack...
+    uc.reg_write(RSI as i32, STACK_ADDRESS + STACK_SIZE - 16)?;
+    // ... which points to the Input. Write the ptr to mem in little endian.
+    uc.mem_write(
+        STACK_ADDRESS + STACK_SIZE - 16,
+        &(INPUT_ADDRESS as u32).to_le_bytes(),
+    )?;
+
+    // Shared allocation flag; Rc so that the cloned hook closures below all
+    // see the same state (a bare Cell would be copied by each clone).
+    let already_allocated = Rc::new(Cell::new(false));
+
+    let already_allocated_malloc = already_allocated.clone();
+    // We use a very simple malloc/free stub here,
+    // that only works for exactly one allocation at a time.
+    let hook_malloc = move |mut uc: UnicornHandle<'_, _>, addr: u64, size: u32| {
+        if already_allocated_malloc.get() {
+            println!("Double malloc, not supported right now!");
+            abort();
+        }
+        // read the first param
+        let malloc_size = uc.reg_read(RDI as i32).unwrap();
+        if malloc_size > HEAP_SIZE_MAX {
+            println!(
+                "Tried to allocate {} bytes, but we may only allocate up to {}",
+                malloc_size, HEAP_SIZE_MAX
+            );
+            abort();
+        }
+        uc.reg_write(RAX as i32, HEAP_ADDRESS).unwrap();
+        uc.reg_write(RIP as i32, addr + size as u64).unwrap();
+        already_allocated_malloc.set(true);
+    };
+
+    let already_allocated_free = already_allocated.clone();
+    // No real free, just set the "used"-flag to false.
+    let hook_free = move |mut uc: UnicornHandle<'_, _>, addr, size| {
+        if !already_allocated_free.get() {
+            println!("Double free detected. Real bug?");
+            abort();
+        }
+        // read the first param
+        let free_ptr = uc.reg_read(RDI as i32).unwrap();
+        if free_ptr != HEAP_ADDRESS {
+            println!(
+                "Tried to free wrong mem region {:x} at code loc {:x}",
+                free_ptr, addr
+            );
+            abort();
+        }
+        uc.reg_write(RIP as i32, addr + size as u64).unwrap();
+        already_allocated_free.set(false);
+    };
+
+    /*
+        BEGIN FUNCTION HOOKS
+    */
+
+    // This is a fancy print function that we're just going to skip for fuzzing.
+    let hook_magicfn = move |mut uc: UnicornHandle<'_, _>, addr, size| {
+        uc.reg_write(RIP as i32, addr + size as u64).unwrap();
+    };
+
+    for addr in parse_locs("malloc").unwrap() {
+        //hook!(addr, hook_malloc, "malloc");
+        uc.add_code_hook(addr, addr, Box::new(hook_malloc.clone()))?;
+    }
+
+    for addr in parse_locs("free").unwrap() {
+        uc.add_code_hook(addr, addr, Box::new(hook_free.clone()))?;
+    }
+
+    for addr in parse_locs("magicfn").unwrap() {
+        uc.add_code_hook(addr, addr, Box::new(hook_magicfn.clone()))?;
+    }
+
+    let place_input_callback =
+        |mut uc: UnicornHandle<'_, _>, afl_input: &mut [u8], _persistent_round| {
+            // apply constraints to the mutated input
+            if afl_input.is_empty() || afl_input.len() > INPUT_MAX as usize {
+                //println!("Skipping testcase with len {}", afl_input.len());
+                return false;
+            }
+
+            afl_input[afl_input.len() - 1] = b'\0';
+            uc.mem_write(INPUT_ADDRESS, afl_input).unwrap();
+            true
+        };
+
+    // return true if the last run should be counted as crash
+    let crash_validation_callback =
+        |_uc: UnicornHandle<'_, _>, result, _input: &[u8], _persistent_round| {
+            result != uc_error::OK
+        };
+
+    let end_addrs = parse_locs("main_ends").unwrap();
+
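+    // Argument order mirrors uc_afl_fuzz() in the C API: input file,
+    // input-placement callback, exit addresses, crash-validation callback,
+    // whether to validate after every exec, and persistent iterations per
+    // fork.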
+    let ret = uc.afl_fuzz(
+        input_file,
+        Box::new(place_input_callback),
+        &end_addrs,
+        Box::new(crash_validation_callback),
+        false,
+        1000,
+    );
+
+    match ret {
+        Ok(_) => {}
+        Err(e) => panic!("found non-ok unicorn exit: {:?}", e),
+    }
+
+    Ok(())
+}
diff --git a/unicorn_mode/samples/speedtest/sample_inputs/a b/unicorn_mode/samples/speedtest/sample_inputs/a
new file mode 100644
index 00000000..78981922
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/sample_inputs/a
@@ -0,0 +1 @@
+a
diff --git a/unicorn_mode/samples/speedtest/target.c b/unicorn_mode/samples/speedtest/target.c
new file mode 100644
index 00000000..8359a110
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/target.c
@@ -0,0 +1,77 @@
+/*
+ * Sample target file to test afl-unicorn fuzzing capabilities.
+ * This is a very trivial example that will, however, never crash.
+ * Crashing would change the execution speed.
+ *
+ */
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Random print function we can hook in our harness to test hook speeds.
+char magicfn(char to_print) {
+  puts("Printing a char, just minding my own business: ");
+  putchar(to_print);
+  putchar('\n');
+  return to_print;
+}
+
+int main(int argc, char** argv) {
+  if (argc < 2) {
+    printf("Gimme input pl0x!\n");
+    return -1;
+  }
+ 
+  // Make sure the hooks work...
+  char *test = malloc(1024);
+  if (!test) {
+    printf("Uh-Oh, malloc doesn't work!");
+    abort();
+  }
+  free(test);
+
+  char *data_buf = argv[1];
+  // We can start the unicorn hooking here.
+  uint64_t data_len = strlen(data_buf);
+  if (data_len < 20) return -2;
+
+  for (; data_len --> 0 ;) {
+    char *buf_cpy = NULL;
+    if (data_len) {
+      buf_cpy = malloc(data_len);
+      if (!buf_cpy) {
+        puts("Oof, malloc failed! :/");
+        abort();
+      }
+      memcpy(buf_cpy, data_buf, data_len);
+    }
+    if (data_len >= 18) {
+      free(buf_cpy);
+      continue;
+    }
+    if (data_len > 2 && data_len < 18) {
+      buf_cpy[data_len - 1] = (char) 0x90;
+    } else if (data_buf[9] == (char) 0x90 && data_buf[10] != 0x00 && buf_cpy[11] == (char) 0x90) {
+        // Exercise magicfn if data[10] is not zero while [9] and [11] are 0x90
+        unsigned char valid_read = buf_cpy[10];
+        if (magicfn(valid_read) != valid_read) {
+          puts("Oof, the hook for data_buf[10] is broken?");
+          abort();
+        }
+    }
+    free(buf_cpy);
+  }
+  if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) {
+    // Exercise magicfn if (0x10 < data[0] < 0x20) and data[1] > data[2]
+    unsigned char valid_read = data_buf[0];
+    if (magicfn(valid_read) != valid_read) {
+      puts("Oof, the hook for data_buf[0] is broken?");
+      abort();
+    }
+  } 
+
+  magicfn('q');
+
+  return 0;
+}
diff --git a/unicorn_mode/unicornafl b/unicorn_mode/unicornafl
-Subproject 83d1b426be5d373edcc81576f58a10f617df143
+Subproject fb2fc9f25df32f17f6b6b859e4dbd70f9a857e0
diff --git a/unicorn_mode/update_uc_ref.sh b/unicorn_mode/update_uc_ref.sh
index a2613942..7c1c7778 100755
--- a/unicorn_mode/update_uc_ref.sh
+++ b/unicorn_mode/update_uc_ref.sh
@@ -19,7 +19,7 @@ if [ "$NEW_VERSION" = "-h" ]; then
   exit 1
 fi
 
-git submodule init && git submodule update || exit 1
+git submodule init && git submodule update unicornafl || exit 1
 cd ./unicornafl || exit 1
 git fetch origin dev 1>/dev/null || exit 1
 git stash 1>/dev/null 2>/dev/null
diff --git a/utils/afl_frida/README.md b/utils/afl_frida/README.md
index 7743479b..68b62009 100644
--- a/utils/afl_frida/README.md
+++ b/utils/afl_frida/README.md
@@ -20,7 +20,7 @@ search and edit the `STEP 1`, `STEP 2` and `STEP 3` locations.
 
 Example (after modifying afl-frida.c to your needs and compile it):
 ```
-LD_LIBRARY_PATH=/path/to/the/target/library afl-fuzz -i in -o out -- ./afl-frida
+LD_LIBRARY_PATH=/path/to/the/target/library/ afl-fuzz -i in -o out -- ./afl-frida
 ```
 (or even remote via afl-network-proxy).
 
diff --git a/utils/afl_frida/afl-frida.c b/utils/afl_frida/afl-frida.c
index 087f18e8..711d8f33 100644
--- a/utils/afl_frida/afl-frida.c
+++ b/utils/afl_frida/afl-frida.c
@@ -78,11 +78,11 @@ extern unsigned int * __afl_fuzz_len;
 extern unsigned char *__afl_fuzz_ptr;
 
 // Notify AFL about persistent mode.
-static volatile char AFL_PERSISTENT[] = "##SIG_AFL_PERSISTENT##";
+static volatile char AFL_PERSISTENT[] = "##SIG_AFL_PERSISTENT##\0";
 int                  __afl_persistent_loop(unsigned int);
 
 // Notify AFL about deferred forkserver.
-static volatile char AFL_DEFER_FORKSVR[] = "##SIG_AFL_DEFER_FORKSRV##";
+static volatile char AFL_DEFER_FORKSVR[] = "##SIG_AFL_DEFER_FORKSRV##\0";
 void                 __afl_manual_init();
 
 // Because we do our own logging.
@@ -94,6 +94,8 @@ typedef struct {
 
   GumAddress base_address;
   guint64    code_start, code_end;
+  GumAddress current_log_impl;
+  uint64_t   afl_prev_loc;
 
 } range_t;
 
@@ -109,12 +111,58 @@ inline static void afl_maybe_log(guint64 current_pc) {
 
 }
 
+#if GUM_NATIVE_CPU == GUM_CPU_AMD64
+
+static const guint8 afl_maybe_log_code[] = {
+
+    0x9c,  // pushfq
+    0x50,  // push rax
+    0x51,  // push rcx
+    0x52,  // push rdx
+    0x56,  // push rsi
+
+    0x89, 0xf8,                                // mov eax, edi
+    0xc1, 0xe0, 0x08,                          // shl eax, 8
+    0xc1, 0xef, 0x04,                          // shr edi, 4
+    0x31, 0xc7,                                // xor edi, eax
+    0x0f, 0xb7, 0xc7,                          // movzx eax, di
+    0x48, 0x8d, 0x0d, 0x30, 0x00, 0x00, 0x00,  // lea rcx, sym._afl_area_ptr_ptr
+    0x48, 0x8b, 0x09,                          // mov rcx, qword [rcx]
+    0x48, 0x8b, 0x09,                          // mov rcx, qword [rcx]
+    0x48, 0x8d, 0x15, 0x1b, 0x00, 0x00, 0x00,  // lea rdx, sym._afl_prev_loc_ptr
+    0x48, 0x8b, 0x32,                          // mov rsi, qword [rdx]
+    0x48, 0x8b, 0x36,                          // mov rsi, qword [rsi]
+    0x48, 0x31, 0xc6,                          // xor rsi, rax
+    0xfe, 0x04, 0x31,                          // inc byte [rcx + rsi]
+
+    0x48, 0xd1, 0xe8,  // shr rax, 1
+    0x48, 0x8b, 0x0a,  // mov rcx, qword [rdx]
+    0x48, 0x89, 0x01,  // mov qword [rcx], rax
+
+    0x5e,  // pop rsi
+    0x5a,  // pop rdx
+    0x59,  // pop rcx
+    0x58,  // pop rax
+    0x9d,  // popfq
+
+    0xc3,  // ret
+           // Read-only data goes here:
+           // uint64_t* afl_prev_loc_ptr
+           // uint8_t** afl_area_ptr_ptr
+           // unsigned int afl_instr_rms
+
+};
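+// Rough C equivalent of the stub above (pc being the lower 32 bit of the
+// block address received in RDI):
+//
+//   uint16_t cur = (uint16_t)((pc << 8) ^ (pc >> 4));
+//   __afl_area_ptr[cur ^ range->afl_prev_loc]++;
+//   range->afl_prev_loc = cur >> 1;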
+
+#else
+
 static void on_basic_block(GumCpuContext *context, gpointer user_data) {
 
   afl_maybe_log((guint64)user_data);
 
 }
 
+#endif
+
 void instr_basic_block(GumStalkerIterator *iterator, GumStalkerOutput *output,
                        gpointer user_data) {
 
@@ -129,8 +177,45 @@ void instr_basic_block(GumStalkerIterator *iterator, GumStalkerOutput *output,
       if (instr->address >= range->code_start &&
           instr->address <= range->code_end) {
 
+#if GUM_NATIVE_CPU == GUM_CPU_AMD64
+        GumX86Writer *cw = output->writer.x86;
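+        // Emit a fresh copy of the logging stub if none exists yet or the
+        // current one is no longer reachable with a near call from here.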
+        if (range->current_log_impl == 0 ||
+            !gum_x86_writer_can_branch_directly_between(
+                cw->pc, range->current_log_impl) ||
+            !gum_x86_writer_can_branch_directly_between(
+                cw->pc + 128, range->current_log_impl)) {
+
+          gconstpointer after_log_impl = cw->code + 1;  // unique label id
+
+          gum_x86_writer_put_jmp_near_label(cw, after_log_impl);
+
+          range->current_log_impl = cw->pc;
+          gum_x86_writer_put_bytes(cw, afl_maybe_log_code,
+                                   sizeof(afl_maybe_log_code));
+
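+          // Append the data words the stub loads RIP-relative: first
+          // &range->afl_prev_loc, then &__afl_area_ptr.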
+          uint64_t *afl_prev_loc_ptr = &range->afl_prev_loc;
+          uint8_t **afl_area_ptr_ptr = &__afl_area_ptr;
+          gum_x86_writer_put_bytes(cw, (const guint8 *)&afl_prev_loc_ptr,
+                                   sizeof(afl_prev_loc_ptr));
+          gum_x86_writer_put_bytes(cw, (const guint8 *)&afl_area_ptr_ptr,
+                                   sizeof(afl_area_ptr_ptr));
+          gum_x86_writer_put_label(cw, after_log_impl);
+
+        }
+
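+        // Out-of-line call into the stub: step over the System V red zone
+        // and pass the address of the instrumented block in RDI.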
+        gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP,
+                                              -GUM_RED_ZONE_SIZE);
+        gum_x86_writer_put_push_reg(cw, GUM_REG_RDI);
+        gum_x86_writer_put_mov_reg_address(cw, GUM_REG_RDI,
+                                           GUM_ADDRESS(instr->address));
+        gum_x86_writer_put_call_address(cw, range->current_log_impl);
+        gum_x86_writer_put_pop_reg(cw, GUM_REG_RDI);
+        gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP,
+                                              GUM_RED_ZONE_SIZE);
+#else
         gum_stalker_iterator_put_callout(iterator, on_basic_block,
                                          (gpointer)instr->address, NULL);
+#endif
         begin = FALSE;
 
       }
@@ -153,7 +238,7 @@ static int enumerate_ranges(const GumRangeDetails *details,
 
 }
 
-int main(int argc, char** argv) {
+int main(int argc, char **argv) {
 
 #ifndef __APPLE__
   (void)personality(ADDR_NO_RANDOMIZE);  // disable ASLR
@@ -166,10 +251,15 @@ int main(int argc, char** argv) {
 
   void *dl = NULL;
   if (argc > 2) {
+
     dl = dlopen(argv[1], RTLD_LAZY);
+
   } else {
+
     dl = dlopen(TARGET_LIBRARY, RTLD_LAZY);
+
   }
+
   if (!dl) {
 
     if (argc > 2)
@@ -197,17 +287,18 @@ int main(int argc, char** argv) {
   // END STEP 2
 
   if (!getenv("AFL_FRIDA_TEST_INPUT")) {
+
     gum_init_embedded();
     if (!gum_stalker_is_supported()) {
-  
+
       gum_deinit_embedded();
       return 1;
-  
+
     }
-  
+
     GumStalker *stalker = gum_stalker_new();
-  
-    GumAddress     base_address;
+
+    GumAddress base_address;
     if (argc > 2)
       base_address = gum_module_find_base_address(argv[1]);
     else
@@ -215,87 +306,89 @@ int main(int argc, char** argv) {
     GumMemoryRange code_range;
     if (argc > 2)
       gum_module_enumerate_ranges(argv[1], GUM_PAGE_RX, enumerate_ranges,
-                                &code_range);
+                                  &code_range);
     else
       gum_module_enumerate_ranges(TARGET_LIBRARY, GUM_PAGE_RX, enumerate_ranges,
-                                &code_range);
-  
+                                  &code_range);
+
     guint64 code_start = code_range.base_address;
     guint64 code_end = code_range.base_address + code_range.size;
-    range_t instr_range = {0, code_start, code_end};
-  
+    range_t instr_range = {0, code_start, code_end, 0, 0};
+
     printf("Frida instrumentation: base=0x%lx instrumenting=0x%lx-%lx\n",
            base_address, code_start, code_end);
     if (!code_start || !code_end) {
-  
+
       if (argc > 2)
         fprintf(stderr, "Error: no valid memory address found for %s\n",
-              argv[1]);
+                argv[1]);
       else
         fprintf(stderr, "Error: no valid memory address found for %s\n",
-              TARGET_LIBRARY);
+                TARGET_LIBRARY);
       exit(-1);
-  
+
     }
-  
+
     GumStalkerTransformer *transformer =
         gum_stalker_transformer_make_from_callback(instr_basic_block,
                                                    &instr_range, NULL);
-  
+
     // to ensure that the signatures are not optimized out
-    memcpy(__afl_area_ptr, (void *)AFL_PERSISTENT, sizeof(AFL_PERSISTENT) + 1);
+    memcpy(__afl_area_ptr, (void *)AFL_PERSISTENT, sizeof(AFL_PERSISTENT));
     memcpy(__afl_area_ptr + 32, (void *)AFL_DEFER_FORKSVR,
-           sizeof(AFL_DEFER_FORKSVR) + 1);
+           sizeof(AFL_DEFER_FORKSVR));
     __afl_manual_init();
-  
+
     //
     // any expensive target library initialization that has to be done just once
     // - put that here
     //
-  
+
     gum_stalker_follow_me(stalker, transformer, NULL);
-  
+
     while (__afl_persistent_loop(UINT32_MAX) != 0) {
-  
+
       previous_pc = 0;  // Required!
-  
-  #ifdef _DEBUG
+
+#ifdef _DEBUG
       fprintf(stderr, "CLIENT crc: %016llx len: %u\n",
               hash64(__afl_fuzz_ptr, *__afl_fuzz_len), *__afl_fuzz_len);
       fprintf(stderr, "RECV:");
       for (int i = 0; i < *__afl_fuzz_len; i++)
         fprintf(stderr, "%02x", __afl_fuzz_ptr[i]);
       fprintf(stderr, "\n");
-  #endif
-  
+#endif
+
       // STEP 3: ensure the minimum length is present and set up the target
       //         function to fuzz.
-  
+
       if (*__afl_fuzz_len > 0) {
-  
+
         __afl_fuzz_ptr[*__afl_fuzz_len] = 0;  // if you need to null terminate
         (*o_function)(__afl_fuzz_ptr, *__afl_fuzz_len);
-  
+
       }
-  
+
       // END STEP 3
-  
+
     }
-  
+
     gum_stalker_unfollow_me(stalker);
-  
+
     while (gum_stalker_garbage_collect(stalker))
       g_usleep(10000);
-  
+
     g_object_unref(stalker);
     g_object_unref(transformer);
     gum_deinit_embedded();
 
   } else {
-    char buf[8*1024] = {0};
-    int count = read(0, buf, sizeof(buf));
-    buf[8*1024-1] = '\0';
+
+    char buf[8 * 1024] = {0};
+    int  count = read(0, buf, sizeof(buf));
+    buf[8 * 1024 - 1] = '\0';
     (*o_function)(buf, count);
+
   }
 
   return 0;
diff --git a/utils/afl_untracer/afl-untracer.c b/utils/afl_untracer/afl-untracer.c
index 695f8dd1..2baeb58d 100644
--- a/utils/afl_untracer/afl-untracer.c
+++ b/utils/afl_untracer/afl-untracer.c
@@ -56,9 +56,9 @@
 #include <sys/shm.h>
 #include <sys/wait.h>
 #include <sys/types.h>
-#include <sys/personality.h>
 
 #if defined(__linux__)
+  #include <sys/personality.h>
   #include <sys/ucontext.h>
 #elif defined(__APPLE__) && defined(__LP64__)
   #include <mach-o/dyld_images.h>
@@ -143,7 +143,7 @@ void read_library_information(void) {
       b = buf;
       m = index(buf, '-');
       e = index(buf, ' ');
-      if ((n = rindex(buf, '/')) == NULL) n = rindex(buf, ' ');
+      if ((n = strrchr(buf, '/')) == NULL) n = strrchr(buf, ' ');
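+      // strrchr() is the portable spelling of the legacy rindex()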
       if (n &&
           ((*n >= '0' && *n <= '9') || *n == '[' || *n == '{' || *n == '('))
         n = NULL;
@@ -480,6 +480,9 @@ void setup_trap_instrumentation(void) {
     // Index into the coverage bitmap for the current trap instruction.
 #ifdef __aarch64__
   uint64_t bitmap_index = 0;
+  #ifdef __APPLE__
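+  // Apple silicon: allow this thread to write to JIT (MAP_JIT) pages before
+  // the trap instructions are patched in.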
+  pthread_jit_write_protect_np(0);
+  #endif
 #else
   uint32_t bitmap_index = 0;
 #endif
@@ -508,7 +511,6 @@ void setup_trap_instrumentation(void) {
               lib_size);
 
       lib_addr = (u8 *)lib_base->addr_start;
-
       // Make library code writable.
       if (mprotect((void *)lib_addr, lib_size,
                    PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
@@ -625,8 +627,13 @@ static void sigtrap_handler(int signum, siginfo_t *si, void *context) {
   // Must re-execute the instruction, so decrement PC by one instruction.
   ucontext_t *ctx = (ucontext_t *)context;
 #if defined(__APPLE__) && defined(__LP64__)
+  #if defined(__x86_64__)
   ctx->uc_mcontext->__ss.__rip -= 1;
   addr = ctx->uc_mcontext->__ss.__rip;
+  #else
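+  // AArch64 instructions, including BRK, are a fixed 4 bytes wide.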
+  ctx->uc_mcontext->__ss.__pc -= 4;
+  addr = ctx->uc_mcontext->__ss.__pc;
+  #endif
 #elif defined(__linux__)
   #if defined(__x86_64__) || defined(__i386__)
   ctx->uc_mcontext.gregs[REG_RIP] -= 1;
@@ -676,7 +683,9 @@ static void sigtrap_handler(int signum, siginfo_t *si, void *context) {
 /* the MAIN function */
 int main(int argc, char *argv[]) {
 
+#if defined(__linux__)
   (void)personality(ADDR_NO_RANDOMIZE);  // disable ASLR
+#endif
 
   pid = getpid();
   if (getenv("AFL_DEBUG")) debug = 1;
diff --git a/utils/afl_untracer/ida_get_patchpoints.py b/utils/afl_untracer/ida_get_patchpoints.py
index 43cf6d89..807685b3 100644
--- a/utils/afl_untracer/ida_get_patchpoints.py
+++ b/utils/afl_untracer/ida_get_patchpoints.py
@@ -11,6 +11,7 @@ import idc
 # See https://www.hex-rays.com/products/ida/support/ida74_idapython_no_bc695_porting_guide.shtml
 
 from os.path import expanduser
+
 home = expanduser("~")
 
 patchpoints = set()
@@ -18,7 +19,7 @@ patchpoints = set()
 max_offset = 0
 for seg_ea in idautils.Segments():
     name = idc.get_segm_name(seg_ea)
-    #print("Segment: " + name)
+    # print("Segment: " + name)
     if name != "__text" and name != ".text":
         continue
 
@@ -26,7 +27,7 @@ for seg_ea in idautils.Segments():
     end = idc.get_segm_end(seg_ea)
     first = 0
     subtract_addr = 0
-    #print("Start: " + hex(start) + " End: " + hex(end))
+    # print("Start: " + hex(start) + " End: " + hex(end))
     for func_ea in idautils.Functions(start, end):
         f = idaapi.get_func(func_ea)
         if not f:
@@ -37,10 +38,10 @@ for seg_ea in idautils.Segments():
                     if block.start_ea >= 0x1000:
                         subtract_addr = 0x1000
                         first = 1
-                        
+
                 max_offset = max(max_offset, block.start_ea)
                 patchpoints.add(block.start_ea - subtract_addr)
-            #else:
+            # else:
             #    print("Warning: broken CFG?")
 
 # Round up max_offset to page size
@@ -52,11 +53,11 @@ if rem != 0:
 print("Writing to " + home + "/Desktop/patches.txt")
 
 with open(home + "/Desktop/patches.txt", "w") as f:
-    f.write(ida_nalt.get_root_filename() + ':' + hex(size) + '\n')
-    f.write('\n'.join(map(hex, sorted(patchpoints))))
-    f.write('\n')
+    f.write(ida_nalt.get_root_filename() + ":" + hex(size) + "\n")
+    f.write("\n".join(map(hex, sorted(patchpoints))))
+    f.write("\n")
 
 print("Done, found {} patchpoints".format(len(patchpoints)))
 
 # For headless script running remove the comment from the next line
-#ida_pro.qexit()
+# ida_pro.qexit()
diff --git a/utils/aflpp_driver/aflpp_driver.c b/utils/aflpp_driver/aflpp_driver.c
index 30e6ebb9..6af79e14 100644
--- a/utils/aflpp_driver/aflpp_driver.c
+++ b/utils/aflpp_driver/aflpp_driver.c
@@ -47,6 +47,7 @@ $AFL_HOME/afl-fuzz -i IN -o OUT ./a.out
 #include <sys/mman.h>
 
 #include "config.h"
+#include "types.h"
 #include "cmplog.h"
 
 #ifdef _DEBUG
@@ -172,7 +173,7 @@ size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) {
 // Execute any files provided as parameters.
 static int ExecuteFilesOnyByOne(int argc, char **argv) {
 
-  unsigned char *buf = malloc(MAX_FILE);
+  unsigned char *buf = (unsigned char *)malloc(MAX_FILE);
   for (int i = 1; i < argc; i++) {
 
     int fd = open(argv[i], O_RDONLY);
diff --git a/utils/custom_mutators/XmlMutatorMin.py b/utils/custom_mutators/XmlMutatorMin.py
index 4c80a2ba..3e6cd0ff 100644
--- a/utils/custom_mutators/XmlMutatorMin.py
+++ b/utils/custom_mutators/XmlMutatorMin.py
@@ -12,12 +12,13 @@ import random, re, io
 # The XmlMutatorMin class #
 ###########################
 
+
 class XmlMutatorMin:
 
     """
-        Optionals parameters:
-            seed        Seed used by the PRNG (default: "RANDOM")
-            verbose     Verbosity (default: False)
+    Optional parameters:
+        seed        Seed used by the PRNG (default: "RANDOM")
+        verbose     Verbosity (default: False)
     """
 
     def __init__(self, seed="RANDOM", verbose=False):
@@ -41,7 +42,12 @@ class XmlMutatorMin:
         self.tree = None
 
         # High-level mutators (no database needed)
-        hl_mutators_delete = ["del_node_and_children", "del_node_but_children", "del_attribute", "del_content"]  # Delete items
+        hl_mutators_delete = [
+            "del_node_and_children",
+            "del_node_but_children",
+            "del_attribute",
+            "del_content",
+        ]  # Delete items
         hl_mutators_fuzz = ["fuzz_attribute"]  # Randomly change attribute values
 
         # Exposed mutators
@@ -74,7 +80,9 @@ class XmlMutatorMin:
 
         """ Serialize a XML document. Basic wrapper around lxml.tostring() """
 
-        return ET.tostring(tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding)
+        return ET.tostring(
+            tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding
+        )
 
     def __ver(self, version):
 
@@ -161,7 +169,7 @@ class XmlMutatorMin:
             # Randomly pick one of the function calls
             (func, args) = random.choice(l)
             # Split by "," and randomly pick one of the arguments
-            value = random.choice(args.split(','))
+            value = random.choice(args.split(","))
             # Remove superfluous characters
             unclean_value = value
             value = value.strip(" ").strip("'")
@@ -170,49 +178,49 @@ class XmlMutatorMin:
             value = attrib_value
 
         # For each type, define some possible replacement values
-        choices_number =    ( \
-                                "0", \
-                                "11111", \
-                                "-128", \
-                                "2", \
-                                "-1", \
-                                "1/3", \
-                                "42/0", \
-                                "1094861636 idiv 1.0", \
-                                "-1123329771506872 idiv 3.8", \
-                                "17=$numericRTF", \
-                                str(3 + random.randrange(0, 100)), \
-                            )
-
-        choices_letter =    ( \
-                                "P" * (25 * random.randrange(1, 100)), \
-                                "%s%s%s%s%s%s", \
-                                "foobar", \
-                            )
-
-        choices_alnum =     ( \
-                                "Abc123", \
-                                "020F0302020204030204", \
-                                "020F0302020204030204" * (random.randrange(5, 20)), \
-                            )
+        choices_number = (
+            "0",
+            "11111",
+            "-128",
+            "2",
+            "-1",
+            "1/3",
+            "42/0",
+            "1094861636 idiv 1.0",
+            "-1123329771506872 idiv 3.8",
+            "17=$numericRTF",
+            str(3 + random.randrange(0, 100)),
+        )
+
+        choices_letter = (
+            "P" * (25 * random.randrange(1, 100)),
+            "%s%s%s%s%s%s",
+            "foobar",
+        )
+
+        choices_alnum = (
+            "Abc123",
+            "020F0302020204030204",
+            "020F0302020204030204" * (random.randrange(5, 20)),
+        )
 
         # Fuzz the value
-        if random.choice((True,False)) and value == "":
+        if random.choice((True, False)) and value == "":
 
             # Empty
             new_value = value
 
-        elif random.choice((True,False)) and value.isdigit():
+        elif random.choice((True, False)) and value.isdigit():
 
             # Numbers
             new_value = random.choice(choices_number)
 
-        elif random.choice((True,False)) and value.isalpha():
+        elif random.choice((True, False)) and value.isalpha():
 
             # Letters
             new_value = random.choice(choices_letter)
 
-        elif random.choice((True,False)) and value.isalnum():
+        elif random.choice((True, False)) and value.isalnum():
 
             # Alphanumeric
             new_value = random.choice(choices_alnum)
@@ -232,22 +240,25 @@ class XmlMutatorMin:
 
         # Log something
         if self.verbose:
-            print("Fuzzing attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag))
+            print(
+                "Fuzzing attribute #%i '%s' of tag #%i '%s'"
+                % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
+            )
 
         # Modify the attribute
         rand_elem.set(rand_attrib, new_value.decode("utf-8"))
 
     def __del_node_and_children(self):
 
-        """ High-level minimizing mutator
-            Delete a random node and its children (i.e. delete a random tree) """
+        """High-level minimizing mutator
+        Delete a random node and its children (i.e. delete a random tree)"""
 
         self.__del_node(True)
 
     def __del_node_but_children(self):
 
-        """ High-level minimizing mutator
-            Delete a random node but its children (i.e. link them to the parent of the deleted node) """
+        """High-level minimizing mutator
+        Delete a random node but keep its children (i.e. link them to the parent of the deleted node)"""
 
         self.__del_node(False)
 
@@ -270,7 +281,10 @@ class XmlMutatorMin:
         # Log something
         if self.verbose:
             but_or_and = "and" if delete_children else "but"
-            print("Deleting tag #%i '%s' %s its children" % (rand_elem_id, rand_elem.tag, but_or_and))
+            print(
+                "Deleting tag #%i '%s' %s its children"
+                % (rand_elem_id, rand_elem.tag, but_or_and)
+            )
 
         if delete_children is False:
             # Link children of the random (soon to be deleted) node to its parent
@@ -282,8 +296,8 @@ class XmlMutatorMin:
 
     def __del_content(self):
 
-        """ High-level minimizing mutator
-            Delete the attributes and children of a random node """
+        """High-level minimizing mutator
+        Delete the attributes and children of a random node"""
 
         # Select a node to modify
         (rand_elem_id, rand_elem) = self.__pick_element()
@@ -297,8 +311,8 @@ class XmlMutatorMin:
 
     def __del_attribute(self):
 
-        """ High-level minimizing mutator
-            Delete a random attribute from a random node """
+        """High-level minimizing mutator
+        Delete a random attribute from a random node"""
 
         # Select a node to modify
         (rand_elem_id, rand_elem) = self.__pick_element()
@@ -318,7 +332,10 @@ class XmlMutatorMin:
 
         # Log something
         if self.verbose:
-            print("Deleting attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag))
+            print(
+                "Deleting attribute #%i '%s' of tag #%i '%s'"
+                % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
+            )
 
         # Delete the attribute
         rand_elem.attrib.pop(rand_attrib)
@@ -329,4 +346,3 @@ class XmlMutatorMin:
 
         # High-level mutation
         self.__exec_among(self, self.hl_mutators_all, min, max)
-
diff --git a/utils/custom_mutators/common.py b/utils/custom_mutators/common.py
index 9a1ef0a3..44a5056a 100644
--- a/utils/custom_mutators/common.py
+++ b/utils/custom_mutators/common.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # encoding: utf-8
-'''
+"""
 Module containing functions shared between multiple AFL modules
 
 @author:     Christian Holler (:decoder)
@@ -12,7 +12,7 @@ License, v. 2.0. If a copy of the MPL was not distributed with this
 file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 @contact:    choller@mozilla.com
-'''
+"""
 
 from __future__ import print_function
 import random
@@ -23,18 +23,18 @@ import re
 def randel(l):
     if not l:
         return None
-    return l[random.randint(0, len(l)-1)]
+    return l[random.randint(0, len(l) - 1)]
 
 
 def randel_pop(l):
     if not l:
         return None
-    return l.pop(random.randint(0, len(l)-1))
+    return l.pop(random.randint(0, len(l) - 1))
 
 
 def write_exc_example(data, exc):
-    exc_name = re.sub(r'[^a-zA-Z0-9]', '_', repr(exc))
+    exc_name = re.sub(r"[^a-zA-Z0-9]", "_", repr(exc))
 
     if not os.path.exists(exc_name):
-        with open(exc_name, 'w') as f:
+        with open(exc_name, "w") as f:
             f.write(data)
diff --git a/utils/custom_mutators/example.py b/utils/custom_mutators/example.py
index cf659e5a..3a6d22e4 100644
--- a/utils/custom_mutators/example.py
+++ b/utils/custom_mutators/example.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # encoding: utf-8
-'''
+"""
 Example Python Module for AFLFuzz
 
 @author:     Christian Holler (:decoder)
@@ -12,7 +12,7 @@ License, v. 2.0. If a copy of the MPL was not distributed with this
 file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 @contact:    choller@mozilla.com
-'''
+"""
 
 import random
 
@@ -26,12 +26,12 @@ COMMANDS = [
 
 
 def init(seed):
-    '''
+    """
     Called once when AFLFuzz starts up. Used to seed our RNG.
 
     @type seed: int
     @param seed: A 32-bit random value
-    '''
+    """
     random.seed(seed)
 
 
@@ -40,7 +40,7 @@ def deinit():
 
 
 def fuzz(buf, add_buf, max_size):
-    '''
+    """
     Called per fuzzing iteration.
 
     @type buf: bytearray
@@ -55,13 +55,14 @@ def fuzz(buf, add_buf, max_size):
 
     @rtype: bytearray
     @return: A new bytearray containing the mutated data
-    '''
+    """
     ret = bytearray(100)
 
     ret[:3] = random.choice(COMMANDS)
 
     return ret
 
+
 # Uncomment and implement the following methods if you want to use a custom
 # trimming algorithm. See also the documentation for a better API description.
 
diff --git a/utils/custom_mutators/simple-chunk-replace.py b/utils/custom_mutators/simple-chunk-replace.py
index df2f4ca7..c57218dd 100644
--- a/utils/custom_mutators/simple-chunk-replace.py
+++ b/utils/custom_mutators/simple-chunk-replace.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # encoding: utf-8
-'''
+"""
 Simple Chunk Cross-Over Replacement Module for AFLFuzz
 
 @author:     Christian Holler (:decoder)
@@ -12,24 +12,24 @@ License, v. 2.0. If a copy of the MPL was not distributed with this
 file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 @contact:    choller@mozilla.com
-'''
+"""
 
 import random
 
 
 def init(seed):
-    '''
+    """
     Called once when AFLFuzz starts up. Used to seed our RNG.
 
     @type seed: int
     @param seed: A 32-bit random value
-    '''
+    """
     # Seed our RNG
     random.seed(seed)
 
 
 def fuzz(buf, add_buf, max_size):
-    '''
+    """
     Called per fuzzing iteration.
 
     @type buf: bytearray
@@ -44,7 +44,7 @@ def fuzz(buf, add_buf, max_size):
 
     @rtype: bytearray
     @return: A new bytearray containing the mutated data
-    '''
+    """
     # Make a copy of our input buffer for returning
     ret = bytearray(buf)
 
@@ -58,7 +58,9 @@ def fuzz(buf, add_buf, max_size):
     rand_dst_idx = random.randint(0, len(buf))
 
     # Make the chunk replacement
-    ret[rand_dst_idx:rand_dst_idx + fragment_len] = add_buf[rand_src_idx:rand_src_idx + fragment_len]
+    ret[rand_dst_idx : rand_dst_idx + fragment_len] = add_buf[
+        rand_src_idx : rand_src_idx + fragment_len
+    ]
 
     # Return data
     return ret
diff --git a/utils/custom_mutators/wrapper_afl_min.py b/utils/custom_mutators/wrapper_afl_min.py
index ecb03b55..5cd60031 100644
--- a/utils/custom_mutators/wrapper_afl_min.py
+++ b/utils/custom_mutators/wrapper_afl_min.py
@@ -27,7 +27,7 @@ def log(text):
 
 def init(seed):
     """
-          Called once when AFL starts up. Seed is used to identify the AFL instance in log files
+    Called once when AFL starts up. Seed is used to identify the AFL instance in log files
     """
 
     global __mutator__
@@ -72,7 +72,10 @@ def fuzz(buf, add_buf, max_size):
     if via_buffer:
         try:
             __mutator__.init_from_string(buf_str)
-            log("fuzz(): Mutator successfully initialized with AFL buffer (%d bytes)" % len(buf_str))
+            log(
+                "fuzz(): Mutator successfully initialized with AFL buffer (%d bytes)"
+                % len(buf_str)
+            )
         except Exception:
             via_buffer = False
             log("fuzz(): Can't initialize mutator with AFL buffer")
@@ -104,7 +107,7 @@ def fuzz(buf, add_buf, max_size):
 
 
 # Main (for debug)
-if __name__ == '__main__':
+if __name__ == "__main__":
 
     __log__ = True
     __log_file__ = "/dev/stdout"
@@ -112,7 +115,9 @@ if __name__ == '__main__':
 
     init(__seed__)
 
-    in_1 = bytearray("<foo ddd='eeee'>ffff<a b='c' d='456' eee='ffffff'>zzzzzzzzzzzz</a><b yyy='YYY' zzz='ZZZ'></b></foo>")
+    in_1 = bytearray(
+        "<foo ddd='eeee'>ffff<a b='c' d='456' eee='ffffff'>zzzzzzzzzzzz</a><b yyy='YYY' zzz='ZZZ'></b></foo>"
+    )
     in_2 = bytearray("<abc abc123='456' abcCBA='ppppppppppppppppppppppppppppp'/>")
     out = fuzz(in_1, in_2)
     print(out)
diff --git a/utils/defork/defork.c b/utils/defork/defork.c
index f50b9a4b..c9be3283 100644
--- a/utils/defork/defork.c
+++ b/utils/defork/defork.c
@@ -5,6 +5,7 @@
 #include <stdbool.h>
 
 #include "../../include/config.h"
+#include "../../include/types.h"
 
 /* we want to fork once (for the afl++ forkserver),
    then immediately return as child on subsequent forks. */
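+/* (Concretely: the first fork() call proceeds normally so the forkserver can
+   spawn; every later call simply returns 0, the value a child would see.) */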