33 files changed, 946 insertions, 217 deletions
diff --git a/Dockerfile b/Dockerfile
index b59e91fb..905e8265 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,8 +11,8 @@ LABEL "about"="AFLplusplus docker image"
 
 ARG DEBIAN_FRONTEND=noninteractive
 
-RUN apt-get update && apt-get -y install \
-    --no-install-suggests --no-install-recommends \
+RUN apt-get update && apt-get upgrade -y && \
+    apt-get -y install --no-install-suggests --no-install-recommends \
     automake \
     bison flex \
     build-essential \
@@ -21,7 +21,7 @@ RUN apt-get update && apt-get -y install \
     libtool libtool-bin \
     libglib2.0-dev \
     wget vim jupp nano \
-    apt-utils apt-transport-https ca-certificates gnupg \
+    apt-utils apt-transport-https ca-certificates gnupg dialog \
     libpixman-1-dev
 
 RUN echo deb http://apt.llvm.org/focal/ llvm-toolchain-focal main >> /etc/apt/sources.list && \
@@ -33,7 +33,7 @@ RUN echo deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu focal main
 RUN apt-get update && apt-get upgrade -y
 
 RUN apt-get install -y gcc-10 g++-10 gcc-10-plugin-dev gcc-10-multilib \
-    libc++-10-dev gdb
+    libc++-10-dev gdb lcov
 
 RUN apt-get install -y clang-11 clang-tools-11 libc++1-11 libc++-11-dev \
     libc++abi1-11 libc++abi-11-dev libclang1-11 libclang-11-dev \
@@ -44,15 +44,19 @@ RUN apt-get install -y clang-11 clang-tools-11 libc++1-11 libc++-11-dev \
 RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 0
 RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 0
 
-RUN rm -rf /var/lib/apt/lists/*
+RUN rm -rf /var/cache/apt/archives/*
 
 ARG CC=gcc-10
 ARG CXX=g++-10
 ARG LLVM_CONFIG=llvm-config-11
 
 RUN git clone https://github.com/AFLplusplus/AFLplusplus
+RUN cd AFLplusplus && export REAL_CXX=g++-10 && make distrib && \
+    make install && make clean
 
-RUN cd AFLplusplus && export REAL_CXX=g++ && make distrib && \
-    make install && cd .. && make clean
+RUN git clone https://github.com/vanhauser-thc/afl-cov afl-cov
+RUN cd afl-cov && make install
+
+RUN echo 'alias joe="jupp --wordwrap"' >> ~/.bashrc
 
 ENV AFL_SKIP_CPUFREQ=1
diff --git a/GNUmakefile b/GNUmakefile
index fbcc53de..8f559391 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -51,9 +51,9 @@ endif
 endif
 
 ifneq "$(shell uname)" "Darwin"
- ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-	CFLAGS_OPT += -march=native
- endif
+ #ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+ #	CFLAGS_OPT += -march=native
+ #endif
  # OS X does not like _FORTIFY_SOURCE=2
  CFLAGS_OPT += -D_FORTIFY_SOURCE=2
 endif
@@ -503,7 +503,7 @@ distrib: all radamsa
 	$(MAKE) -C examples/afl_network_proxy
 	$(MAKE) -C examples/socket_fuzzing
 	$(MAKE) -C examples/argv_fuzzing
-	cd qemu_mode && sh ./build_qemu_support.sh
+	-cd qemu_mode && sh ./build_qemu_support.sh
 	cd unicorn_mode && sh ./build_unicorn_support.sh
 
 binary-only: all radamsa
@@ -512,7 +512,7 @@ binary-only: all radamsa
 	$(MAKE) -C examples/afl_network_proxy
 	$(MAKE) -C examples/socket_fuzzing
 	$(MAKE) -C examples/argv_fuzzing
-	cd qemu_mode && sh ./build_qemu_support.sh
+	-cd qemu_mode && sh ./build_qemu_support.sh
 	cd unicorn_mode && sh ./build_unicorn_support.sh
 
 source-only: all radamsa
diff --git a/README.md b/README.md
index 5af2b0fc..a906a2c0 100644
--- a/README.md
+++ b/README.md
@@ -133,13 +133,21 @@ For everyone who wants to contribute (and send pull requests) please read
 
 ## Building and installing afl++
 
-afl++ has many build options.
+An easy way to install afl++ with everything compiled is available via docker:
+```shell
+docker pull aflplusplus/aflplusplus
+docker run -ti -v /location/of/your/target:/src aflplusplus/aflplusplus
+```
+This container is automatically generated when a push to master happens.
+You will find your target source code in /src in the container.
+
+If you want to build afl++ yourself you have many options.
 The easiest is to build and install everything:
 
 ```shell
-$ sudo apt install build-essential libtool-bin python3-dev automake flex bison libglib2.0-dev libpixman-1-dev clang python3-setuptools llvm
-$ make distrib
-$ sudo make install
+sudo apt install build-essential libtool-bin python3-dev automake flex bison libglib2.0-dev libpixman-1-dev clang python3-setuptools llvm
+make distrib
+sudo make install
 ```
 
 Note that "make distrib" also builds llvm_mode, qemu_mode, unicorn_mode and
@@ -148,7 +156,7 @@ using at least llvm_mode is highly recommended for much better results -
 hence in this case
 
 ```shell
-$ make source-only
+make source-only
 ```
 is what you should choose.
 
@@ -171,7 +179,7 @@ These build targets exist:
 afl++ binaries by passing the STATIC=1 argument to make:
 
 ```shell
-$ make all STATIC=1
+make all STATIC=1
 ```
 
 These build options exist:
@@ -191,8 +199,8 @@ Hence at least gcc-9 and especially llvm-9 should be the compilers of choice.
 If your distribution does not have them, you can use the Dockerfile:
 
 ```shell
-$ cd AFLplusplus
-$ sudo docker build -t aflplusplus .
+cd AFLplusplus
+sudo docker build -t aflplusplus .
 ```
 
 
@@ -281,8 +289,8 @@ The correct way to recompile the target program may vary depending on the
 specifics of the build process, but a nearly-universal approach would be:
 
 ```shell
-$ CC=/path/to/afl/afl-gcc ./configure
-$ make clean all
+CC=/path/to/afl/afl-gcc ./configure
+make clean all
 ```
 
 For C++ programs, you'd would also want to set `CXX=/path/to/afl/afl-g++`.
@@ -306,7 +314,7 @@ runtime (usually by setting `LD_LIBRARY_PATH`). The simplest option is a static
 build, usually possible via:
 
 ```shell
-$ CC=/path/to/afl/afl-gcc ./configure --disable-shared
+CC=/path/to/afl/afl-gcc ./configure --disable-shared
 ```
 
 Setting `AFL_HARDEN=1` when calling 'make' will cause the CC wrapper to
@@ -328,8 +336,8 @@ QEMU is a project separate from AFL, but you can conveniently build the
 feature by doing:
 
 ```shell
-$ cd qemu_mode
-$ ./build_qemu_support.sh
+cd qemu_mode
+./build_qemu_support.sh
 ```
 
 For additional instructions and caveats, see [qemu_mode/README.md](qemu_mode/README.md).
@@ -423,7 +431,7 @@ store its findings, plus a path to the binary to test.
 For target binaries that accept input directly from stdin, the usual syntax is:
 
 ```shell
-$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program [...params...]
+./afl-fuzz -i testcase_dir -o findings_dir /path/to/program [...params...]
 ```
 
 For programs that take input from a file, use '@@' to mark the location in
@@ -431,7 +439,7 @@ the target's command line where the input file name should be placed. The
 fuzzer will substitute this for you:
 
 ```shell
-$ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@
+./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@
 ```
 
 You can also use the -f option to have the mutated data written to a specific
@@ -494,8 +502,8 @@ When you can't reproduce a crash found by afl-fuzz, the most likely cause is
 that you are not setting the same memory limit as used by the tool. Try:
 
 ```shell
-$ LIMIT_MB=50
-$ ( ulimit -Sv $[LIMIT_MB << 10]; /path/to/tested_binary ... )
+LIMIT_MB=50
+( ulimit -Sv $[LIMIT_MB << 10]; /path/to/tested_binary ... )
 ```
 
 Change LIMIT_MB to match the -m parameter passed to afl-fuzz. On OpenBSD,
@@ -504,7 +512,7 @@ also change -Sv to -Sd.
 Any existing output directory can be also used to resume aborted jobs; try:
 
 ```shell
-$ ./afl-fuzz -i- -o existing_output_dir [...etc...]
+./afl-fuzz -i- -o existing_output_dir [...etc...]
 ```
 
 If you have gnuplot installed, you can also generate some pretty graphs for any
@@ -586,7 +594,7 @@ Oh, one more thing: for test case minimization, give afl-tmin a try. The tool
 can be operated in a very simple way:
 
 ```shell
-$ ./afl-tmin -i test_case -o minimized_result -- /path/to/program [...]
+./afl-tmin -i test_case -o minimized_result -- /path/to/program [...]
 ```
 
 The tool works with crashing and non-crashing test cases alike. In the crash
diff --git a/TODO.md b/TODO.md
index 3ee8d091..b7d51369 100644
--- a/TODO.md
+++ b/TODO.md
@@ -9,6 +9,7 @@
  - learn from honggfuzz
  - for persistent mode, have a functionality that transports the test case
    via shared memory (and the int write to the FD from afl-fuzz is the size)
+ - CPU affinity for many cores?
 
 ## Further down the road
 
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 6115a0cc..e7ba208c 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -12,10 +12,11 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
 ### Version ++2.65d (dev)
   - afl-fuzz:
      - -S slaves now only sync from the master to increase performance,
-       the -M master stilly syncs from everyone. Added checks that exactly
-       one master is present and warn otherwise
+       the -M master still syncs from everyone. Added checks that ensure
+       exactly one master is present and warn otherwise
      - If no master is present at a sync one slave automatically becomes
        a temporary master until a real master shows up
+     - fix/update to MOpt (thanks to arnow117)
   - llvm_mode:
     - the default instrumentation is now PCGUARD, as it is faster and provides
       better coverage. The original afl instrumentation can be set via
@@ -27,9 +28,14 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
     - added AFL_LLVM_LAF_ALL, sets all laf-intel settings
     - LTO whitelist functionality rewritten, now main, _init etc functions
       need not to be whitelisted anymore
-    - fixed crash in compare-transform-pass when strcasemp/strncasecmp was
+    - fixed crash in compare-transform-pass when strcasecmp/strncasecmp was
       tried to be instrumented with LTO
     - fixed crash in cmplog with LTO
+    - enable snapshot lkm also for persistent mode
+  - persistent mode shared memory testcase handover (instead of via
+    files/stdin) - 10-100% performance increase
+  - General support for 64 bit PowerPC, RiscV, Sparc etc.
+  - slightly better performance compilation options for afl++ and targets
   - fixed afl-gcc/afl-as that could break on fast systems reusing pids in
     the same second
   - added lots of dictionaries from oss-fuzz, go-fuzz and Jakub Wilk
diff --git a/docs/INSTALL.md b/docs/INSTALL.md
index 0f9673ad..766f24d7 100644
--- a/docs/INSTALL.md
+++ b/docs/INSTALL.md
@@ -17,7 +17,7 @@ You can start using the fuzzer without installation, but it is also possible to
 install it with:
 
 ```bash
-make install
+sudo make install
 ```
 
 There are no special dependencies to speak of; you will need GNU make and a
@@ -46,7 +46,7 @@ please install it first. As on Linux, you can use the fuzzer itself without
 installation, or install it with:
 
 ```
-gmake install
+sudo gmake install
 ```
 
 Keep in mind that if you are using csh as your shell, the syntax of some of the
diff --git a/docs/parallel_fuzzing.md b/docs/parallel_fuzzing.md
index 12aefb46..c6e54218 100644
--- a/docs/parallel_fuzzing.md
+++ b/docs/parallel_fuzzing.md
@@ -40,14 +40,14 @@ for every instance - say, "fuzzer01", "fuzzer02", etc.
 Run the first one ("master", -M) like this:
 
 ```
-$ ./afl-fuzz -i testcase_dir -o sync_dir -M fuzzer01 [...other stuff...]
+./afl-fuzz -i testcase_dir -o sync_dir -M fuzzer01 [...other stuff...]
 ```
 
 ...and then, start up secondary (-S) instances like this:
 
 ```
-$ ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer02 [...other stuff...]
-$ ./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer03 [...other stuff...]
+./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer02 [...other stuff...]
+./afl-fuzz -i testcase_dir -o sync_dir -S fuzzer03 [...other stuff...]
 ```
 
 Each fuzzer will keep its state in a separate subdirectory, like so:
@@ -71,9 +71,9 @@ experimental support for parallelizing the deterministic checks. To leverage
 that, you need to create -M instances like so:
 
 ```
-$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterA:1/3 [...]
-$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterB:2/3 [...]
-$ ./afl-fuzz -i testcase_dir -o sync_dir -M masterC:3/3 [...]
+./afl-fuzz -i testcase_dir -o sync_dir -M masterA:1/3 [...]
+./afl-fuzz -i testcase_dir -o sync_dir -M masterB:2/3 [...]
+./afl-fuzz -i testcase_dir -o sync_dir -M masterC:3/3 [...]
 ```
 
 ...where the first value after ':' is the sequential ID of a particular master
@@ -91,9 +91,9 @@ must use a separate temporary file; otherwise, things will go south. One safe
 example may be:
 
 ```
-$ ./afl-fuzz [...] -S fuzzer10 -f file10.txt ./fuzzed/binary @@
-$ ./afl-fuzz [...] -S fuzzer11 -f file11.txt ./fuzzed/binary @@
-$ ./afl-fuzz [...] -S fuzzer12 -f file12.txt ./fuzzed/binary @@
+./afl-fuzz [...] -S fuzzer10 -f file10.txt ./fuzzed/binary @@
+./afl-fuzz [...] -S fuzzer11 -f file11.txt ./fuzzed/binary @@
+./afl-fuzz [...] -S fuzzer12 -f file12.txt ./fuzzed/binary @@
 ```
 
 This is not a concern if you use @@ without -f and let afl-fuzz come up with the
diff --git a/examples/afl_network_proxy/README.md b/examples/afl_network_proxy/README.md
index 42c0b71b..a5ac3578 100644
--- a/examples/afl_network_proxy/README.md
+++ b/examples/afl_network_proxy/README.md
@@ -29,7 +29,7 @@ Run `afl-network-server` with your target with the -m and -t values you need.
 Important is the -i parameter which is the TCP port to listen on.
 e.g.:
 ```
-$ afl-network-server -i 1111 -m 25M -t 1000 -- /bin/target -f @@
+afl-network-server -i 1111 -m 25M -t 1000 -- /bin/target -f @@
 ```
 
 ### on the (afl-fuzz) master
@@ -38,7 +38,7 @@ Just run afl-fuzz with your normal options, however the target should be
 `afl-network-client` with the IP and PORT of the `afl-network-server` and
 increase the -t value:
 ```
-$ afl-fuzz -i in -o out -t 2000+ -- afl-network-client TARGET-IP 1111
+afl-fuzz -i in -o out -t 2000+ -- afl-network-client TARGET-IP 1111
 ```
 Note the '+' on the -t parameter value. The afl-network-server will take
 care of proper timeouts hence afl-fuzz should not. The '+' increases the
diff --git a/examples/afl_untracer/README.md b/examples/afl_untracer/README.md
index 05fd8776..e59792cb 100644
--- a/examples/afl_untracer/README.md
+++ b/examples/afl_untracer/README.md
@@ -29,8 +29,8 @@ The patches.txt file has to be pointed to by `AFL_UNTRACER_FILE`.
 
 To easily run the scripts without needing to run the GUI with Ghidra:
 ```
-$ /opt/ghidra/support/analyzeHeadless /tmp/ tmp$$ -import libtestinstr.so -postscript ./ghidra_get_patchpoints.java
-$ rm -rf /tmp/tmp$$
+/opt/ghidra/support/analyzeHeadless /tmp/ tmp$$ -import libtestinstr.so -postscript ./ghidra_get_patchpoints.java
+rm -rf /tmp/tmp$$
 ```
 
 ### Fuzzing
diff --git a/examples/aflpp_driver/GNUmakefile b/examples/aflpp_driver/GNUmakefile
new file mode 100644
index 00000000..a681d2cf
--- /dev/null
+++ b/examples/aflpp_driver/GNUmakefile
@@ -0,0 +1,21 @@
+ifeq "" "$(LLVM_CONFIG)"
+  LLVM_CONFIG=llvm-config
+endif
+
+LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null)
+ifneq "" "$(LLVM_BINDIR)"
+  LLVM_BINDIR := $(LLVM_BINDIR)/
+endif
+
+FLAGS=-O3 -funroll-loops
+
+all:	libAFLDriver.a
+
+aflpp_driver.o:	aflpp_driver.cpp
+	$(LLVM_BINDIR)clang++ $(FLAGS) -stdlib=libc++ -funroll-loops -std=c++11 -c aflpp_driver.cpp
+
+libAFLDriver.a:	aflpp_driver.o
+	ar ru libAFLDriver.a aflpp_driver.o
+
+clean:
+	rm -f *.o libAFLDriver*.a *~ core
diff --git a/examples/aflpp_driver/Makefile b/examples/aflpp_driver/Makefile
new file mode 100644
index 00000000..3666a74d
--- /dev/null
+++ b/examples/aflpp_driver/Makefile
@@ -0,0 +1,2 @@
+all:
+	@gmake all || echo please install GNUmake
diff --git a/examples/aflpp_driver/aflpp_driver.cpp b/examples/aflpp_driver/aflpp_driver.cpp
new file mode 100644
index 00000000..3dcc8c3c
--- /dev/null
+++ b/examples/aflpp_driver/aflpp_driver.cpp
@@ -0,0 +1,281 @@
+//===- afl_driver.cpp - a glue between AFL and libFuzzer --------*- C++ -* ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//===----------------------------------------------------------------------===//
+
+/* This file allows to fuzz libFuzzer-style target functions
+ (LLVMFuzzerTestOneInput) with AFL using AFL's persistent (in-process) mode.
+
+Usage:
+################################################################################
+cat << EOF > test_fuzzer.cc
+#include <stddef.h>
+#include <stdint.h>
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  if (size > 0 && data[0] == 'H')
+    if (size > 1 && data[1] == 'I')
+       if (size > 2 && data[2] == '!')
+       __builtin_trap();
+  return 0;
+}
+EOF
+# Build your target with -fsanitize-coverage=trace-pc-guard using fresh clang.
+clang -g -fsanitize-coverage=trace-pc-guard test_fuzzer.cc -c
+# Build afl-llvm-rt.o.c from the AFL distribution.
+clang -c -w $AFL_HOME/llvm_mode/afl-llvm-rt.o.c
+# Build this file, link it with afl-llvm-rt.o.o and the target code.
+clang++ afl_driver.cpp test_fuzzer.o afl-llvm-rt.o.o
+# Run AFL:
+rm -rf IN OUT; mkdir IN OUT; echo z > IN/z;
+$AFL_HOME/afl-fuzz -i IN -o OUT ./a.out
+################################################################################
+AFL_DRIVER_STDERR_DUPLICATE_FILENAME: Setting this *appends* stderr to the file
+specified. If the file does not exist, it is created. This is useful for getting
+stack traces (when using ASAN for example) or original error messages on hard
+to reproduce bugs. Note that any content written to stderr will be written to
+this file instead of stderr's usual location.
+
+AFL_DRIVER_CLOSE_FD_MASK: Similar to libFuzzer's -close_fd_mask behavior option.
+If 1, close stdout at startup. If 2 close stderr; if 3 close both.
+
+*/
+#include <assert.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+// Platform detection. Copied from FuzzerInternal.h
+#ifdef __linux__
+#define LIBFUZZER_LINUX 1
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_FREEBSD 0
+#define LIBFUZZER_OPENBSD 0
+#elif __APPLE__
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_APPLE 1
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_FREEBSD 0
+#define LIBFUZZER_OPENBSD 0
+#elif __NetBSD__
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_NETBSD 1
+#define LIBFUZZER_FREEBSD 0
+#define LIBFUZZER_OPENBSD 0
+#elif __FreeBSD__
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_FREEBSD 1
+#define LIBFUZZER_OPENBSD 0
+#elif __OpenBSD__
+#define LIBFUZZER_LINUX 0
+#define LIBFUZZER_APPLE 0
+#define LIBFUZZER_NETBSD 0
+#define LIBFUZZER_FREEBSD 0
+#define LIBFUZZER_OPENBSD 1
+#else
+#error "Support for your platform has not been implemented"
+#endif
+
+int __afl_sharedmem_fuzzing = 1;
+extern unsigned int __afl_fuzz_len;
+extern unsigned char *__afl_fuzz_ptr;
+
+// libFuzzer interface is thin, so we don't include any libFuzzer headers.
+extern "C" {
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+__attribute__((weak)) int LLVMFuzzerInitialize(int *argc, char ***argv);
+}
+
+// Notify AFL about persistent mode.
+static volatile char AFL_PERSISTENT[] = "##SIG_AFL_PERSISTENT##";
+extern "C" int __afl_persistent_loop(unsigned int);
+static volatile char suppress_warning2 = AFL_PERSISTENT[0];
+
+// Notify AFL about deferred forkserver.
+static volatile char AFL_DEFER_FORKSVR[] = "##SIG_AFL_DEFER_FORKSRV##";
+extern "C" void __afl_manual_init();
+static volatile char suppress_warning1 = AFL_DEFER_FORKSVR[0];
+
+// Input buffer.
+static const size_t kMaxAflInputSize = 1 << 20;
+static uint8_t AflInputBuf[kMaxAflInputSize];
+
+// Use this optionally defined function to output sanitizer messages even if
+// user asks to close stderr.
+__attribute__((weak)) extern "C" void __sanitizer_set_report_fd(void *);
+
+// Keep track of where stderr content is being written to, so that
+// dup_and_close_stderr can use the correct one.
+static FILE *output_file = stderr;
+
+// Experimental feature to use afl_driver without AFL's deferred mode.
+// Needs to run before __afl_auto_init.
+__attribute__((constructor(0))) static void __decide_deferred_forkserver(void) {
+  if (getenv("AFL_DRIVER_DONT_DEFER")) {
+    if (unsetenv("__AFL_DEFER_FORKSRV")) {
+      perror("Failed to unset __AFL_DEFER_FORKSRV");
+      abort();
+    }
+  }
+}
+
+// If the user asks us to duplicate stderr, then do it.
+static void maybe_duplicate_stderr() {
+  char *stderr_duplicate_filename =
+      getenv("AFL_DRIVER_STDERR_DUPLICATE_FILENAME");
+
+  if (!stderr_duplicate_filename)
+    return;
+
+  FILE *stderr_duplicate_stream =
+      freopen(stderr_duplicate_filename, "a+", stderr);
+
+  if (!stderr_duplicate_stream) {
+    fprintf(
+        stderr,
+        "Failed to duplicate stderr to AFL_DRIVER_STDERR_DUPLICATE_FILENAME");
+    abort();
+  }
+  output_file = stderr_duplicate_stream;
+}
+
+// Most of these I/O functions were inspired by/copied from libFuzzer's code.
+static void discard_output(int fd) {
+  FILE *temp = fopen("/dev/null", "w");
+  if (!temp)
+    abort();
+  dup2(fileno(temp), fd);
+  fclose(temp);
+}
+
+static void close_stdout() { discard_output(STDOUT_FILENO); }
+
+// Prevent the targeted code from writing to "stderr" but allow sanitizers and
+// this driver to do so.
+static void dup_and_close_stderr() {
+  int output_fileno = fileno(output_file);
+  int output_fd = dup(output_fileno);
+  if (output_fd <= 0)
+    abort();
+  FILE *new_output_file = fdopen(output_fd, "w");
+  if (!new_output_file)
+    abort();
+  if (!__sanitizer_set_report_fd)
+    return;
+  __sanitizer_set_report_fd(reinterpret_cast<void *>(output_fd));
+  discard_output(output_fileno);
+}
+
+static void Printf(const char *Fmt, ...) {
+  va_list ap;
+  va_start(ap, Fmt);
+  vfprintf(output_file, Fmt, ap);
+  va_end(ap);
+  fflush(output_file);
+}
+
+// Close stdout and/or stderr if user asks for it.
+static void maybe_close_fd_mask() {
+  char *fd_mask_str = getenv("AFL_DRIVER_CLOSE_FD_MASK");
+  if (!fd_mask_str)
+    return;
+  int fd_mask = atoi(fd_mask_str);
+  if (fd_mask & 2)
+    dup_and_close_stderr();
+  if (fd_mask & 1)
+    close_stdout();
+}
+
+// Define LLVMFuzzerMutate to avoid link failures for targets that use it
+// with libFuzzer's LLVMFuzzerCustomMutator.
+extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) {
+  assert(false && "LLVMFuzzerMutate should not be called from afl_driver");
+  return 0;
+}
+
+// Execute any files provided as parameters.
+static int ExecuteFilesOnyByOne(int argc, char **argv) {
+  for (int i = 1; i < argc; i++) {
+    std::ifstream in(argv[i], std::ios::binary);
+    in.seekg(0, in.end);
+    size_t length = in.tellg();
+    in.seekg (0, in.beg);
+    std::cout << "Reading " << length << " bytes from " << argv[i] << std::endl;
+    // Allocate exactly length bytes so that we reliably catch buffer overflows.
+    std::vector<char> bytes(length);
+    in.read(bytes.data(), bytes.size());
+    assert(in);
+    LLVMFuzzerTestOneInput(reinterpret_cast<const uint8_t *>(bytes.data()),
+                           bytes.size());
+    std::cout << "Execution successful" << std::endl;
+  }
+  return 0;
+}
+
+int main(int argc, char **argv) {
+  Printf(
+      "======================= INFO =========================\n"
+      "This binary is built for AFL-fuzz.\n"
+      "To run the target function on individual input(s) execute this:\n"
+      "  %s < INPUT_FILE\n"
+      "or\n"
+      "  %s INPUT_FILE1 [INPUT_FILE2 ... ]\n"
+      "To fuzz with afl-fuzz execute this:\n"
+      "  afl-fuzz [afl-flags] %s [-N]\n"
+      "afl-fuzz will run N iterations before "
+      "re-spawning the process (default: 1000)\n"
+      "======================================================\n",
+          argv[0], argv[0], argv[0]);
+
+  maybe_duplicate_stderr();
+  maybe_close_fd_mask();
+  if (LLVMFuzzerInitialize)
+    LLVMFuzzerInitialize(&argc, &argv);
+  // Do any other expensive one-time initialization here.
+
+  int N = 1000;
+  if (argc == 2 && argv[1][0] == '-')
+      N = atoi(argv[1] + 1);
+  else if(argc == 2 && (N = atoi(argv[1])) > 0)
+      Printf("WARNING: using the deprecated call style `%s %d`\n", argv[0], N);
+  else if (argc > 1) {
+    if (!getenv("AFL_DRIVER_DONT_DEFER")) {
+      __afl_sharedmem_fuzzing = 0;
+      __afl_manual_init();
+    }
+    return ExecuteFilesOnyByOne(argc, argv);
+    exit(0);
+  }
+
+  assert(N > 0);
+
+  if (!getenv("AFL_DRIVER_DONT_DEFER"))
+    __afl_manual_init();
+
+  // Call LLVMFuzzerTestOneInput here so that coverage caused by initialization
+  // on the first execution of LLVMFuzzerTestOneInput is ignored.
+  uint8_t dummy_input[1] = {0};
+  LLVMFuzzerTestOneInput(dummy_input, 1);
+
+  int num_runs = 0;
+  while (__afl_persistent_loop(N)) {
+    if (__afl_fuzz_len > 0) {
+      num_runs++;
+      LLVMFuzzerTestOneInput(__afl_fuzz_ptr, __afl_fuzz_len);
+    }
+  }
+  Printf("%s: successfully executed %d input(s)\n", argv[0], num_runs);
+}
diff --git a/examples/persistent_demo/Makefile b/examples/persistent_demo/Makefile
new file mode 100644
index 00000000..cbbb7239
--- /dev/null
+++ b/examples/persistent_demo/Makefile
@@ -0,0 +1,6 @@
+all:
+	afl-clang-fast -o persistent_demo persistent_demo.c
+	afl-clang-fast -o persistent_demo_new persistent_demo_new.c
+
+clean:
+	rm -f persistent_demo persistent_demo_new
diff --git a/examples/persistent_demo/persistent_demo.c b/examples/persistent_demo/persistent_demo.c
index 36f12850..2da49bb0 100644
--- a/examples/persistent_demo/persistent_demo.c
+++ b/examples/persistent_demo/persistent_demo.c
@@ -63,7 +63,7 @@ int main(int argc, char **argv) {
                We just have some trivial inline code that faults on 'foo!'. */
 
     /* do we have enough data? */
-    if (len < 4) return 0;
+    if (len < 8) continue;
 
     if (buf[0] == 'f') {
 
@@ -77,7 +77,17 @@ int main(int argc, char **argv) {
           if (buf[3] == '!') {
 
             printf("four\n");
-            abort();
+            if (buf[4] == '!') {
+
+              printf("five\n");
+              if (buf[5] == '!') {
+
+                printf("six\n");
+                abort();
+
+              }
+
+            }
 
           }
 
diff --git a/examples/persistent_demo/persistent_demo_new.c b/examples/persistent_demo/persistent_demo_new.c
new file mode 100644
index 00000000..36411e13
--- /dev/null
+++ b/examples/persistent_demo/persistent_demo_new.c
@@ -0,0 +1,97 @@
+/*
+   american fuzzy lop++ - persistent mode example
+   --------------------------------------------
+
+   Originally written by Michal Zalewski
+
+   Copyright 2015 Google Inc. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   This file demonstrates the high-performance "persistent mode" that may be
+   suitable for fuzzing certain fast and well-behaved libraries, provided that
+   they are stateless or that their internal state can be easily reset
+   across runs.
+
+   To make this work, the library and this shim need to be compiled in LLVM
+   mode using afl-clang-fast (other compiler wrappers will *not* work).
+
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <string.h>
+
+__AFL_FUZZ_INIT();
+
+/* Main entry point. */
+
+int main(int argc, char **argv) {
+
+  ssize_t        len;                        /* how much input did we read? */
+  unsigned char *buf;                        /* test case buffer pointer    */
+
+  /* The number passed to __AFL_LOOP() controls the maximum number of
+     iterations before the loop exits and the program is allowed to
+     terminate normally. This limits the impact of accidental memory leaks
+     and similar hiccups. */
+
+  buf = __AFL_FUZZ_TESTCASE_BUF;
+
+  while (__AFL_LOOP(1000)) {
+
+    len = __AFL_FUZZ_TESTCASE_LEN;
+
+    /* do we have enough data? */
+    if (len < 8) continue;
+
+    if (buf[0] == 'f') {
+
+      printf("one\n");
+      if (buf[1] == 'o') {
+
+        printf("two\n");
+        if (buf[2] == 'o') {
+
+          printf("three\n");
+          if (buf[3] == '!') {
+
+            printf("four\n");
+            if (buf[4] == '!') {
+
+              printf("five\n");
+              if (buf[6] == '!') {
+
+                printf("six\n");
+                abort();
+
+              }
+
+            }
+
+          }
+
+        }
+
+      }
+
+    }
+
+    /*** END PLACEHOLDER CODE ***/
+
+  }
+
+  /* Once the loop is exited, terminate normally - AFL will restart the process
+     when this happens, with a clean slate when it comes to allocated memory,
+     leftover file descriptors, etc. */
+
+  return 0;
+
+}
+
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index 6e74f824..32ae2a58 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -342,6 +342,7 @@ typedef struct afl_state {
 
   afl_forkserver_t fsrv;
   sharedmem_t      shm;
+  sharedmem_t *    shm_fuzz;
   afl_env_vars_t   afl_env;
 
   char **argv;                                            /* argv if needed */
diff --git a/include/config.h b/include/config.h
index 6fde8b36..57efd0f6 100644
--- a/include/config.h
+++ b/include/config.h
@@ -304,6 +304,10 @@
 
 #define SHM_ENV_VAR "__AFL_SHM_ID"
 
+/* Environment variable used to pass SHM FUZZ ID to the called program. */
+
+#define SHM_FUZZ_ENV_VAR "__AFL_SHM_FUZZ_ID"
+
 /* Other less interesting, internal-only variables. */
 
 #define CLANG_ENV_VAR "__AFL_CLANG_MODE"
diff --git a/include/forkserver.h b/include/forkserver.h
index e8ac2837..00555d7e 100644
--- a/include/forkserver.h
+++ b/include/forkserver.h
@@ -73,10 +73,18 @@ typedef struct afl_forkserver {
 
   u8 last_kill_signal;                  /* Signal that killed the child     */
 
+  u8 use_shdmen_fuzz;                   /* use shared mem for test cases    */
+
+  u8 support_shdmen_fuzz;               /* set by afl-fuzz                  */
+
   u8 use_fauxsrv;                       /* Fauxsrv for non-forking targets? */
 
   u8 qemu_mode;                         /* if running in qemu mode or not   */
 
+  u32 shdmem_fuzz_len;                   /* length of the fuzzing test case */
+
+  u8 *shdmem_fuzz;                      /* allocated memory for fuzzing     */
+
   char *cmplog_binary;                  /* the name of the cmplog binary    */
 
   /* Function to kick off the forkserver child */
diff --git a/include/types.h b/include/types.h
index f95c4be2..d1e44617 100644
--- a/include/types.h
+++ b/include/types.h
@@ -43,10 +43,11 @@ typedef uint32_t u32;
 #define FS_ERROR_MMAP 16
 
 /* Reporting options */
-#define FS_OPT_ENABLED 0x8f000001
+#define FS_OPT_ENABLED 0x80000001
 #define FS_OPT_MAPSIZE 0x40000000
 #define FS_OPT_SNAPSHOT 0x20000000
 #define FS_OPT_AUTODICT 0x10000000
+#define FS_OPT_SHDMEM_FUZZ 0x01000000
 // FS_OPT_MAX_MAPSIZE is 8388608 = 0x800000 = 2^23 = 1 << 22
 #define FS_OPT_MAX_MAPSIZE ((0x00fffffe >> 1) + 1)
 #define FS_OPT_GET_MAPSIZE(x) (((x & 0x00fffffe) >> 1) + 1)
@@ -63,13 +64,10 @@ typedef uint32_t u32;
    'unsigned long long' in <bits/types.h>, so everything checks out.
 
    But on 64-bit systems, it is #ifdef'ed in the same file as 'unsigned long'.
-   Now, it only happens in circumstances where the type happens to have the
-   expected bit width, *but* the compiler does not know that... and complains
-   about 'unsigned long' being unsafe to pass to %llu.
 
  */
 
-#if defined(__x86_64__) || defined(__aarch64__)
+#ifdef __LP64__
 typedef unsigned long long u64;
 #else
 typedef uint64_t u64;
diff --git a/llvm_mode/GNUmakefile b/llvm_mode/GNUmakefile
index 0a99202d..50a6be2b 100644
--- a/llvm_mode/GNUmakefile
+++ b/llvm_mode/GNUmakefile
@@ -160,9 +160,9 @@ endif
 
 # After we set CC/CXX we can start makefile magic tests
 
-ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-	CFLAGS_OPT = -march=native
-endif
+#ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+#	CFLAGS_OPT = -march=native
+#endif
 
 ifeq "$(shell echo 'int main() {return 0; }' | $(CLANG_BIN) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
         AFL_CLANG_FLTO ?= -flto=full
@@ -355,15 +355,15 @@ endif
 	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) afl-llvm-common.o
 
 ../afl-llvm-rt.o: afl-llvm-rt.o.c | test_deps
-	$(CC) $(CFLAGS) -Wno-unused-result -fPIC -c $< -o $@
+	$(CLANG_BIN) $(CFLAGS) -Wno-unused-result -fPIC -c $< -o $@
 
 ../afl-llvm-rt-32.o: afl-llvm-rt.o.c | test_deps
 	@printf "[*] Building 32-bit variant of the runtime (-m32)... "
-	@$(CC) $(CFLAGS) -Wno-unused-result -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
+	@$(CC_SAVE) $(CFLAGS) -Wno-unused-result -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
 
 ../afl-llvm-rt-64.o: afl-llvm-rt.o.c | test_deps
 	@printf "[*] Building 64-bit variant of the runtime (-m64)... "
-	@$(CC) $(CFLAGS) -Wno-unused-result -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
+	@$(CC_SAVE) $(CFLAGS) -Wno-unused-result -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
 
 test_build: $(PROGS)
 	@echo "[*] Testing the CC wrapper and instrumentation output..."
diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile
index 0b306dde..3666a74d 100644
--- a/llvm_mode/Makefile
+++ b/llvm_mode/Makefile
@@ -1,2 +1,2 @@
 all:
-	@echo please use GNU make, thanks!
+	@gmake all || echo please install GNUmake
diff --git a/llvm_mode/README.lto.md b/llvm_mode/README.lto.md
index 99bcc50d..48c587eb 100644
--- a/llvm_mode/README.lto.md
+++ b/llvm_mode/README.lto.md
@@ -84,21 +84,21 @@ apt-get install -y clang-11 clang-tools-11 libc++1-11 libc++-11-dev \
 
 Building llvm from github takes quite some long time and is not painless:
 ```
-$ sudo apt install binutils-dev  # this is *essential*!
-$ git clone https://github.com/llvm/llvm-project
-$ cd llvm-project
-$ mkdir build
-$ cd build
-$ cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/
-$ make -j $(nproc)
-$ export PATH=`pwd`/bin:$PATH
-$ export LLVM_CONFIG=`pwd`/bin/llvm-config
-$ cd /path/to/AFLplusplus/
-$ make
-$ cd llvm_mode
-$ make
-$ cd ..
-$ make install
+sudo apt install binutils-dev  # this is *essential*!
+git clone https://github.com/llvm/llvm-project
+cd llvm-project
+mkdir build
+cd build
+cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/
+make -j $(nproc)
+export PATH=`pwd`/bin:$PATH
+export LLVM_CONFIG=`pwd`/bin/llvm-config
+cd /path/to/AFLplusplus/
+make
+cd llvm_mode
+make
+cd ..
+make install
 ```
 
 ## How to use afl-clang-lto
diff --git a/llvm_mode/README.md b/llvm_mode/README.md
index 96b2762c..c24aef49 100644
--- a/llvm_mode/README.md
+++ b/llvm_mode/README.md
@@ -35,7 +35,7 @@ Once this implementation is shown to be sufficiently robust and portable, it
 will probably replace afl-clang. For now, it can be built separately and
 co-exists with the original code.
 
-The idea and much of the implementation comes from Laszlo Szekeres.
+The idea and much of the intial implementation came from Laszlo Szekeres.
 
 ## 2a) How to use this - short
 
@@ -56,6 +56,8 @@ LLVM_CONFIG=llvm-config-7 REAL_CC=gcc REAL_CXX=g++ make
 It is highly recommended to use the newest clang version you can put your
 hands on :)
 
+Then look at [README.persistent_mode.md](README.persistent_mode.md).
+
 ## 2b) How to use this - long
 
 In order to leverage this mechanism, you need to have clang installed on your
@@ -159,96 +161,13 @@ See [README.snapshot](README.snapshot.md)
 This is an early-stage mechanism, so field reports are welcome. You can send bug
 reports to <afl-users@googlegroups.com>.
 
-## 6) Bonus feature #1: deferred initialization
-
-AFL tries to optimize performance by executing the targeted binary just once,
-stopping it just before main(), and then cloning this "master" process to get
-a steady supply of targets to fuzz.
-
-Although this approach eliminates much of the OS-, linker- and libc-level
-costs of executing the program, it does not always help with binaries that
-perform other time-consuming initialization steps - say, parsing a large config
-file before getting to the fuzzed data.
-
-In such cases, it's beneficial to initialize the forkserver a bit later, once
-most of the initialization work is already done, but before the binary attempts
-to read the fuzzed input and parse it; in some cases, this can offer a 10x+
-performance gain. You can implement delayed initialization in LLVM mode in a
-fairly simple way.
-
-First, find a suitable location in the code where the delayed cloning can 
-take place. This needs to be done with *extreme* care to avoid breaking the
-binary. In particular, the program will probably malfunction if you select
-a location after:
-
-  - The creation of any vital threads or child processes - since the forkserver
-    can't clone them easily.
-
-  - The initialization of timers via setitimer() or equivalent calls.
-
-  - The creation of temporary files, network sockets, offset-sensitive file
-    descriptors, and similar shared-state resources - but only provided that
-    their state meaningfully influences the behavior of the program later on.
-
-  - Any access to the fuzzed input, including reading the metadata about its
-    size.
-
-With the location selected, add this code in the appropriate spot:
-
-```c
-#ifdef __AFL_HAVE_MANUAL_CONTROL
-  __AFL_INIT();
-#endif
-```
-
-You don't need the #ifdef guards, but including them ensures that the program
-will keep working normally when compiled with a tool other than afl-clang-fast.
-
-Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will
-*not* generate a deferred-initialization binary) - and you should be all set!
-
-## 7) Bonus feature #2: persistent mode
-
-Some libraries provide APIs that are stateless, or whose state can be reset in
-between processing different input files. When such a reset is performed, a
-single long-lived process can be reused to try out multiple test cases,
-eliminating the need for repeated fork() calls and the associated OS overhead.
-
-The basic structure of the program that does this would be:
-
-```c
-  while (__AFL_LOOP(1000)) {
-
-    /* Read input data. */
-    /* Call library code to be fuzzed. */
-    /* Reset state. */
-
-  }
-
-  /* Exit normally */
-```
-
-The numerical value specified within the loop controls the maximum number
-of iterations before AFL will restart the process from scratch. This minimizes
-the impact of memory leaks and similar glitches; 1000 is a good starting point,
-and going much higher increases the likelihood of hiccups without giving you
-any real performance benefits.
-
-A more detailed template is shown in ../examples/persistent_demo/.
-Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef
-guards can be used to suppress it when using other compilers.
-
-Note that as with the previous mode, the feature is easy to misuse; if you
-do not fully reset the critical state, you may end up with false positives or
-waste a whole lot of CPU power doing nothing useful at all. Be particularly
-wary of memory leaks and of the state of file descriptors.
+## 6) deferred initialization, persistent mode, shared memory fuzzing
 
-PS. Because there are task switches still involved, the mode isn't as fast as
-"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot
-faster than the normal fork() model, and compared to in-process fuzzing,
-should be a lot more robust.
+This is the most powerful and effective fuzzing you can do.
+Please see [README.persistent_mode.md](README.persistent_mode.md) for a
+full explanation.
 
-## 8) Bonus feature #3: 'trace-pc-guard' mode
+## 7) Bonus feature: 'trace-pc-guard' mode
 
 LLVM is shipping with a built-in execution tracing feature
 that provides AFL with the necessary tracing data without the need to
@@ -260,11 +179,8 @@ If you have not an outdated compiler and want to give it a try, build
 targets this way:
 
 ```
- libtarget-1.0 $ AFL_LLVM_USE_TRACE_PC=1  make
+AFL_LLVM_INSTRUMENT=PCGUARD  make
 ```
 
-Note that this mode is about 20% slower than "vanilla" afl-clang-fast,
-and about 5-10% slower than afl-clang. This is likely because the
-instrumentation is not inlined, and instead involves a function call.
-On systems that support it, compiling your target with -flto can help
-a bit.
+Note that this us currently the default, as it is the best mode.
+If you have llvm 11 and compiled afl-clang-lto - this is the only better mode.
diff --git a/llvm_mode/README.persistent_mode.md b/llvm_mode/README.persistent_mode.md
new file mode 100644
index 00000000..7aae8faa
--- /dev/null
+++ b/llvm_mode/README.persistent_mode.md
@@ -0,0 +1,167 @@
+# llvm_mode persistent mode
+
+## 1) Introduction
+
+The most effective way is to fuzz in persistent mode, as the speed can easily
+be x10 or x20 times faster without any disadvanges.
+*All professionel fuzzing is using this mode.*
+
+This requires that the target can be called in a (or several) function(s),
+and that the state can be resetted so that multiple calls be be performed
+without memory leaking and former runs having no impact on following runs
+(this can be seen by the `stability` indicator in the `afl-fuzz` UI).
+
+Examples can be found in [examples/persistent_mode](../examples/persistent_mode).
+
+## 2) TLDR;
+
+Example `fuzz_target.c`:
+```
+#include "what_you_need_for_your_target.h"
+
+__AFL_FUZZ_INIT();
+
+main() {
+
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+  __AFL_INIT();
+#endif
+
+  unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;  // must be after __AFL_INIT
+
+  while (__AFL_LOOP(10000)) {
+
+    int len = __AFL_FUZZ_TESTCASE_LEN;
+    if (len < 8) continue;  // check for a required/useful minimum input length
+
+    /* Setup function call, e.g. struct target *tmp = libtarget_init() */
+    /* Call function to be fuzzed, e.g.: */
+    target_function(buf, len);
+    /* Reset state. e.g. libtarget_free(tmp) */
+
+  }
+
+  return 0;
+
+}
+```
+And then compile:
+```
+afl-clang-fast -o fuzz_target fuzz_target.c -lwhat_you_need_for_your_target
+```
+And that is it!
+The speed increase is usually x10 to x20.
+
+## 3) deferred initialization
+
+AFL tries to optimize performance by executing the targeted binary just once,
+stopping it just before main(), and then cloning this "master" process to get
+a steady supply of targets to fuzz.
+
+Although this approach eliminates much of the OS-, linker- and libc-level
+costs of executing the program, it does not always help with binaries that
+perform other time-consuming initialization steps - say, parsing a large config
+file before getting to the fuzzed data.
+
+In such cases, it's beneficial to initialize the forkserver a bit later, once
+most of the initialization work is already done, but before the binary attempts
+to read the fuzzed input and parse it; in some cases, this can offer a 10x+
+performance gain. You can implement delayed initialization in LLVM mode in a
+fairly simple way.
+
+First, find a suitable location in the code where the delayed cloning can 
+take place. This needs to be done with *extreme* care to avoid breaking the
+binary. In particular, the program will probably malfunction if you select
+a location after:
+
+  - The creation of any vital threads or child processes - since the forkserver
+    can't clone them easily.
+
+  - The initialization of timers via setitimer() or equivalent calls.
+
+  - The creation of temporary files, network sockets, offset-sensitive file
+    descriptors, and similar shared-state resources - but only provided that
+    their state meaningfully influences the behavior of the program later on.
+
+  - Any access to the fuzzed input, including reading the metadata about its
+    size.
+
+With the location selected, add this code in the appropriate spot:
+
+```c
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+  __AFL_INIT();
+#endif
+```
+
+You don't need the #ifdef guards, but including them ensures that the program
+will keep working normally when compiled with a tool other than afl-clang-fast.
+
+Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will
+*not* generate a deferred-initialization binary) - and you should be all set!
+
+## 4) persistent mode
+
+Some libraries provide APIs that are stateless, or whose state can be reset in
+between processing different input files. When such a reset is performed, a
+single long-lived process can be reused to try out multiple test cases,
+eliminating the need for repeated fork() calls and the associated OS overhead.
+
+The basic structure of the program that does this would be:
+
+```c
+  while (__AFL_LOOP(1000)) {
+
+    /* Read input data. */
+    /* Call library code to be fuzzed. */
+    /* Reset state. */
+
+  }
+
+  /* Exit normally */
+```
+
+The numerical value specified within the loop controls the maximum number
+of iterations before AFL will restart the process from scratch. This minimizes
+the impact of memory leaks and similar glitches; 1000 is a good starting point,
+and going much higher increases the likelihood of hiccups without giving you
+any real performance benefits.
+
+A more detailed template is shown in ../examples/persistent_demo/.
+Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef
+guards can be used to suppress it when using other compilers.
+
+Note that as with the previous mode, the feature is easy to misuse; if you
+do not fully reset the critical state, you may end up with false positives or
+waste a whole lot of CPU power doing nothing useful at all. Be particularly
+wary of memory leaks and of the state of file descriptors.
+
+PS. Because there are task switches still involved, the mode isn't as fast as
+"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot
+faster than the normal fork() model, and compared to in-process fuzzing,
+should be a lot more robust.
+
+## 5) shared memory fuzzing
+
+You can speed up the fuzzing process even more by receiving the fuzzing data
+via shared memory instead of stdin or files.
+This is a further speed multiplier of about 2x.
+
+Setting this up is very easy:
+
+After the includes set the following macro:
+
+```
+__AFL_FUZZ_INIT();
+```
+Directly at the start of main - or if you are using the deferred forkserver
+with `__AFL_INIT()`  then *after* `__AFL_INIT? :
+```
+  unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
+```
+
+Then as first line after the `__AFL_LOOP` while loop:
+```
+  int len = __AFL_FUZZ_TESTCASE_LEN;
+```
+and that is all!
diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c
index 8791c5ae..0b081ae6 100644
--- a/llvm_mode/afl-clang-fast.c
+++ b/llvm_mode/afl-clang-fast.c
@@ -45,11 +45,11 @@ static u32  cc_par_cnt = 1;            /* Param count, including argv0      */
 static u8   llvm_fullpath[PATH_MAX];
 static u8  instrument_mode, instrument_opt_mode, ngram_size, lto_mode, cpp_mode;
 static u8 *lto_flag = AFL_CLANG_FLTO;
-static u8 *march_opt = CFLAGS_OPT;
 static u8  debug;
 static u8  cwd[4096];
 static u8  cmplog_mode;
 u8         use_stdin = 0;                                          /* dummy */
+// static u8 *march_opt = CFLAGS_OPT;
 
 enum {
 
@@ -206,6 +206,8 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   }
 
+  cc_params[cc_par_cnt++] = "-Wno-unused-command-line-argument";
+
   if (lto_mode && cpp_mode)
     cc_params[cc_par_cnt++] = "-lc++";  // needed by fuzzbench, early
 
@@ -335,7 +337,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   }
 
-  cc_params[cc_par_cnt++] = "-Qunused-arguments";
+  // cc_params[cc_par_cnt++] = "-Qunused-arguments";
 
   // in case LLVM is installed not via a package manager or "make install"
   // e.g. compiled download or compiled from github then it's ./lib directory
@@ -440,8 +442,8 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     cc_params[cc_par_cnt++] = "-g";
     cc_params[cc_par_cnt++] = "-O3";
     cc_params[cc_par_cnt++] = "-funroll-loops";
-    if (strlen(march_opt) > 1 && march_opt[0] == '-')
-      cc_params[cc_par_cnt++] = march_opt;
+    // if (strlen(march_opt) > 1 && march_opt[0] == '-')
+    //  cc_params[cc_par_cnt++] = march_opt;
 
   }
 
@@ -490,6 +492,19 @@ static void edit_params(u32 argc, char **argv, char **envp) {
    */
 
   cc_params[cc_par_cnt++] =
+      "-D__AFL_FUZZ_INIT()="
+      "int __afl_sharedmem_fuzzing = 1;"
+      "extern unsigned int __afl_fuzz_len;"
+      "extern unsigned char *__afl_fuzz_ptr;"
+      "unsigned char *__afl_fuzz_alt_ptr;";
+  cc_params[cc_par_cnt++] =
+      "-D__AFL_FUZZ_TESTCASE_BUF=(__afl_fuzz_ptr ? __afl_fuzz_ptr : "
+      "(__afl_fuzz_alt_ptr = malloc(1 * 1024 * 1024)))";
+  cc_params[cc_par_cnt++] =
+      "-D__AFL_FUZZ_TESTCASE_LEN=(__afl_fuzz_ptr ? __afl_fuzz_len : read(0, "
+      "__afl_fuzz_alt_ptr, 1 * 1024 * 1024))";
+
+  cc_params[cc_par_cnt++] =
       "-D__AFL_LOOP(_A)="
       "({ static volatile char *_B __attribute__((used)); "
       " _B = (char*)\"" PERSIST_SIG
diff --git a/llvm_mode/afl-llvm-rt.o.c b/llvm_mode/afl-llvm-rt.o.c
index dac35796..3a0584e4 100644
--- a/llvm_mode/afl-llvm-rt.o.c
+++ b/llvm_mode/afl-llvm-rt.o.c
@@ -76,6 +76,8 @@ u8                  __afl_area_initial[MAP_SIZE];
 #endif
 u8 *__afl_area_ptr = __afl_area_initial;
 u8 *__afl_dictionary;
+u8 *__afl_fuzz_ptr;
+u32 __afl_fuzz_len;
 
 u32 __afl_final_loc;
 u32 __afl_map_size = MAP_SIZE;
@@ -92,6 +94,8 @@ __thread u32        __afl_prev_ctx;
 __thread u32        __afl_cmp_counter;
 #endif
 
+int __afl_sharedmem_fuzzing __attribute__((weak));
+
 struct cmp_map *__afl_cmp_map;
 
 /* Running in persistent mode? */
@@ -109,6 +113,59 @@ void send_forkserver_error(int error) {
 
 }
 
+/* SHM fuzzing setup. */
+
+static void __afl_map_shm_fuzz() {
+
+  char *id_str = getenv(SHM_FUZZ_ENV_VAR);
+
+  if (id_str) {
+
+#ifdef USEMMAP
+    const char *   shm_file_path = id_str;
+    int            shm_fd = -1;
+    unsigned char *shm_base = NULL;
+
+    /* create the shared memory segment as if it was a file */
+    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    if (shm_fd == -1) {
+
+      fprintf(stderr, "shm_open() failed for fuzz\n");
+      send_forkserver_error(FS_ERROR_SHM_OPEN);
+      exit(1);
+
+    }
+
+    __afl_fuzz_ptr = mmap(0, MAX_FILE, PROT_READ, MAP_SHARED, shm_fd, 0);
+
+#else
+    u32 shm_id = atoi(id_str);
+
+    __afl_fuzz_ptr = shmat(shm_id, NULL, 0);
+
+#endif
+
+    /* Whooooops. */
+
+    if (__afl_fuzz_ptr == (void *)-1) {
+
+      fprintf(stderr, "Error: could not access fuzzing shared memory\n");
+      exit(1);
+
+    }
+
+    if (getenv("AFL_DEBUG"))
+      fprintf(stderr, "DEBUG: successfully got fuzzing shared memory\n");
+
+  } else {
+
+    fprintf(stderr, "Error: variable for fuzzing shared memory is not set\n");
+    exit(1);
+
+  }
+
+}
+
 /* SHM setup. */
 
 static void __afl_map_shm(void) {
@@ -310,17 +367,25 @@ static void __afl_start_snapshots(void) {
      assume we're not running in forkserver mode and just execute program. */
 
   status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT);
+  if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ;
   if (__afl_map_size <= FS_OPT_MAX_MAPSIZE)
     status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE);
-  if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT;
+  if (__afl_dictionary_len && __afl_dictionary) status |= FS_OPT_AUTODICT;
   memcpy(tmp, &status, 4);
 
   if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
 
-  if (__afl_dictionary_len > 0 && __afl_dictionary) {
+  if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) {
 
     if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
 
+    if ((was_killed & (0xffffffff & (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ))) ==
+        (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) {
+
+      __afl_map_shm_fuzz();
+
+    }
+
     if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) ==
         (FS_OPT_ENABLED | FS_OPT_AUTODICT)) {
 
@@ -357,7 +422,7 @@ static void __afl_start_snapshots(void) {
 
       // uh this forkserver master does not understand extended option passing
       // or does not want the dictionary
-      already_read_first = 1;
+      if (!__afl_fuzz_ptr) already_read_first = 1;
 
     }
 
@@ -378,6 +443,9 @@ static void __afl_start_snapshots(void) {
 
     }
 
+    __afl_fuzz_len = (was_killed >> 8);
+    was_killed = (was_killed & 0xff);
+
     /* If we stopped the child in persistent mode, but there was a race
        condition and afl-fuzz already issued SIGKILL, write off the old
        process. */
@@ -451,7 +519,7 @@ static void __afl_start_snapshots(void) {
 static void __afl_start_forkserver(void) {
 
 #ifdef __linux__
-  if (!is_persistent && !__afl_cmp_map && !getenv("AFL_NO_SNAPSHOT") &&
+  if (/*!is_persistent &&*/ !__afl_cmp_map && !getenv("AFL_NO_SNAPSHOT") &&
       afl_snapshot_init() >= 0) {
 
     __afl_start_snapshots();
@@ -473,7 +541,8 @@ static void __afl_start_forkserver(void) {
 
   if (__afl_map_size <= FS_OPT_MAX_MAPSIZE)
     status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE);
-  if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT;
+  if (__afl_dictionary_len && __afl_dictionary) status |= FS_OPT_AUTODICT;
+  if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ;
   if (status) status |= (FS_OPT_ENABLED);
   memcpy(tmp, &status, 4);
 
@@ -482,10 +551,17 @@ static void __afl_start_forkserver(void) {
 
   if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
 
-  if (__afl_dictionary_len > 0 && __afl_dictionary) {
+  if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) {
 
     if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
 
+    if ((was_killed & (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) ==
+        (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) {
+
+      __afl_map_shm_fuzz();
+
+    }
+
     if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) ==
         (FS_OPT_ENABLED | FS_OPT_AUTODICT)) {
 
@@ -522,7 +598,7 @@ static void __afl_start_forkserver(void) {
 
       // uh this forkserver master does not understand extended option passing
       // or does not want the dictionary
-      already_read_first = 1;
+      if (!__afl_fuzz_ptr) already_read_first = 1;
 
     }
 
@@ -544,6 +620,9 @@ static void __afl_start_forkserver(void) {
 
     }
 
+    __afl_fuzz_len = (was_killed >> 8);
+    was_killed = (was_killed & 0xff);
+
     /* If we stopped the child in persistent mode, but there was a race
        condition and afl-fuzz already issued SIGKILL, write off the old
        process. */
@@ -709,13 +788,13 @@ void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
 
 void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
-  u32 inst_ratio = 100;
-  u8 *x;
+  u32   inst_ratio = 100;
+  char *x;
 
   if (start == stop || *start) return;
 
   x = getenv("AFL_INST_RATIO");
-  if (x) inst_ratio = atoi(x);
+  if (x) inst_ratio = (u32)atoi(x);
 
   if (!inst_ratio || inst_ratio > 100) {
 
diff --git a/qemu_mode/README.md b/qemu_mode/README.md
index 50d451b6..3cf678e4 100644
--- a/qemu_mode/README.md
+++ b/qemu_mode/README.md
@@ -147,8 +147,8 @@ non-instrumented binary with the same optimization flags that are normally
 injected by afl-gcc, and make sure that the bits to be tested are statically
 linked into the binary. A common way to do this would be:
 
-$ CFLAGS="-O3 -funroll-loops" ./configure --disable-shared
-$ make clean all
+CFLAGS="-O3 -funroll-loops" ./configure --disable-shared
+make clean all
 
 Comparative measurements of execution speed or instrumentation coverage will be
 fairly meaningless if the optimization levels or instrumentation scopes don't
diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c
index b67aedde..137a4f99 100644
--- a/src/afl-forkserver.c
+++ b/src/afl-forkserver.c
@@ -442,7 +442,7 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
 
     if ((status & FS_OPT_ENABLED) == FS_OPT_ENABLED) {
 
-      if (!be_quiet && getenv("AFL_DEBUG")) {
+      if (getenv("AFL_DEBUG")) {
 
         ACTF("Extended forkserver functions received (%08x).", status);
 
@@ -455,6 +455,28 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
 
       }
 
+      if ((status & FS_OPT_SHDMEM_FUZZ) == FS_OPT_SHDMEM_FUZZ) {
+
+        if (fsrv->support_shdmen_fuzz) {
+
+          fsrv->use_shdmen_fuzz = 1;
+          if (!be_quiet) { ACTF("Using SHARED MEMORY FUZZING feature."); }
+
+          if ((status & FS_OPT_AUTODICT) == 0) {
+
+            u32 send_status = (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ);
+            if (write(fsrv->fsrv_ctl_fd, &send_status, 4) != 4) {
+
+              FATAL("Writing to forkserver failed.");
+
+            }
+
+          }
+
+        }
+
+      }
+
       if ((status & FS_OPT_MAPSIZE) == FS_OPT_MAPSIZE) {
 
         u32 tmp_map_size = FS_OPT_GET_MAPSIZE(status);
@@ -490,7 +512,10 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
         if (fsrv->function_ptr == NULL || fsrv->function_opt == NULL) {
 
           // this is not afl-fuzz - we deny and return
-          status = (0xffffffff ^ (FS_OPT_ENABLED | FS_OPT_AUTODICT));
+          if (fsrv->use_shdmen_fuzz)
+            status = (FS_OPT_ENABLED | FS_OPT_AUTODICT | FS_OPT_SHDMEM_FUZZ);
+          else
+            status = (FS_OPT_ENABLED | FS_OPT_AUTODICT);
           if (write(fsrv->fsrv_ctl_fd, &status, 4) != 4) {
 
             FATAL("Writing to forkserver failed.");
@@ -749,39 +774,48 @@ static void afl_fsrv_kill(afl_forkserver_t *fsrv) {
 
 void afl_fsrv_write_to_testcase(afl_forkserver_t *fsrv, u8 *buf, size_t len) {
 
-  s32 fd = fsrv->out_fd;
+  if (fsrv->shdmem_fuzz) {
 
-  if (fsrv->out_file) {
+    memcpy(fsrv->shdmem_fuzz, buf, len);
+    fsrv->shdmem_fuzz_len = len;
 
-    if (fsrv->no_unlink) {
+  } else {
 
-      fd = open(fsrv->out_file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+    s32 fd = fsrv->out_fd;
 
-    } else {
+    if (fsrv->out_file) {
 
-      unlink(fsrv->out_file);                             /* Ignore errors. */
-      fd = open(fsrv->out_file, O_WRONLY | O_CREAT | O_EXCL, 0600);
+      if (fsrv->no_unlink) {
 
-    }
+        fd = open(fsrv->out_file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
 
-    if (fd < 0) { PFATAL("Unable to create '%s'", fsrv->out_file); }
+      } else {
 
-  } else {
+        unlink(fsrv->out_file);                           /* Ignore errors. */
+        fd = open(fsrv->out_file, O_WRONLY | O_CREAT | O_EXCL, 0600);
 
-    lseek(fd, 0, SEEK_SET);
+      }
 
-  }
+      if (fd < 0) { PFATAL("Unable to create '%s'", fsrv->out_file); }
 
-  ck_write(fd, buf, len, fsrv->out_file);
+    } else {
 
-  if (!fsrv->out_file) {
+      lseek(fd, 0, SEEK_SET);
 
-    if (ftruncate(fd, len)) { PFATAL("ftruncate() failed"); }
-    lseek(fd, 0, SEEK_SET);
+    }
 
-  } else {
+    ck_write(fd, buf, len, fsrv->out_file);
 
-    close(fd);
+    if (!fsrv->out_file) {
+
+      if (ftruncate(fd, len)) { PFATAL("ftruncate() failed"); }
+      lseek(fd, 0, SEEK_SET);
+
+    } else {
+
+      close(fd);
+
+    }
 
   }
 
@@ -795,6 +829,7 @@ fsrv_run_result_t afl_fsrv_run_target(afl_forkserver_t *fsrv, u32 timeout,
 
   s32 res;
   u32 exec_ms;
+  u32 write_value = fsrv->last_run_timed_out;
 
   /* After this memset, fsrv->trace_bits[] are effectively volatile, so we
      must prevent any earlier operations from venturing into that
@@ -804,10 +839,12 @@ fsrv_run_result_t afl_fsrv_run_target(afl_forkserver_t *fsrv, u32 timeout,
 
   MEM_BARRIER();
 
+  if (fsrv->shdmem_fuzz_len) write_value += (fsrv->shdmem_fuzz_len << 8);
+
   /* we have the fork server (or faux server) up and running
   First, tell it if the previous run timed out. */
 
-  if ((res = write(fsrv->fsrv_ctl_fd, &fsrv->last_run_timed_out, 4)) != 4) {
+  if ((res = write(fsrv->fsrv_ctl_fd, &write_value, 4)) != 4) {
 
     if (*stop_soon_p) { return 0; }
     RPFATAL(res, "Unable to request new process from fork server (OOM?)");
diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c
index dd85a8f4..9349fefe 100644
--- a/src/afl-fuzz-init.c
+++ b/src/afl-fuzz-init.c
@@ -2153,6 +2153,30 @@ void check_binary(afl_state_t *afl, u8 *fname) {
     OKF(cPIN "Persistent mode binary detected.");
     setenv(PERSIST_ENV_VAR, "1", 1);
     afl->persistent_mode = 1;
+    // do not fail if we can not get the fuzzing shared mem
+    if ((afl->shm_fuzz = calloc(1, sizeof(sharedmem_t)))) {
+
+      // we need to set the dumb mode to not overwrite the SHM_ENV_VAR
+      if ((afl->fsrv.shdmem_fuzz = afl_shm_init(afl->shm_fuzz, MAX_FILE, 1))) {
+
+#ifdef USEMMAP
+        setenv(SHM_FUZZ_ENV_VAR, afl->shm_fuzz->g_shm_file_path, 1);
+#else
+        u8 *shm_str;
+        shm_str = alloc_printf("%d", afl->shm_fuzz->shm_id);
+        setenv(SHM_FUZZ_ENV_VAR, shm_str, 1);
+        ck_free(shm_str);
+#endif
+        afl->fsrv.support_shdmen_fuzz = 1;
+
+      } else {
+
+        free(afl->shm_fuzz);
+        afl->shm_fuzz = NULL;
+
+      }
+
+    }
 
   } else if (getenv("AFL_PERSISTENT")) {
 
diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c
index ddd15c84..56f16b4c 100644
--- a/src/afl-fuzz-one.c
+++ b/src/afl-fuzz-one.c
@@ -4250,11 +4250,27 @@ pacemaker_fuzzing:
           u64 temp_temp_puppet =
               afl->queued_paths + afl->unique_crashes - temp_total_found;
           afl->total_puppet_find = afl->total_puppet_find + temp_temp_puppet;
-          for (i = 0; i < operator_num; ++i) {
 
-            if (MOpt_globals.cycles_v2[i] > MOpt_globals.cycles_v3[i]) {
+          if (MOpt_globals.is_pilot_mode) {
+
+            for (i = 0; i < operator_num; ++i) {
+
+              if (MOpt_globals.cycles_v2[i] > MOpt_globals.cycles_v3[i]) {
+
+                MOpt_globals.finds_v2[i] += temp_temp_puppet;
 
-              MOpt_globals.finds_v2[i] += temp_temp_puppet;
+              }
+
+            }
+
+          } else {
+
+            for (i = 0; i < operator_num; i++) {
+
+              if (afl->core_operator_cycles_puppet_v2[i] >
+                  afl->core_operator_cycles_puppet_v3[i])
+
+                afl->core_operator_finds_puppet_v2[i] += temp_temp_puppet;
 
             }
 
@@ -4437,7 +4453,6 @@ pacemaker_fuzzing:
 
         afl->total_pacemaker_time += *MOpt_globals.pTime;
         *MOpt_globals.pTime = 0;
-        afl->temp_puppet_find = afl->total_puppet_find;
         new_hit_cnt = afl->queued_paths + afl->unique_crashes;
 
         if (MOpt_globals.is_pilot_mode) {
@@ -4448,6 +4463,7 @@ pacemaker_fuzzing:
 
         }
 
+        afl->temp_puppet_find = afl->total_puppet_find;
         u64 temp_stage_finds_puppet = 0;
         for (i = 0; i < operator_num; ++i) {
 
@@ -4530,6 +4546,16 @@ pacemaker_fuzzing:
 
         } else {
 
+          for (i = 0; i < operator_num; i++) {
+
+            afl->core_operator_finds_puppet[i] =
+                afl->core_operator_finds_puppet_v2[i];
+            afl->core_operator_cycles_puppet[i] =
+                afl->core_operator_cycles_puppet_v2[i];
+            temp_stage_finds_puppet += afl->core_operator_finds_puppet[i];
+
+          }
+
           afl->key_module = 2;
 
           afl->old_hit_count = new_hit_cnt;
diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c
index 8a1f02a7..04450363 100644
--- a/src/afl-fuzz-run.c
+++ b/src/afl-fuzz-run.c
@@ -231,6 +231,16 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
     afl_fsrv_start(&afl->fsrv, afl->argv, &afl->stop_soon,
                    afl->afl_env.afl_debug_child_output);
 
+    if (afl->fsrv.support_shdmen_fuzz && !afl->fsrv.use_shdmen_fuzz) {
+
+      afl_shm_deinit(afl->shm_fuzz);
+      free(afl->shm_fuzz);
+      afl->shm_fuzz = NULL;
+      afl->fsrv.support_shdmen_fuzz = 0;
+      afl->fsrv.shdmem_fuzz = NULL;
+
+    }
+
   }
 
   if (q->exec_cksum) {
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index c07371a8..e024e9a4 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -1379,6 +1379,14 @@ stop_fuzzing:
   destroy_extras(afl);
   destroy_custom_mutators(afl);
   afl_shm_deinit(&afl->shm);
+
+  if (afl->shm_fuzz) {
+
+    afl_shm_deinit(afl->shm_fuzz);
+    free(afl->shm_fuzz);
+
+  }
+
   afl_fsrv_deinit(&afl->fsrv);
   if (afl->orig_cmdline) { ck_free(afl->orig_cmdline); }
   ck_free(afl->fsrv.target_path);
diff --git a/unicorn_mode/README.md b/unicorn_mode/README.md
index 86683839..f6bd4d12 100644
--- a/unicorn_mode/README.md
+++ b/unicorn_mode/README.md
@@ -29,8 +29,8 @@ Once that completes successfully you need to build and add in the Unicorn Mode
 features:
 
 ```
-$ cd unicorn_mode
-$ ./build_unicorn_support.sh
+cd unicorn_mode
+./build_unicorn_support.sh
 ```
 
 NOTE: This script checks out a Unicorn Engine fork as submodule that has been tested 
@@ -71,7 +71,7 @@ Once you have all those things ready to go you just need to run afl-fuzz in
 'unicorn-mode' by passing in the '-U' flag:
 
 ```
-$ afl-fuzz -U -m none -i /path/to/inputs -o /path/to/results -- ./test_harness @@
+afl-fuzz -U -m none -i /path/to/inputs -o /path/to/results -- ./test_harness @@
 ```
 
 The normal afl-fuzz command line format applies to everything here. Refer to