16 files changed, 617 insertions, 330 deletions
diff --git a/llvm_mode/GNUmakefile b/llvm_mode/GNUmakefile
index 01c83787..50a6be2b 100644
--- a/llvm_mode/GNUmakefile
+++ b/llvm_mode/GNUmakefile
@@ -32,7 +32,7 @@ ifeq "$(shell uname)" "OpenBSD"
   LLVM_CONFIG ?= $(BIN_PATH)/llvm-config
   HAS_OPT = $(shell test -x $(BIN_PATH)/opt && echo 0 || echo 1)
   ifeq "$(HAS_OPT)" "1"
-    $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 11) -> e.g. "pkg_add llvm-7.0.1p9")
+    $(error llvm_mode needs a complete llvm installation (versions 3.4 up to 11) -> e.g. "pkg_add llvm-7.0.1p9")
   endif
 else
   LLVM_CONFIG ?= llvm-config
@@ -53,7 +53,7 @@ ifeq "$(LLVMVER)" ""
 endif
 
 ifeq "$(LLVM_UNSUPPORTED)" "1"
-  $(warning llvm_mode only supports llvm versions 3.8.0 up to 11)
+  $(warning llvm_mode only supports llvm versions 3.4 up to 11)
 endif
 
 ifeq "$(LLVM_MAJOR)" "9"
@@ -160,9 +160,9 @@ endif
 
 # After we set CC/CXX we can start makefile magic tests
 
-ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-	CFLAGS_OPT = -march=native
-endif
+#ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+#	CFLAGS_OPT = -march=native
+#endif
 
 ifeq "$(shell echo 'int main() {return 0; }' | $(CLANG_BIN) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
         AFL_CLANG_FLTO ?= -flto=full
@@ -355,15 +355,15 @@ endif
 	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) afl-llvm-common.o
 
 ../afl-llvm-rt.o: afl-llvm-rt.o.c | test_deps
-	$(CC) $(CFLAGS) -Wno-unused-result -fPIC -c $< -o $@
+	$(CLANG_BIN) $(CFLAGS) -Wno-unused-result -fPIC -c $< -o $@
 
 ../afl-llvm-rt-32.o: afl-llvm-rt.o.c | test_deps
 	@printf "[*] Building 32-bit variant of the runtime (-m32)... "
-	@$(CC) $(CFLAGS) -Wno-unused-result -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
+	@$(CC_SAVE) $(CFLAGS) -Wno-unused-result -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
 
 ../afl-llvm-rt-64.o: afl-llvm-rt.o.c | test_deps
 	@printf "[*] Building 64-bit variant of the runtime (-m64)... "
-	@$(CC) $(CFLAGS) -Wno-unused-result -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
+	@$(CC_SAVE) $(CFLAGS) -Wno-unused-result -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
 
 test_build: $(PROGS)
 	@echo "[*] Testing the CC wrapper and instrumentation output..."
diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc
index 7dc96bc3..ced1f383 100644
--- a/llvm_mode/LLVMInsTrim.so.cc
+++ b/llvm_mode/LLVMInsTrim.so.cc
@@ -160,21 +160,23 @@ struct InsTrim : public ModulePass {
     else
 #else
     if (ngram_size_str)
-#ifdef LLVM_VERSION_STRING
+  #ifdef LLVM_VERSION_STRING
       FATAL(
           "Sorry, NGRAM branch coverage is not supported with llvm version %s!",
           LLVM_VERSION_STRING);
-#else
-#ifndef LLVM_VERSION_PATCH
+  #else
+    #ifndef LLVM_VERSION_PATCH
       FATAL(
-          "Sorry, NGRAM branch coverage is not supported with llvm version %d.%d.%d!",
+          "Sorry, NGRAM branch coverage is not supported with llvm version "
+          "%d.%d.%d!",
           LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, 0);
-#else
+    #else
       FATAL(
-          "Sorry, NGRAM branch coverage is not supported with llvm version %d.%d.%d!",
+          "Sorry, NGRAM branch coverage is not supported with llvm version "
+          "%d.%d.%d!",
-          LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERISON_PATCH);
+          LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH);
-#endif
-#endif
+    #endif
+  #endif
 #endif
       PrevLocSize = 1;
 
diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile
index 0b306dde..3666a74d 100644
--- a/llvm_mode/Makefile
+++ b/llvm_mode/Makefile
@@ -1,2 +1,2 @@
 all:
-	@echo please use GNU make, thanks!
+	@gmake all || echo please install GNUmake
diff --git a/llvm_mode/README.instrim.md b/llvm_mode/README.instrim.md
index b905af11..53a518a9 100644
--- a/llvm_mode/README.instrim.md
+++ b/llvm_mode/README.instrim.md
@@ -6,6 +6,7 @@ InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing
 
 InsTrim uses CFG and markers to instrument just what is necessary in the
 binary in llvm_mode. It is about 10-15% faster without disadvantages.
+It requires at least llvm version 3.8.0.
 
 ## Usage
 
diff --git a/llvm_mode/README.laf-intel.md b/llvm_mode/README.laf-intel.md
index 462c7bac..2fa4bc26 100644
--- a/llvm_mode/README.laf-intel.md
+++ b/llvm_mode/README.laf-intel.md
@@ -37,3 +37,6 @@ series of sign, exponent and mantissa comparisons followed by splitting each
 of them into 8 bit comparisons when necessary.
 It is activated with the `AFL_LLVM_LAF_SPLIT_FLOATS` setting, available only
 when `AFL_LLVM_LAF_SPLIT_COMPARES` is set.
+
+You can also set `AFL_LLVM_LAF_ALL` and have all of the above enabled :-)
+
diff --git a/llvm_mode/README.lto.md b/llvm_mode/README.lto.md
index fa5b8665..48c587eb 100644
--- a/llvm_mode/README.lto.md
+++ b/llvm_mode/README.lto.md
@@ -6,8 +6,6 @@ This version requires a current llvm 11 compiled from the github master.
 
 1. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better
    coverage than anything else that is out there in the AFL world
-  1a. Set AFL_LLVM_INSTRUMENT=CFG if you want the InsTrimLTO version
-      (recommended)
 
 2. You can use it together with llvm_mode: laf-intel and whitelisting
    features and can be combined with cmplog/Redqueen
@@ -20,7 +18,6 @@ This version requires a current llvm 11 compiled from the github master.
    note that if that target uses _init functions or early constructors then
    also set `AFL_LLVM_MAP_DYNAMIC=1` as your target will crash otherwise
 
-
 ## Introduction and problem description
 
 A big issue with how afl/afl++ works is that the basic block IDs that are
@@ -50,7 +47,8 @@ and many dead ends until we got to this:
 The result:
  * 10-25% speed gain compared to llvm_mode
  * guaranteed non-colliding edge coverage :-)
- * The compile time especially for libraries can be longer
+ * The compile time, especially for binaries linked against an instrumented
+   library, can be much longer
 
 Example build output from a libtiff build:
 ```
@@ -61,24 +59,46 @@ AUTODICTIONARY: 11 strings found
 [+] Instrumented 12071 locations with no collisions (on average 1046 collisions would be in afl-gcc/afl-clang-fast) (non-hardened mode).
 ```
 
-## Building llvm 11
+## Getting llvm 11
+
+### Installing llvm 11
+Installing the llvm snapshot builds is easy and mostly painless:
+
+In the following line, replace `NAME` with your Debian or Ubuntu release name
+(e.g. buster, focal, eoan, etc.):
+```
+echo deb http://apt.llvm.org/NAME/ llvm-toolchain-NAME main >> /etc/apt/sources.list
+```
+then add the pgp key of llvm and install the packages:
+```
+wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - 
+apt-get update && apt-get upgrade -y
+apt-get install -y clang-11 clang-tools-11 libc++1-11 libc++-11-dev \
+    libc++abi1-11 libc++abi-11-dev libclang1-11 libclang-11-dev \
+    libclang-common-11-dev libclang-cpp11 libclang-cpp11-dev liblld-11 \
+    liblld-11-dev liblldb-11 liblldb-11-dev libllvm11 libomp-11-dev \
+    libomp5-11 lld-11 lldb-11 llvm-11 llvm-11-dev llvm-11-runtime llvm-11-tools
+```
+
+### Building llvm 11
 
+Building llvm from github takes quite a long time and is not painless:
 ```
-$ sudo apt install binutils-dev  # this is *essential*!
-$ git clone https://github.com/llvm/llvm-project
-$ cd llvm-project
-$ mkdir build
-$ cd build
-$ cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/
-$ make -j $(nproc)
-$ export PATH=`pwd`/bin:$PATH
-$ export LLVM_CONFIG=`pwd`/bin/llvm-config
-$ cd /path/to/AFLplusplus/
-$ make
-$ cd llvm_mode
-$ make
-$ cd ..
-$ make install
+sudo apt install binutils-dev  # this is *essential*!
+git clone https://github.com/llvm/llvm-project
+cd llvm-project
+mkdir build
+cd build
+cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/
+make -j $(nproc)
+export PATH=`pwd`/bin:$PATH
+export LLVM_CONFIG=`pwd`/bin/llvm-config
+cd /path/to/AFLplusplus/
+make
+cd llvm_mode
+make
+cd ..
+make install
 ```
 
 ## How to use afl-clang-lto
@@ -160,11 +180,6 @@ target will likely crash when started. This can be avoided by compiling with
 
 This can e.g. happen with OpenSSL.
 
-## Upcoming Work
-
-1. Currently the LTO whitelist feature does not allow to instrument main,
-   start and init functions
-
 ## History
 
 This was originally envisioned by hexcoder- in Summer 2019, however we saw no
diff --git a/llvm_mode/README.md b/llvm_mode/README.md
index 0bff1ff1..c24aef49 100644
--- a/llvm_mode/README.md
+++ b/llvm_mode/README.md
@@ -6,7 +6,7 @@
 
 ## 1) Introduction
 
-! llvm_mode works with llvm versions 3.8.0 up to 11 !
+! llvm_mode works with llvm versions 3.4 up to 11 !
 
 The code in this directory allows you to instrument programs for AFL using
 true compiler-level instrumentation, instead of the more crude
@@ -35,7 +35,7 @@ Once this implementation is shown to be sufficiently robust and portable, it
 will probably replace afl-clang. For now, it can be built separately and
 co-exists with the original code.
 
-The idea and much of the implementation comes from Laszlo Szekeres.
+The idea and much of the initial implementation came from Laszlo Szekeres.
 
 ## 2a) How to use this - short
 
@@ -56,6 +56,8 @@ LLVM_CONFIG=llvm-config-7 REAL_CC=gcc REAL_CXX=g++ make
 It is highly recommended to use the newest clang version you can put your
 hands on :)
 
+Then look at [README.persistent_mode.md](README.persistent_mode.md).
+
 ## 2b) How to use this - long
 
 In order to leverage this mechanism, you need to have clang installed on your
@@ -159,96 +161,13 @@ See [README.snapshot](README.snapshot.md)
 This is an early-stage mechanism, so field reports are welcome. You can send bug
 reports to <afl-users@googlegroups.com>.
 
-## 6) Bonus feature #1: deferred initialization
-
-AFL tries to optimize performance by executing the targeted binary just once,
-stopping it just before main(), and then cloning this "master" process to get
-a steady supply of targets to fuzz.
-
-Although this approach eliminates much of the OS-, linker- and libc-level
-costs of executing the program, it does not always help with binaries that
-perform other time-consuming initialization steps - say, parsing a large config
-file before getting to the fuzzed data.
-
-In such cases, it's beneficial to initialize the forkserver a bit later, once
-most of the initialization work is already done, but before the binary attempts
-to read the fuzzed input and parse it; in some cases, this can offer a 10x+
-performance gain. You can implement delayed initialization in LLVM mode in a
-fairly simple way.
-
-First, find a suitable location in the code where the delayed cloning can 
-take place. This needs to be done with *extreme* care to avoid breaking the
-binary. In particular, the program will probably malfunction if you select
-a location after:
-
-  - The creation of any vital threads or child processes - since the forkserver
-    can't clone them easily.
-
-  - The initialization of timers via setitimer() or equivalent calls.
-
-  - The creation of temporary files, network sockets, offset-sensitive file
-    descriptors, and similar shared-state resources - but only provided that
-    their state meaningfully influences the behavior of the program later on.
-
-  - Any access to the fuzzed input, including reading the metadata about its
-    size.
-
-With the location selected, add this code in the appropriate spot:
-
-```c
-#ifdef __AFL_HAVE_MANUAL_CONTROL
-  __AFL_INIT();
-#endif
-```
-
-You don't need the #ifdef guards, but including them ensures that the program
-will keep working normally when compiled with a tool other than afl-clang-fast.
-
-Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will
-*not* generate a deferred-initialization binary) - and you should be all set!
-
-## 7) Bonus feature #2: persistent mode
-
-Some libraries provide APIs that are stateless, or whose state can be reset in
-between processing different input files. When such a reset is performed, a
-single long-lived process can be reused to try out multiple test cases,
-eliminating the need for repeated fork() calls and the associated OS overhead.
-
-The basic structure of the program that does this would be:
-
-```c
-  while (__AFL_LOOP(1000)) {
-
-    /* Read input data. */
-    /* Call library code to be fuzzed. */
-    /* Reset state. */
-
-  }
-
-  /* Exit normally */
-```
-
-The numerical value specified within the loop controls the maximum number
-of iterations before AFL will restart the process from scratch. This minimizes
-the impact of memory leaks and similar glitches; 1000 is a good starting point,
-and going much higher increases the likelihood of hiccups without giving you
-any real performance benefits.
-
-A more detailed template is shown in ../examples/persistent_demo/.
-Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef
-guards can be used to suppress it when using other compilers.
-
-Note that as with the previous mode, the feature is easy to misuse; if you
-do not fully reset the critical state, you may end up with false positives or
-waste a whole lot of CPU power doing nothing useful at all. Be particularly
-wary of memory leaks and of the state of file descriptors.
+## 6) deferred initialization, persistent mode, shared memory fuzzing
 
-PS. Because there are task switches still involved, the mode isn't as fast as
-"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot
-faster than the normal fork() model, and compared to in-process fuzzing,
-should be a lot more robust.
+This is the most powerful and effective fuzzing you can do.
+Please see [README.persistent_mode.md](README.persistent_mode.md) for a
+full explanation.
 
-## 8) Bonus feature #3: 'trace-pc-guard' mode
+## 7) Bonus feature: 'trace-pc-guard' mode
 
 LLVM is shipping with a built-in execution tracing feature
 that provides AFL with the necessary tracing data without the need to
@@ -260,11 +179,8 @@ If you have not an outdated compiler and want to give it a try, build
 targets this way:
 
 ```
- libtarget-1.0 $ AFL_LLVM_USE_TRACE_PC=1  make
+AFL_LLVM_INSTRUMENT=PCGUARD  make
 ```
 
-Note that this mode is about 20% slower than "vanilla" afl-clang-fast,
-and about 5-10% slower than afl-clang. This is likely because the
-instrumentation is not inlined, and instead involves a function call.
-On systems that support it, compiling your target with -flto can help
-a bit.
+Note that this is currently the default, as it is the best mode.
+If you have llvm 11 and compiled afl-clang-lto, that is the only better mode.
diff --git a/llvm_mode/README.persistent_mode.md b/llvm_mode/README.persistent_mode.md
new file mode 100644
index 00000000..7aae8faa
--- /dev/null
+++ b/llvm_mode/README.persistent_mode.md
@@ -0,0 +1,167 @@
+# llvm_mode persistent mode
+
+## 1) Introduction
+
+The most effective way to fuzz is in persistent mode, as the speed can easily
+be 10x or 20x faster without any disadvantages.
+*All professional fuzzing uses this mode.*
+
+This requires that the target can be called in one (or several) function(s),
+and that the state can be reset so that multiple calls can be performed
+without leaking memory and without earlier runs affecting later runs
+(this can be seen by the `stability` indicator in the `afl-fuzz` UI).
+
+Examples can be found in [examples/persistent_demo](../examples/persistent_demo).
+
+## 2) TLDR;
+
+Example `fuzz_target.c`:
+```
+#include "what_you_need_for_your_target.h"
+
+__AFL_FUZZ_INIT();
+
+int main(void) {
+
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+  __AFL_INIT();
+#endif
+
+  unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;  // must be after __AFL_INIT
+
+  while (__AFL_LOOP(10000)) {
+
+    int len = __AFL_FUZZ_TESTCASE_LEN;
+    if (len < 8) continue;  // check for a required/useful minimum input length
+
+    /* Setup function call, e.g. struct target *tmp = libtarget_init() */
+    /* Call function to be fuzzed, e.g.: */
+    target_function(buf, len);
+    /* Reset state. e.g. libtarget_free(tmp) */
+
+  }
+
+  return 0;
+
+}
+```
+And then compile:
+```
+afl-clang-fast -o fuzz_target fuzz_target.c -lwhat_you_need_for_your_target
+```
+And that is it!
+The speed increase is usually x10 to x20.
+
+## 3) deferred initialization
+
+AFL tries to optimize performance by executing the targeted binary just once,
+stopping it just before main(), and then cloning this "master" process to get
+a steady supply of targets to fuzz.
+
+Although this approach eliminates much of the OS-, linker- and libc-level
+costs of executing the program, it does not always help with binaries that
+perform other time-consuming initialization steps - say, parsing a large config
+file before getting to the fuzzed data.
+
+In such cases, it's beneficial to initialize the forkserver a bit later, once
+most of the initialization work is already done, but before the binary attempts
+to read the fuzzed input and parse it; in some cases, this can offer a 10x+
+performance gain. You can implement delayed initialization in LLVM mode in a
+fairly simple way.
+
+First, find a suitable location in the code where the delayed cloning can 
+take place. This needs to be done with *extreme* care to avoid breaking the
+binary. In particular, the program will probably malfunction if you select
+a location after:
+
+  - The creation of any vital threads or child processes - since the forkserver
+    can't clone them easily.
+
+  - The initialization of timers via setitimer() or equivalent calls.
+
+  - The creation of temporary files, network sockets, offset-sensitive file
+    descriptors, and similar shared-state resources - but only provided that
+    their state meaningfully influences the behavior of the program later on.
+
+  - Any access to the fuzzed input, including reading the metadata about its
+    size.
+
+With the location selected, add this code in the appropriate spot:
+
+```c
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+  __AFL_INIT();
+#endif
+```
+
+You don't need the #ifdef guards, but including them ensures that the program
+will keep working normally when compiled with a tool other than afl-clang-fast.
+
+Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will
+*not* generate a deferred-initialization binary) - and you should be all set!
+
+## 4) persistent mode
+
+Some libraries provide APIs that are stateless, or whose state can be reset in
+between processing different input files. When such a reset is performed, a
+single long-lived process can be reused to try out multiple test cases,
+eliminating the need for repeated fork() calls and the associated OS overhead.
+
+The basic structure of the program that does this would be:
+
+```c
+  while (__AFL_LOOP(1000)) {
+
+    /* Read input data. */
+    /* Call library code to be fuzzed. */
+    /* Reset state. */
+
+  }
+
+  /* Exit normally */
+```
+
+The numerical value specified within the loop controls the maximum number
+of iterations before AFL will restart the process from scratch. This minimizes
+the impact of memory leaks and similar glitches; 1000 is a good starting point,
+and going much higher increases the likelihood of hiccups without giving you
+any real performance benefits.
+
+A more detailed template is shown in ../examples/persistent_demo/.
+Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef
+guards can be used to suppress it when using other compilers.
+
+Note that as with the previous mode, the feature is easy to misuse; if you
+do not fully reset the critical state, you may end up with false positives or
+waste a whole lot of CPU power doing nothing useful at all. Be particularly
+wary of memory leaks and of the state of file descriptors.
+
+PS. Because there are task switches still involved, the mode isn't as fast as
+"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot
+faster than the normal fork() model, and compared to in-process fuzzing,
+should be a lot more robust.
+
+## 5) shared memory fuzzing
+
+You can speed up the fuzzing process even more by receiving the fuzzing data
+via shared memory instead of stdin or files.
+This is a further speed multiplier of about 2x.
+
+Setting this up is very easy:
+
+After the includes set the following macro:
+
+```
+__AFL_FUZZ_INIT();
+```
+Directly at the start of main - or if you are using the deferred forkserver
+with `__AFL_INIT()` then *after* `__AFL_INIT()`:
+```
+  unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
+```
+
+Then as the first line inside the `__AFL_LOOP` while loop:
+```
+  int len = __AFL_FUZZ_TESTCASE_LEN;
+```
+and that is all!
diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c
index 49dc6c1c..0b081ae6 100644
--- a/llvm_mode/afl-clang-fast.c
+++ b/llvm_mode/afl-clang-fast.c
@@ -45,30 +45,30 @@ static u32  cc_par_cnt = 1;            /* Param count, including argv0      */
 static u8   llvm_fullpath[PATH_MAX];
 static u8  instrument_mode, instrument_opt_mode, ngram_size, lto_mode, cpp_mode;
 static u8 *lto_flag = AFL_CLANG_FLTO;
-static u8 *march_opt = CFLAGS_OPT;
 static u8  debug;
 static u8  cwd[4096];
 static u8  cmplog_mode;
 u8         use_stdin = 0;                                          /* dummy */
+// static u8 *march_opt = CFLAGS_OPT;
 
 enum {
 
-  INSTRUMENT_CLASSIC = 0,
-  INSTRUMENT_AFL = 0,
-  INSTRUMENT_DEFAULT = 0,
-  INSTRUMENT_PCGUARD = 1,
-  INSTRUMENT_INSTRIM = 2,
-  INSTRUMENT_CFG = 2,
-  INSTRUMENT_LTO = 3,
-  INSTRUMENT_OPT_CTX = 4,
-  INSTRUMENT_OPT_NGRAM = 8
+  INSTRUMENT_DEFAULT = 0,
+  INSTRUMENT_CLASSIC = 1,
+  INSTRUMENT_AFL = 1,
+  INSTRUMENT_PCGUARD = 2,
+  INSTRUMENT_INSTRIM = 3,
+  INSTRUMENT_CFG = 3,
+  INSTRUMENT_LTO = 4,
+  INSTRUMENT_OPT_CTX = 8,
+  INSTRUMENT_OPT_NGRAM = 16
 
 };
 
-char instrument_mode_string[10][16] = {
+char instrument_mode_string[18][18] = {
 
-    "CLASSIC", "PCGUARD", "CFG",   "LTO", "CTX", "",
-    "",        "",        "NGRAM", ""
+    "DEFAULT", "CLASSIC", "PCGUARD", "CFG", "LTO", "", "",      "", "CTX", "",
+    "",        "",        "",        "",    "",    "", "NGRAM", ""
 
 };
 
@@ -206,6 +206,8 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   }
 
+  cc_params[cc_par_cnt++] = "-Wno-unused-command-line-argument";
+
   if (lto_mode && cpp_mode)
     cc_params[cc_par_cnt++] = "-lc++";  // needed by fuzzbench, early
 
@@ -220,6 +222,20 @@ static void edit_params(u32 argc, char **argv, char **envp) {
      afl-clang-lto(++)
    */
 
+  if (lto_mode) {
+
+    if (getenv("AFL_LLVM_WHITELIST") != NULL) {
+
+      cc_params[cc_par_cnt++] = "-Xclang";
+      cc_params[cc_par_cnt++] = "-load";
+      cc_params[cc_par_cnt++] = "-Xclang";
+      cc_params[cc_par_cnt++] =
+          alloc_printf("%s/afl-llvm-lto-whitelist.so", obj_path);
+
+    }
+
+  }
+
   // laf
   if (getenv("LAF_SPLIT_SWITCHES") || getenv("AFL_LLVM_LAF_SPLIT_SWITCHES")) {
 
@@ -289,16 +305,6 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   if (lto_mode) {
 
-    if (getenv("AFL_LLVM_WHITELIST") != NULL) {
-
-      cc_params[cc_par_cnt++] = "-Xclang";
-      cc_params[cc_par_cnt++] = "-load";
-      cc_params[cc_par_cnt++] = "-Xclang";
-      cc_params[cc_par_cnt++] =
-          alloc_printf("%s/afl-llvm-lto-whitelist.so", obj_path);
-
-    }
-
     cc_params[cc_par_cnt++] = alloc_printf("-fuse-ld=%s", AFL_REAL_LD);
     cc_params[cc_par_cnt++] = "-Wl,--allow-multiple-definition";
     if (instrument_mode == INSTRUMENT_CFG)
@@ -331,7 +337,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   }
 
-  cc_params[cc_par_cnt++] = "-Qunused-arguments";
+  // cc_params[cc_par_cnt++] = "-Qunused-arguments";
 
   // in case LLVM is installed not via a package manager or "make install"
   // e.g. compiled download or compiled from github then it's ./lib directory
@@ -436,8 +442,8 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     cc_params[cc_par_cnt++] = "-g";
     cc_params[cc_par_cnt++] = "-O3";
     cc_params[cc_par_cnt++] = "-funroll-loops";
-    if (strlen(march_opt) > 1 && march_opt[0] == '-')
-      cc_params[cc_par_cnt++] = march_opt;
+    // if (strlen(march_opt) > 1 && march_opt[0] == '-')
+    //  cc_params[cc_par_cnt++] = march_opt;
 
   }
 
@@ -486,6 +492,19 @@ static void edit_params(u32 argc, char **argv, char **envp) {
    */
 
   cc_params[cc_par_cnt++] =
+      "-D__AFL_FUZZ_INIT()="
+      "int __afl_sharedmem_fuzzing = 1;"
+      "extern unsigned int __afl_fuzz_len;"
+      "extern unsigned char *__afl_fuzz_ptr;"
+      "unsigned char *__afl_fuzz_alt_ptr;";
+  cc_params[cc_par_cnt++] =
+      "-D__AFL_FUZZ_TESTCASE_BUF=(__afl_fuzz_ptr ? __afl_fuzz_ptr : "
+      "(__afl_fuzz_alt_ptr = malloc(1 * 1024 * 1024)))";
+  cc_params[cc_par_cnt++] =
+      "-D__AFL_FUZZ_TESTCASE_LEN=(__afl_fuzz_ptr ? __afl_fuzz_len : read(0, "
+      "__afl_fuzz_alt_ptr, 1 * 1024 * 1024))";
+
+  cc_params[cc_par_cnt++] =
       "-D__AFL_LOOP(_A)="
       "({ static volatile char *_B __attribute__((used)); "
       " _B = (char*)\"" PERSIST_SIG
@@ -584,10 +603,6 @@ int main(int argc, char **argv, char **envp) {
 
     be_quiet = 1;
 
-#ifdef USE_TRACE_PC
-  instrument_mode = INSTRUMENT_PCGUARD;
-#endif
-
   if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") ||
       getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) {
 
@@ -629,12 +644,11 @@ int main(int argc, char **argv, char **envp) {
 
     while (ptr) {
 
-      if (strncasecmp(ptr, "default", strlen("default")) == 0 ||
-          strncasecmp(ptr, "afl", strlen("afl")) == 0 ||
+      if (strncasecmp(ptr, "afl", strlen("afl")) == 0 ||
           strncasecmp(ptr, "classic", strlen("classic")) == 0) {
 
-        if (!instrument_mode || instrument_mode == INSTRUMENT_DEFAULT)
-          instrument_mode = INSTRUMENT_DEFAULT;
+        if (!instrument_mode || instrument_mode == INSTRUMENT_AFL)
+          instrument_mode = INSTRUMENT_AFL;
         else
           FATAL("main instrumentation mode already set with %s",
                 instrument_mode_string[instrument_mode]);
@@ -740,6 +754,17 @@ int main(int argc, char **argv, char **envp) {
 
   }
 
+  if (instrument_mode == 0) {
+
+#ifndef USE_TRACE_PC
+    if (getenv("AFL_LLVM_WHITELIST"))
+      instrument_mode = INSTRUMENT_AFL;
+    else
+#endif
+      instrument_mode = INSTRUMENT_PCGUARD;
+
+  }
+
   if (instrument_opt_mode && lto_mode)
     FATAL(
         "CTX and NGRAM can not be used in LTO mode (and would make LTO "
@@ -780,6 +805,9 @@ int main(int argc, char **argv, char **envp) {
         "AFL_LLVM_NOT_ZERO and AFL_LLVM_SKIP_NEVERZERO can not be set "
         "together");
 
+  if (instrument_mode == INSTRUMENT_PCGUARD && getenv("AFL_LLVM_WHITELIST"))
+    WARNF("Instrumentation type PCGUARD does not support AFL_LLVM_WHITELIST!");
+
   if (argc < 2 || strcmp(argv[1], "-h") == 0) {
 
     if (!lto_mode)
@@ -820,14 +848,14 @@ int main(int argc, char **argv, char **envp) {
         "AFL_LLVM_NOT_ZERO: use cycling trace counters that skip zero\n"
         "AFL_LLVM_SKIP_NEVERZERO: do not skip zero on trace counters\n"
         "AFL_LLVM_LAF_SPLIT_COMPARES: enable cascaded comparisons\n"
-        "AFL_LLVM_LAF_SPLIT_FLOATS: transform floating point comp. to "
-        "cascaded "
-        "comp.\n"
+        "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n"
         "AFL_LLVM_LAF_SPLIT_SWITCHES: casc. comp. in 'switch'\n"
         " to cascaded comparisons\n"
+        "AFL_LLVM_LAF_SPLIT_FLOATS: transform floating point comp. to "
+        "cascaded comp.\n"
         "AFL_LLVM_LAF_TRANSFORM_COMPARES: transform library comparison "
         "function calls\n"
-        "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n"
+        "AFL_LLVM_LAF_ALL: enables all LAF splits/transforms\n"
         "AFL_LLVM_WHITELIST: enable whitelisting (selective "
         "instrumentation)\n"
         "AFL_NO_BUILTIN: compile for use with libtokencap.so\n"
@@ -843,12 +871,13 @@ int main(int argc, char **argv, char **envp) {
     SAYF(
         "\nafl-clang-fast specific environment variables:\n"
         "AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen mutator)\n"
-        "AFL_LLVM_INSTRUMENT: set instrumentation mode: DEFAULT, CFG "
-        "(INSTRIM), PCGUARD, LTO, CTX, NGRAM-2 ... NGRAM-16\n"
-        " You can also use the old environment variables instead:"
-        "  AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation\n"
+        "AFL_LLVM_INSTRUMENT: set instrumentation mode: AFL, CFG "
+        "(INSTRIM), PCGUARD [DEFAULT], LTO, CTX, NGRAM-2 ... NGRAM-16\n"
+        " You can also use the old environment variables instead:\n"
+        "  AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation "
+        "[DEFAULT]\n"
         "  AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n"
-        "  AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed (sub "
+        "  AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed ("
         "option to INSTRIM)\n"
         "  AFL_LLVM_CTX: use context sensitive coverage\n"
         "  AFL_LLVM_NGRAM_SIZE: use ngram prev_loc count coverage\n");
@@ -915,6 +944,15 @@ int main(int argc, char **argv, char **envp) {
 
   check_environment_vars(envp);
 
+  if (getenv("AFL_LLVM_LAF_ALL")) {
+
+    setenv("AFL_LLVM_LAF_SPLIT_SWITCHES", "1", 1);
+    setenv("AFL_LLVM_LAF_SPLIT_COMPARES", "1", 1);
+    setenv("AFL_LLVM_LAF_SPLIT_FLOATS", "1", 1);
+    setenv("AFL_LLVM_LAF_TRANSFORM_COMPARES", "1", 1);
+
+  }
+
   cmplog_mode = getenv("AFL_CMPLOG") || getenv("AFL_LLVM_CMPLOG");
   if (!be_quiet && cmplog_mode)
     printf("CmpLog mode by <andreafioraldi@gmail.com>\n");
diff --git a/llvm_mode/afl-llvm-lto-instrim.so.cc b/llvm_mode/afl-llvm-lto-instrim.so.cc
index a7d9b756..27504e8d 100644
--- a/llvm_mode/afl-llvm-lto-instrim.so.cc
+++ b/llvm_mode/afl-llvm-lto-instrim.so.cc
@@ -561,6 +561,17 @@ struct InsTrimLTO : public ModulePass {
       if (F.size() < function_minimum_size) continue;
       if (isBlacklisted(&F)) continue;
 
+      // whitelist check
+      AttributeList Attrs = F.getAttributes();
+      if (Attrs.hasAttribute(-1, StringRef("skipinstrument"))) {
+
+        if (debug)
+          fprintf(stderr, "DEBUG: Function %s is not whitelisted\n",
+                  F.getName().str().c_str());
+        continue;
+
+      }
+
       std::unordered_set<BasicBlock *> MS;
       if (!MarkSetOpt) {
 
diff --git a/llvm_mode/afl-llvm-lto-instrumentation.so.cc b/llvm_mode/afl-llvm-lto-instrumentation.so.cc
index f44b336e..cbe68171 100644
--- a/llvm_mode/afl-llvm-lto-instrumentation.so.cc
+++ b/llvm_mode/afl-llvm-lto-instrumentation.so.cc
@@ -197,6 +197,17 @@ bool AFLLTOPass::runOnModule(Module &M) {
     if (F.size() < function_minimum_size) continue;
     if (isBlacklisted(&F)) continue;
 
+    // whitelist check
+    AttributeList Attrs = F.getAttributes();
+    if (Attrs.hasAttribute(-1, StringRef("skipinstrument"))) {
+
+      if (debug)
+        fprintf(stderr, "DEBUG: Function %s is not whitelisted\n",
+                F.getName().str().c_str());
+      continue;
+
+    }
+
     std::vector<BasicBlock *> InsBlocks;
 
     if (autodictionary) {
diff --git a/llvm_mode/afl-llvm-lto-whitelist.so.cc b/llvm_mode/afl-llvm-lto-whitelist.so.cc
index a116c4ea..8856ce21 100644
--- a/llvm_mode/afl-llvm-lto-whitelist.so.cc
+++ b/llvm_mode/afl-llvm-lto-whitelist.so.cc
@@ -122,64 +122,65 @@ bool AFLwhitelist::runOnModule(Module &M) {
 
   for (auto &F : M) {
 
+    if (F.size() < 1) continue;
+    // fprintf(stderr, "F:%s\n", F.getName().str().c_str());
     if (isBlacklisted(&F)) continue;
 
-    for (auto &BB : F) {
+    BasicBlock::iterator IP = F.getEntryBlock().getFirstInsertionPt();
+    IRBuilder<>          IRB(&(*IP));
 
-      BasicBlock::iterator IP = BB.getFirstInsertionPt();
-      IRBuilder<>          IRB(&(*IP));
+    if (!myWhitelist.empty()) {
 
-      if (!myWhitelist.empty()) {
+      bool instrumentFunction = false;
 
-        bool instrumentBlock = false;
+      /* Get the current location using debug information.
+       * If we are not able to determine our location, the function
+       * is treated as not whitelisted. */
+      DebugLoc Loc = IP->getDebugLoc();
+      if (Loc) {
 
-        /* Get the current location using debug information.
-         * For now, just instrument the block if we are not able
-         * to determine our location. */
-        DebugLoc Loc = IP->getDebugLoc();
-        if (Loc) {
+        DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
 
-          DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
+        unsigned int instLine = cDILoc->getLine();
+        StringRef    instFilename = cDILoc->getFilename();
 
-          unsigned int instLine = cDILoc->getLine();
-          StringRef    instFilename = cDILoc->getFilename();
+        if (instFilename.str().empty()) {
 
-          if (instFilename.str().empty()) {
+          /* If the original location is empty, try using the inlined location
+           */
+          DILocation *oDILoc = cDILoc->getInlinedAt();
+          if (oDILoc) {
 
-            /* If the original location is empty, try using the inlined location
-             */
-            DILocation *oDILoc = cDILoc->getInlinedAt();
-            if (oDILoc) {
-
-              instFilename = oDILoc->getFilename();
-              instLine = oDILoc->getLine();
-
-            }
+            instFilename = oDILoc->getFilename();
+            instLine = oDILoc->getLine();
 
           }
 
-          (void)instLine;
+        }
 
-          /* Continue only if we know where we actually are */
-          if (!instFilename.str().empty()) {
+        (void)instLine;
 
-            for (std::list<std::string>::iterator it = myWhitelist.begin();
-                 it != myWhitelist.end(); ++it) {
+        if (debug)
+          SAYF(cMGN "[D] " cRST "function %s is in file %s\n",
+               F.getName().str().c_str(), instFilename.str().c_str());
+        /* Continue only if we know where we actually are */
+        if (!instFilename.str().empty()) {
 
-              /* We don't check for filename equality here because
-               * filenames might actually be full paths. Instead we
-               * check that the actual filename ends in the filename
-               * specified in the list. */
-              if (instFilename.str().length() >= it->length()) {
+          for (std::list<std::string>::iterator it = myWhitelist.begin();
+               it != myWhitelist.end(); ++it) {
 
-                if (instFilename.str().compare(
-                        instFilename.str().length() - it->length(),
-                        it->length(), *it) == 0) {
+            /* We don't check for filename equality here because
+             * filenames might actually be full paths. Instead we
+             * check that the actual filename ends in the filename
+             * specified in the list. */
+            if (instFilename.str().length() >= it->length()) {
 
-                  instrumentBlock = true;
-                  break;
+              if (instFilename.str().compare(
+                      instFilename.str().length() - it->length(), it->length(),
+                      *it) == 0) {
 
-                }
+                instrumentFunction = true;
+                break;
 
               }
 
@@ -189,43 +190,35 @@ bool AFLwhitelist::runOnModule(Module &M) {
 
         }
 
-        /* Either we couldn't figure out our location or the location is
-         * not whitelisted, so we skip instrumentation.
-         * We do this by renaming the function. */
-        if (!instrumentBlock) {
-
-          if (F.getName().compare("main") == 0 ||
-              F.getName().compare("start") == 0 ||
-              F.getName().compare("_start") == 0 ||
-              F.getName().compare("init") == 0 ||
-              F.getName().compare("_init") == 0) {
-
-            // We do not honor be_quiet for this one
-            WARNF("Cannot ignore functions main/init/start");
-
-          } else {
-
-            // StringRef newName = StringRef("ign.") + F.getName();
-            if (debug)
-              SAYF(cMGN "[D] " cRST "renamed %s to ign.%s\n",
-                   F.getName().str().c_str(), F.getName().str().c_str());
-            Function *_F(&F);
-            _F->setName("ign." + F.getName());
-
-          }
+      }
 
-        } else if (debug)
+      /* Either we couldn't figure out our location or the location is
+       * not whitelisted, so we skip instrumentation.
+       * We do this by tagging the function with "skipinstrument". */
+      if (instrumentFunction == true) {
 
+        if (debug)
           SAYF(cMGN "[D] " cRST "function %s is in whitelist\n",
                F.getName().str().c_str());
 
       } else {
 
-        PFATAL("Whitelist is empty");
+        if (debug)
+          SAYF(cMGN "[D] " cRST "function %s is NOT in whitelist\n",
+               F.getName().str().c_str());
+
+        auto &        Ctx = F.getContext();
+        AttributeList Attrs = F.getAttributes();
+        AttrBuilder   NewAttrs;
+        NewAttrs.addAttribute("skipinstrument");
+        F.setAttributes(
+            Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
 
       }
 
-      break;
+    } else {
+
+      PFATAL("Whitelist is empty");
 
     }
 
diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc
index 5bf705f8..82dece75 100644
--- a/llvm_mode/afl-llvm-pass.so.cc
+++ b/llvm_mode/afl-llvm-pass.so.cc
@@ -211,15 +211,17 @@ bool AFLCoverage::runOnModule(Module &M) {
   else
 #else
   if (ngram_size_str)
-#ifndef LLVM_VERSION_PATCH
-    FATAL("Sorry, NGRAM branch coverage is not supported with llvm version %d.%d.%d!",
-          LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR,
-          0);
-#else
-    FATAL("Sorry, NGRAM branch coverage is not supported with llvm version %d.%d.%d!",
-          LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR,
-          LLVM_VERSION_PATCH);
-#endif
+  #ifndef LLVM_VERSION_PATCH
+    FATAL(
+        "Sorry, NGRAM branch coverage is not supported with llvm version "
+        "%d.%d.%d!",
+        LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, 0);
+  #else
+    FATAL(
+        "Sorry, NGRAM branch coverage is not supported with llvm version "
+        "%d.%d.%d!",
+        LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH);
+  #endif
 #endif
     PrevLocSize = 1;
 
diff --git a/llvm_mode/afl-llvm-rt.o.c b/llvm_mode/afl-llvm-rt.o.c
index c0d1569d..3a0584e4 100644
--- a/llvm_mode/afl-llvm-rt.o.c
+++ b/llvm_mode/afl-llvm-rt.o.c
@@ -63,13 +63,21 @@
    is used for instrumentation output before __afl_map_shm() has a chance to
    run. It will end up as .comm, so it shouldn't be too wasteful. */
 
+#if MAP_SIZE <= 65536
+  #define MAP_INITIAL_SIZE 256000
+#else
+  #define MAP_INITIAL_SIZE MAP_SIZE
+#endif
+
 #ifdef AFL_REAL_LD
-u8 __afl_area_initial[256000];
+u8 __afl_area_initial[MAP_INITIAL_SIZE];
 #else
 u8                  __afl_area_initial[MAP_SIZE];
 #endif
 u8 *__afl_area_ptr = __afl_area_initial;
 u8 *__afl_dictionary;
+u8 *__afl_fuzz_ptr;
+u32 __afl_fuzz_len;
 
 u32 __afl_final_loc;
 u32 __afl_map_size = MAP_SIZE;
@@ -86,6 +94,8 @@ __thread u32        __afl_prev_ctx;
 __thread u32        __afl_cmp_counter;
 #endif
 
+int __afl_sharedmem_fuzzing __attribute__((weak));
+
 struct cmp_map *__afl_cmp_map;
 
 /* Running in persistent mode? */
@@ -103,6 +113,59 @@ void send_forkserver_error(int error) {
 
 }
 
+/* SHM fuzzing setup: map the shared memory segment named by SHM_FUZZ_ENV_VAR
+   and store its address in __afl_fuzz_ptr; calls exit(1) on any failure. */
+static void __afl_map_shm_fuzz() {
+
+  char *id_str = getenv(SHM_FUZZ_ENV_VAR);
+
+  if (id_str) {
+
+#ifdef USEMMAP
+    const char *   shm_file_path = id_str;
+    int            shm_fd = -1;
+    unsigned char *shm_base = NULL; /* not referenced below */
+
+    /* open the existing shared memory segment as if it was a file */
+    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    if (shm_fd == -1) {
+
+      fprintf(stderr, "shm_open() failed for fuzz\n");
+      send_forkserver_error(FS_ERROR_SHM_OPEN);
+      exit(1);
+
+    }
+
+    __afl_fuzz_ptr = mmap(0, MAX_FILE, PROT_READ, MAP_SHARED, shm_fd, 0);
+
+#else
+    u32 shm_id = atoi(id_str); /* id arrives as a decimal string */
+
+    __afl_fuzz_ptr = shmat(shm_id, NULL, 0);
+
+#endif
+
+    /* Both mmap() and shmat() report failure by returning (void *)-1. */
+
+    if (__afl_fuzz_ptr == (void *)-1) {
+
+      fprintf(stderr, "Error: could not access fuzzing shared memory\n");
+      exit(1);
+
+    }
+
+    if (getenv("AFL_DEBUG"))
+      fprintf(stderr, "DEBUG: successfully got fuzzing shared memory\n");
+
+  } else {
+
+    fprintf(stderr, "Error: variable for fuzzing shared memory is not set\n");
+    exit(1);
+
+  }
+
+}
+
 /* SHM setup. */
 
 static void __afl_map_shm(void) {
@@ -304,17 +367,25 @@ static void __afl_start_snapshots(void) {
      assume we're not running in forkserver mode and just execute program. */
 
   status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT);
+  if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ;
   if (__afl_map_size <= FS_OPT_MAX_MAPSIZE)
     status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE);
-  if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT;
+  if (__afl_dictionary_len && __afl_dictionary) status |= FS_OPT_AUTODICT;
   memcpy(tmp, &status, 4);
 
   if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
 
-  if (__afl_dictionary_len > 0 && __afl_dictionary) {
+  if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) {
 
     if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
 
+    if ((was_killed & (0xffffffff & (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ))) ==
+        (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) {
+
+      __afl_map_shm_fuzz();
+
+    }
+
     if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) ==
         (FS_OPT_ENABLED | FS_OPT_AUTODICT)) {
 
@@ -351,7 +422,7 @@ static void __afl_start_snapshots(void) {
 
       // uh this forkserver master does not understand extended option passing
       // or does not want the dictionary
-      already_read_first = 1;
+      if (!__afl_fuzz_ptr) already_read_first = 1;
 
     }
 
@@ -372,6 +443,9 @@ static void __afl_start_snapshots(void) {
 
     }
 
+    __afl_fuzz_len = (was_killed >> 8);
+    was_killed = (was_killed & 0xff);
+
     /* If we stopped the child in persistent mode, but there was a race
        condition and afl-fuzz already issued SIGKILL, write off the old
        process. */
@@ -445,7 +519,7 @@ static void __afl_start_snapshots(void) {
 static void __afl_start_forkserver(void) {
 
 #ifdef __linux__
-  if (!is_persistent && !__afl_cmp_map && !getenv("AFL_NO_SNAPSHOT") &&
+  if (/*!is_persistent &&*/ !__afl_cmp_map && !getenv("AFL_NO_SNAPSHOT") &&
       afl_snapshot_init() >= 0) {
 
     __afl_start_snapshots();
@@ -467,7 +541,8 @@ static void __afl_start_forkserver(void) {
 
   if (__afl_map_size <= FS_OPT_MAX_MAPSIZE)
     status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE);
-  if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT;
+  if (__afl_dictionary_len && __afl_dictionary) status |= FS_OPT_AUTODICT;
+  if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ;
   if (status) status |= (FS_OPT_ENABLED);
   memcpy(tmp, &status, 4);
 
@@ -476,10 +551,17 @@ static void __afl_start_forkserver(void) {
 
   if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
 
-  if (__afl_dictionary_len > 0 && __afl_dictionary) {
+  if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) {
 
     if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
 
+    if ((was_killed & (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) ==
+        (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) {
+
+      __afl_map_shm_fuzz();
+
+    }
+
     if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) ==
         (FS_OPT_ENABLED | FS_OPT_AUTODICT)) {
 
@@ -516,7 +598,7 @@ static void __afl_start_forkserver(void) {
 
       // uh this forkserver master does not understand extended option passing
       // or does not want the dictionary
-      already_read_first = 1;
+      if (!__afl_fuzz_ptr) already_read_first = 1;
 
     }
 
@@ -538,6 +620,9 @@ static void __afl_start_forkserver(void) {
 
     }
 
+    __afl_fuzz_len = (was_killed >> 8);
+    was_killed = (was_killed & 0xff);
+
     /* If we stopped the child in persistent mode, but there was a race
        condition and afl-fuzz already issued SIGKILL, write off the old
        process. */
@@ -703,13 +788,13 @@ void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
 
 void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
-  u32 inst_ratio = 100;
-  u8 *x;
+  u32   inst_ratio = 100;
+  char *x;
 
   if (start == stop || *start) return;
 
   x = getenv("AFL_INST_RATIO");
-  if (x) inst_ratio = atoi(x);
+  if (x) inst_ratio = (u32)atoi(x);
 
   if (!inst_ratio || inst_ratio > 100) {
 
@@ -892,7 +977,7 @@ static int area_is_mapped(void *ptr, size_t len) {
 
 }
 
-void __cmplog_rtn_hook(void *ptr1, void *ptr2) {
+void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
 
   if (!__afl_cmp_map) return;
 
diff --git a/llvm_mode/cmplog-routines-pass.cc b/llvm_mode/cmplog-routines-pass.cc
index bb78273a..e05a1843 100644
--- a/llvm_mode/cmplog-routines-pass.cc
+++ b/llvm_mode/cmplog-routines-pass.cc
@@ -93,16 +93,17 @@ bool CmpLogRoutines::hookRtns(Module &M) {
   std::vector<CallInst *> calls;
   LLVMContext &           C = M.getContext();
 
-  Type *       VoidTy = Type::getVoidTy(C);
-  PointerType *VoidPtrTy = PointerType::get(VoidTy, 0);
+  Type *VoidTy = Type::getVoidTy(C);
+  // PointerType *VoidPtrTy = PointerType::get(VoidTy, 0);
+  IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
+  PointerType *i8PtrTy = PointerType::get(Int8Ty, 0);
 
 #if LLVM_VERSION_MAJOR < 9
   Constant *
 #else
   FunctionCallee
 #endif
-      c = M.getOrInsertFunction("__cmplog_rtn_hook", VoidTy, VoidPtrTy,
-                                VoidPtrTy
+      c = M.getOrInsertFunction("__cmplog_rtn_hook", VoidTy, i8PtrTy, i8PtrTy
 #if LLVM_VERSION_MAJOR < 5
                                 ,
                                 NULL
@@ -163,8 +164,10 @@ bool CmpLogRoutines::hookRtns(Module &M) {
     IRB.SetInsertPoint(callInst);
 
     std::vector<Value *> args;
-    args.push_back(v1P);
-    args.push_back(v2P);
+    Value *              v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
+    Value *              v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
+    args.push_back(v1Pcasted);
+    args.push_back(v2Pcasted);
 
     IRB.CreateCall(cmplogHookFn, args, "tmp");
 
diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc
index 1ebc54d7..4e99aafb 100644
--- a/llvm_mode/compare-transform-pass.so.cc
+++ b/llvm_mode/compare-transform-pass.so.cc
@@ -304,17 +304,27 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
           if (!(HasStr1 || HasStr2)) continue;
 
           if (isMemcmp || isStrncmp || isStrncasecmp) {
-
             /* check if third operand is a constant integer
              * strlen("constStr") and sizeof() are treated as constant */
             Value *      op2 = callInst->getArgOperand(2);
             ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
-            if (!ilen) continue;
-            /* final precaution: if size of compare is larger than constant
-             * string skip it*/
-            uint64_t literalLength = HasStr1 ? Str1.size() : Str2.size();
-            if (literalLength + 1 < ilen->getZExtValue()) continue;
-
+            if (ilen) {
+              uint64_t len = ilen->getZExtValue();
+              // if len is zero this is a pointless call but allow real
+              // implementation to worry about that
+              if (!len) continue;
+
+              if (isMemcmp) {
+                // if size of compare is larger than constant string this is
+                // likely a bug but allow real implementation to worry about
+                // that
+                uint64_t literalLength = HasStr1 ? Str1.size() : Str2.size();
+                if (literalLength + 1 < ilen->getZExtValue()) continue;
+              }
+            } else if (isMemcmp)
+              // this *may* supply a len greater than the constant string at
+              // runtime so similarly we don't want to have to handle that
+              continue;
           }
 
           calls.push_back(callInst);
@@ -341,7 +351,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
     Value *     VarStr;
     bool        HasStr1 = getConstantStringInfo(Str1P, Str1);
     bool        HasStr2 = getConstantStringInfo(Str2P, Str2);
-    uint64_t    constLen, sizedLen;
+    uint64_t    constStrLen, constSizedLen, unrollLen;
     bool        isMemcmp =
         !callInst->getCalledFunction()->getName().compare(StringRef("memcmp"));
     bool isSizedcmp = isMemcmp ||
@@ -349,23 +359,13 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
                           StringRef("strncmp")) ||
                       !callInst->getCalledFunction()->getName().compare(
                           StringRef("strncasecmp"));
+    Value *sizedValue = isSizedcmp ? callInst->getArgOperand(2) : NULL;
+    bool isConstSized = sizedValue && isa<ConstantInt>(sizedValue);
     bool isCaseInsensitive = !callInst->getCalledFunction()->getName().compare(
                                  StringRef("strcasecmp")) ||
                              !callInst->getCalledFunction()->getName().compare(
                                  StringRef("strncasecmp"));
 
-    if (isSizedcmp) {
-
-      Value *      op2 = callInst->getArgOperand(2);
-      ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
-      sizedLen = ilen->getZExtValue();
-
-    } else {
-
-      sizedLen = 0;
-
-    }
-
     if (!(HasStr1 || HasStr2)) {
 
       // do we have a saved local or global variable initialization?
@@ -389,93 +389,133 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
     }
 
+    if (isConstSized) {
+
+      constSizedLen = dyn_cast<ConstantInt>(sizedValue)->getZExtValue();
+
+    }
+
     if (HasStr1) {
 
       TmpConstStr = Str1.str();
       VarStr = Str2P;
-      constLen = isMemcmp ? sizedLen : TmpConstStr.length();
 
     } else {
 
       TmpConstStr = Str2.str();
       VarStr = Str1P;
-      constLen = isMemcmp ? sizedLen : TmpConstStr.length();
 
     }
 
-    /* properly handle zero terminated C strings by adding the terminating 0 to
-     * the StringRef (in comparison to std::string a StringRef has built-in
-     * runtime bounds checking, which makes debugging easier) */
+    // add null termination character implicit in c strings
     TmpConstStr.append("\0", 1);
-    if (!sizedLen) constLen++;
+
+    // in the unusual case the const str has embedded null
+    // characters, the string comparison functions should terminate
+    // at the first null
+    if (!isMemcmp)
+      TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1);
+
+    constStrLen = TmpConstStr.length();
+    // prefer use of StringRef (in comparison to std::string a StringRef has
+    // built-in runtime bounds checking, which makes debugging easier)
     ConstStr = StringRef(TmpConstStr);
-    // fprintf(stderr, "issized: %d, const > sized ? %u > %u\n", isSizedcmp,
-    // constLen, sizedLen);
-    if (isSizedcmp && constLen > sizedLen && sizedLen) constLen = sizedLen;
-    if (constLen > TmpConstStr.length()) constLen = TmpConstStr.length();
-    if (!constLen) constLen = TmpConstStr.length();
-    if (!constLen) continue;
+
+    if (isConstSized)
+      unrollLen = constSizedLen < constStrLen ? constSizedLen : constStrLen;
+    else
+      unrollLen = constStrLen;
 
     if (!be_quiet)
-      errs() << callInst->getCalledFunction()->getName() << ": len " << constLen
+      errs() << callInst->getCalledFunction()->getName() << ": unroll len " << unrollLen
+             << ((isSizedcmp && !isConstSized) ? ", variable n" : "")
              << ": " << ConstStr << "\n";
 
     /* split before the call instruction */
     BasicBlock *bb = callInst->getParent();
     BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(callInst));
-    BasicBlock *next_bb =
+
+    BasicBlock *next_lenchk_bb = NULL;
+    if (isSizedcmp && !isConstSized) {
+      next_lenchk_bb = BasicBlock::Create(C, "len_check", end_bb->getParent(), end_bb);
+      BranchInst::Create(end_bb, next_lenchk_bb);
+    }
+    BasicBlock *next_cmp_bb =
         BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb);
-    BranchInst::Create(end_bb, next_bb);
-    PHINode *PN = PHINode::Create(Int32Ty, constLen + 1, "cmp_phi");
+    BranchInst::Create(end_bb, next_cmp_bb);
+    PHINode *PN = PHINode::Create(Int32Ty, (next_lenchk_bb ? 2 : 1) * unrollLen + 1, "cmp_phi");
+
 
 #if LLVM_VERSION_MAJOR < 8
     TerminatorInst *term = bb->getTerminator();
 #else
     Instruction *term = bb->getTerminator();
 #endif
-    BranchInst::Create(next_bb, bb);
+    BranchInst::Create(next_lenchk_bb ? next_lenchk_bb : next_cmp_bb, bb);
     term->eraseFromParent();
 
-    for (uint64_t i = 0; i < constLen; i++) {
+    for (uint64_t i = 0; i < unrollLen; i++) {
+
+      BasicBlock *cur_cmp_bb = next_cmp_bb, *cur_lenchk_bb = next_lenchk_bb;
+      unsigned char c;
+
+      if (cur_lenchk_bb) {
 
-      BasicBlock *cur_bb = next_bb;
+        IRBuilder<> cur_lenchk_IRB(&*(cur_lenchk_bb->getFirstInsertionPt()));
+        Value *icmp = cur_lenchk_IRB.CreateICmpEQ(
+          sizedValue, ConstantInt::get(Int64Ty, i));
+        cur_lenchk_IRB.CreateCondBr(icmp, end_bb, cur_cmp_bb);
+        cur_lenchk_bb->getTerminator()->eraseFromParent();
 
-      char c = isCaseInsensitive ? tolower(ConstStr[i]) : ConstStr[i];
+        PN->addIncoming(ConstantInt::get(Int32Ty, 0), cur_lenchk_bb);
 
-      BasicBlock::iterator IP = next_bb->getFirstInsertionPt();
-      IRBuilder<>          IRB(&*IP);
+      }
+
+      if (isCaseInsensitive)
+        c = (unsigned char)(tolower((int)ConstStr[i]) & 0xff);
+      else
+        c = (unsigned char)ConstStr[i];
+
+      IRBuilder<> cur_cmp_IRB(&*(cur_cmp_bb->getFirstInsertionPt()));
 
       Value *v = ConstantInt::get(Int64Ty, i);
-      Value *ele = IRB.CreateInBoundsGEP(VarStr, v, "empty");
-      Value *load = IRB.CreateLoad(ele);
+      Value *ele = cur_cmp_IRB.CreateInBoundsGEP(VarStr, v, "empty");
+      Value *load = cur_cmp_IRB.CreateLoad(ele);
+
       if (isCaseInsensitive) {
 
         // load >= 'A' && load <= 'Z' ? load | 0x020 : load
+        load = cur_cmp_IRB.CreateZExt(load, Int32Ty);
         std::vector<Value *> args;
         args.push_back(load);
-        load = IRB.CreateCall(tolowerFn, args, "tmp");
-        load = IRB.CreateTrunc(load, Int8Ty);
+        load = cur_cmp_IRB.CreateCall(tolowerFn, args, "tmp");
+        load = cur_cmp_IRB.CreateTrunc(load, Int8Ty);
 
       }
 
       Value *isub;
       if (HasStr1)
-        isub = IRB.CreateSub(ConstantInt::get(Int8Ty, c), load);
+        isub = cur_cmp_IRB.CreateSub(ConstantInt::get(Int8Ty, c), load);
       else
-        isub = IRB.CreateSub(load, ConstantInt::get(Int8Ty, c));
+        isub = cur_cmp_IRB.CreateSub(load, ConstantInt::get(Int8Ty, c));
+
+      Value *sext = cur_cmp_IRB.CreateSExt(isub, Int32Ty);
+      PN->addIncoming(sext, cur_cmp_bb);
 
-      Value *sext = IRB.CreateSExt(isub, Int32Ty);
-      PN->addIncoming(sext, cur_bb);
+      if (i < unrollLen - 1) {
 
-      if (i < constLen - 1) {
+        if (cur_lenchk_bb) {
+          next_lenchk_bb = BasicBlock::Create(C, "len_check", end_bb->getParent(), end_bb);
+          BranchInst::Create(end_bb, next_lenchk_bb);
+        }
 
-        next_bb =
+        next_cmp_bb =
             BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb);
-        BranchInst::Create(end_bb, next_bb);
+        BranchInst::Create(end_bb, next_cmp_bb);
 
-        Value *icmp = IRB.CreateICmpEQ(isub, ConstantInt::get(Int8Ty, 0));
-        IRB.CreateCondBr(icmp, next_bb, end_bb);
-        cur_bb->getTerminator()->eraseFromParent();
+        Value *icmp = cur_cmp_IRB.CreateICmpEQ(isub, ConstantInt::get(Int8Ty, 0));
+        cur_cmp_IRB.CreateCondBr(icmp, next_lenchk_bb ? next_lenchk_bb : next_cmp_bb, end_bb);
+        cur_cmp_bb->getTerminator()->eraseFromParent();
 
       } else {