about summary refs log tree commit diff
path: root/instrumentation
diff options
context:
space:
mode:
Diffstat (limited to 'instrumentation')
-rw-r--r--instrumentation/LLVMInsTrim.so.cc33
-rw-r--r--instrumentation/README.cmplog.md10
-rw-r--r--instrumentation/README.ctx.md22
-rw-r--r--instrumentation/README.gcc_plugin.md61
-rw-r--r--instrumentation/README.instrument_list.md4
-rw-r--r--instrumentation/README.lto.md31
-rw-r--r--instrumentation/README.neverzero.md2
-rw-r--r--instrumentation/README.ngram.md4
-rw-r--r--instrumentation/README.out_of_line.md8
-rw-r--r--instrumentation/README.persistent_mode.md26
-rw-r--r--instrumentation/SanitizerCoverageLTO.so.cc33
-rw-r--r--instrumentation/SanitizerCoveragePCGUARD.so.cc246
-rw-r--r--instrumentation/afl-compiler-rt.o.c368
-rw-r--r--instrumentation/afl-llvm-common.cc29
-rw-r--r--instrumentation/afl-llvm-dict2file.so.cc26
-rw-r--r--instrumentation/afl-llvm-lto-instrumentation.so.cc30
-rw-r--r--instrumentation/afl-llvm-pass.so.cc191
-rw-r--r--instrumentation/cmplog-instructions-pass.cc181
-rw-r--r--instrumentation/compare-transform-pass.so.cc62
-rw-r--r--instrumentation/llvm-alternative-coverage.h (renamed from instrumentation/llvm-ngram-coverage.h)3
-rw-r--r--instrumentation/split-compares-pass.so.cc57
21 files changed, 850 insertions, 577 deletions
diff --git a/instrumentation/LLVMInsTrim.so.cc b/instrumentation/LLVMInsTrim.so.cc
index 235ee30f..62de6ec5 100644
--- a/instrumentation/LLVMInsTrim.so.cc
+++ b/instrumentation/LLVMInsTrim.so.cc
@@ -38,7 +38,7 @@ typedef long double max_align_t;
 
 #include "MarkNodes.h"
 #include "afl-llvm-common.h"
-#include "llvm-ngram-coverage.h"
+#include "llvm-alternative-coverage.h"
 
 #include "config.h"
 #include "debug.h"
@@ -135,7 +135,7 @@ struct InsTrim : public ModulePass {
     unsigned int PrevLocSize = 0;
     char *       ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
     if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
-    char *ctx_str = getenv("AFL_LLVM_CTX");
+    char *caller_str = getenv("AFL_LLVM_CALLER");
 
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
     unsigned int ngram_size = 0;
@@ -197,9 +197,9 @@ struct InsTrim : public ModulePass {
                            GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
     GlobalVariable *AFLPrevLoc;
     GlobalVariable *AFLContext = NULL;
-    LoadInst *      PrevCtx = NULL;  // for CTX sensitive coverage
+    LoadInst *      PrevCaller = NULL;  // for CALLER sensitive coverage
 
-    if (ctx_str)
+    if (caller_str)
 #if defined(__ANDROID__) || defined(__HAIKU__)
       AFLContext = new GlobalVariable(
           M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx");
@@ -398,11 +398,11 @@ struct InsTrim : public ModulePass {
         unsigned int cur_loc;
 
         // Context sensitive coverage
-        if (ctx_str && &BB == &F.getEntryBlock()) {
+        if (caller_str && &BB == &F.getEntryBlock()) {
 
-          PrevCtx = IRB.CreateLoad(AFLContext);
-          PrevCtx->setMetadata(M.getMDKindID("nosanitize"),
-                               MDNode::get(C, None));
+          PrevCaller = IRB.CreateLoad(AFLContext);
+          PrevCaller->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
 
           // does the function have calls? and is any of the calls larger than
           // one basic block?
@@ -441,7 +441,7 @@ struct InsTrim : public ModulePass {
 
           }
 
-        }  // END of ctx_str
+        }  // END of caller_str
 
         if (MarkSetOpt && MS.find(&BB) == MS.end()) { continue; }
 
@@ -459,7 +459,7 @@ struct InsTrim : public ModulePass {
             BasicBlock *PBB = *PI;
             auto        It = PredMap.insert({PBB, genLabel()});
             unsigned    Label = It.first->second;
-            cur_loc = Label;
+            // cur_loc = Label;
             PN->addIncoming(ConstantInt::get(Int32Ty, Label), PBB);
 
           }
@@ -485,9 +485,9 @@ struct InsTrim : public ModulePass {
 #endif
           PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
 
-        if (ctx_str)
+        if (caller_str)
           PrevLocTrans =
-              IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty);
+              IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCaller), Int32Ty);
 
         /* Load SHM pointer */
         LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
@@ -535,16 +535,17 @@ struct InsTrim : public ModulePass {
         IRB.CreateStore(Incr, MapPtrIdx)
             ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
-        if (ctx_str && has_calls) {
+        if (caller_str && has_calls) {
 
-          // in CTX mode we have to restore the original context for the
+          // in CALLER mode we have to restore the original context for the
           // caller - she might be calling other functions which need the
-          // correct CTX
+          // correct CALLER
           Instruction *Inst = BB.getTerminator();
           if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
 
             IRBuilder<> Post_IRB(Inst);
-            StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+            StoreInst * RestoreCtx =
+                Post_IRB.CreateStore(PrevCaller, AFLContext);
             RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
                                     MDNode::get(C, None));
 
diff --git a/instrumentation/README.cmplog.md b/instrumentation/README.cmplog.md
index 5f855e1f..a796c7a7 100644
--- a/instrumentation/README.cmplog.md
+++ b/instrumentation/README.cmplog.md
@@ -1,10 +1,11 @@
 # CmpLog instrumentation
 
-The CmpLog instrumentation enables the logging of the comparisons operands in a
+The CmpLog instrumentation enables logging of comparison operands in a
 shared memory.
 
 These values can be used by various mutators built on top of it.
-At the moment we support the RedQueen mutator (input-2-state instructions only).
+At the moment we support the RedQueen mutator (input-2-state instructions only), 
+for details see [the RedQueen paper](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf).
 
 ## Build
 
@@ -13,7 +14,7 @@ program.
 
 The first version is built using the regular AFL++ instrumentation.
 
-The second one, the CmpLog binary, with setting AFL_LLVM_CMPLOG during the compilation.
+The second one, the CmpLog binary, is built by setting AFL_LLVM_CMPLOG during the compilation.
 
 For example:
 
@@ -26,11 +27,12 @@ export AFL_LLVM_CMPLOG=1
 ./configure --cc=~/path/to/afl-clang-fast
 make
 cp ./program ./program.cmplog
+unset AFL_LLVM_CMPLOG
 ```
 
 ## Use
 
-AFL++ has the new -c option that needs to be used to specify the CmpLog binary (the second
+AFL++ has the new `-c` option that needs to be used to specify the CmpLog binary (the second
 build).
 
 For example:
diff --git a/instrumentation/README.ctx.md b/instrumentation/README.ctx.md
index caf2c09a..335e9921 100644
--- a/instrumentation/README.ctx.md
+++ b/instrumentation/README.ctx.md
@@ -4,14 +4,19 @@
 
 This is an LLVM-based implementation of the context sensitive branch coverage.
 
-Basically every function gets its own ID and that ID is combined with the
-edges of the called functions.
+Basically every function gets its own ID and, every time an edge is logged,
+all the IDs in the callstack are hashed and combined with the edge transition
+hash to augment the classic edge coverage with the information about the
+calling context.
 
 So if both function A and function B call a function C, the coverage
 collected in C will be different.
 
 In math the coverage is collected as follows:
-`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1`
+`map[current_location_ID ^ previous_location_ID >> 1 ^ hash_callstack_IDs] += 1`
+
+The callstack hash is produced by XOR-ing the function IDs to avoid explosion with
+recursive functions.
 
 ## Usage
 
@@ -20,3 +25,14 @@ Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable.
 It is highly recommended to increase the MAP_SIZE_POW2 definition in
 config.h to at least 18 and maybe up to 20 for this as otherwise too
 many map collisions occur.
+
+## Caller Branch Coverage
+
+If the context sensitive coverage introduces too many collisions and becomes
+detrimental, the user can choose to augment edge coverage with just the
+called function ID, instead of the entire callstack hash.
+
+In math the coverage is collected as follows:
+`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1`
+
+Set the `AFL_LLVM_INSTRUMENT=CALLER` or `AFL_LLVM_CALLER=1` environment variable.
diff --git a/instrumentation/README.gcc_plugin.md b/instrumentation/README.gcc_plugin.md
index 12449efd..230ceb73 100644
--- a/instrumentation/README.gcc_plugin.md
+++ b/instrumentation/README.gcc_plugin.md
@@ -3,16 +3,20 @@
 See [../README.md](../README.md) for the general instruction manual.
 See [README.llvm.md](README.llvm.md) for the LLVM-based instrumentation.
 
+This document describes how to build and use `afl-gcc-fast` and `afl-g++-fast`,
+which instrument the target with the help of gcc plugins.
+
 TLDR:
-  * `apt-get install gcc-VERSION-plugin-dev`
-  * `make`
-  * gcc and g++ must point to the gcc-VERSION you you have to set AFL_CC/AFL_CXX
+  * check the version of your gcc compiler: `gcc --version`
+  * `apt-get install gcc-VERSION-plugin-dev` or similar to install headers for gcc plugins
+  * `gcc` and `g++` must match the gcc-VERSION you installed headers for. You can set `AFL_CC`/`AFL_CXX`
     to point to these!
-  * just use afl-gcc-fast/afl-g++-fast normally like you would afl-clang-fast
+  * `make`
+  * just use `afl-gcc-fast`/`afl-g++-fast` normally like you would do with `afl-clang-fast`
 
 ## 1) Introduction
 
-The code in this directory allows you to instrument programs for AFL using
+The code in this directory allows instrumenting programs for AFL using
 true compiler-level instrumentation, instead of the more crude
 assembly-level rewriting approach taken by afl-gcc and afl-clang. This has
 several interesting properties:
@@ -27,10 +31,10 @@ several interesting properties:
 
   - The instrumentation is CPU-independent. At least in principle, you should
     be able to rely on it to fuzz programs on non-x86 architectures (after
-    building afl-fuzz with AFL_NOX86=1).
+    building `afl-fuzz` with `AFL_NOX86=1`).
 
   - Because the feature relies on the internals of GCC, it is gcc-specific
-    and will *not* work with LLVM (see ../llvm_mode for an alternative).
+    and will *not* work with LLVM (see [README.llvm.md](README.llvm.md) for an alternative).
 
 Once this implementation is shown to be sufficiently robust and portable, it
 will probably replace afl-gcc. For now, it can be built separately and
@@ -41,29 +45,32 @@ The idea and much of the implementation comes from Laszlo Szekeres.
 ## 2) How to use
 
 In order to leverage this mechanism, you need to have modern enough GCC
-(>= version 4.5.0) and the plugin headers installed on your system. That
+(>= version 4.5.0) and the plugin development headers installed on your system. That
 should be all you need. On Debian machines, these headers can be acquired by
 installing the `gcc-VERSION-plugin-dev` packages.
 
-To build the instrumentation itself, type 'make'. This will generate binaries
-called afl-gcc-fast and afl-g++-fast in the parent directory. 
+To build the instrumentation itself, type `make`. This will generate binaries
+called `afl-gcc-fast` and `afl-g++-fast` in the parent directory. 
 
 The gcc and g++ compiler links have to point to gcc-VERSION - or set these
-by pointing the environment variables AFL_CC/AFL_CXX to them.
-If the CC/CXX have been overridden, those compilers will be used from
-those wrappers without using AFL_CXX/AFL_CC settings.
+by pointing the environment variables `AFL_CC`/`AFL_CXX` to them.
+If the `CC`/`CXX` environment variables have been set, those compilers will be 
+preferred over those from the `AFL_CC`/`AFL_CXX` settings.
 
 Once this is done, you can instrument third-party code in a way similar to the
 standard operating mode of AFL, e.g.:
-
-  CC=/path/to/afl/afl-gcc-fast ./configure [...options...]
+```
+  CC=/path/to/afl/afl-gcc-fast
+  CXX=/path/to/afl/afl-g++-fast
+  export CC CXX
+  ./configure [...options...]
   make
+```
+Note: We also used `CXX` to set the C++ compiler to `afl-g++-fast` for C++ code.
 
-Be sure to also include CXX set to afl-g++-fast for C++ code.
-
-The tool honors roughly the same environmental variables as afl-gcc (see
-[env_variables.md](../docs/env_variables.md). This includes AFL_INST_RATIO,
-AFL_USE_ASAN, AFL_HARDEN, and AFL_DONT_OPTIMIZE.
+The tool honors roughly the same environmental variables as `afl-gcc` (see
+[env_variables.md](../docs/env_variables.md)). This includes `AFL_INST_RATIO`,
+`AFL_USE_ASAN`, `AFL_HARDEN`, and `AFL_DONT_OPTIMIZE`.
 
 Note: if you want the GCC plugin to be installed on your system for all
 users, you need to build it before issuing 'make install' in the parent
@@ -72,7 +79,7 @@ directory.
 ## 3) Gotchas, feedback, bugs
 
 This is an early-stage mechanism, so field reports are welcome. You can send bug
-reports to afl@aflplus.plus
+reports to afl@aflplus.plus.
 
 ## 4) Bonus feature #1: deferred initialization
 
@@ -88,7 +95,7 @@ file before getting to the fuzzed data.
 In such cases, it's beneficial to initialize the forkserver a bit later, once
 most of the initialization work is already done, but before the binary attempts
 to read the fuzzed input and parse it; in some cases, this can offer a 10x+
-performance gain. You can implement delayed initialization in LLVM mode in a
+performance gain. You can implement delayed initialization in GCC mode in a
 fairly simple way.
 
 First, locate a suitable location in the code where the delayed cloning can
@@ -117,7 +124,7 @@ With the location selected, add this code in the appropriate spot:
 ```
 
 You don't need the #ifdef guards, but they will make the program still work as
-usual when compiled with a tool other than afl-gcc-fast/afl-clang-fast.
+usual when compiled with a compiler other than afl-gcc-fast/afl-clang-fast.
 
 Finally, recompile the program with afl-gcc-fast (afl-gcc or afl-clang will
 *not* generate a deferred-initialization binary) - and you should be all set!
@@ -127,7 +134,7 @@ Finally, recompile the program with afl-gcc-fast (afl-gcc or afl-clang will
 Some libraries provide APIs that are stateless, or whose state can be reset in
 between processing different input files. When such a reset is performed, a
 single long-lived process can be reused to try out multiple test cases,
-eliminating the need for repeated fork() calls and the associated OS overhead.
+eliminating the need for repeated `fork()` calls and the associated OS overhead.
 
 The basic structure of the program that does this would be:
 
@@ -160,5 +167,9 @@ wary of memory leaks and the state of file descriptors.
 When running in this mode, the execution paths will inherently vary a bit
 depending on whether the input loop is being entered for the first time or
 executed again. To avoid spurious warnings, the feature implies
-AFL_NO_VAR_CHECK and hides the "variable path" warnings in the UI.
+`AFL_NO_VAR_CHECK` and hides the "variable path" warnings in the UI.
+
+## 6) Bonus feature #3: selective instrumentation
 
+It can be more effective for fuzzing to instrument only parts of the code.
+For details see [README.instrument_list.md](README.instrument_list.md).
diff --git a/instrumentation/README.instrument_list.md b/instrumentation/README.instrument_list.md
index b7dfb40c..2116d24c 100644
--- a/instrumentation/README.instrument_list.md
+++ b/instrumentation/README.instrument_list.md
@@ -47,10 +47,10 @@ A special function is `__afl_coverage_interesting`.
 To use this, you must define `void __afl_coverage_interesting(u8 val, u32 id);`.
 Then you can use this function globally, where the `val` parameter can be set
 by you, the `id` parameter is for afl-fuzz and will be overwritten.
-Note that useful parameters are for `val` are: 1, 2, 3, 4, 8, 16, 32, 64, 128.
+Note that useful parameters for `val` are: 1, 2, 3, 4, 8, 16, 32, 64, 128.
 A value of e.g. 33 will be seen as 32 for coverage purposes.
 
-## 3) Selective instrumenation with AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST
+## 3) Selective instrumentation with AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST
 
 This feature is equivalent to llvm 12 sancov feature and allows to specify
 on a filename and/or function name level to instrument these or skip them.
diff --git a/instrumentation/README.lto.md b/instrumentation/README.lto.md
index a2814173..39f6465a 100644
--- a/instrumentation/README.lto.md
+++ b/instrumentation/README.lto.md
@@ -88,16 +88,35 @@ apt-get install -y clang-12 clang-tools-12 libc++1-12 libc++-12-dev \
 ### Building llvm yourself (version 12)
 
 Building llvm from github takes quite some long time and is not painless:
-```
+```sh
 sudo apt install binutils-dev  # this is *essential*!
-git clone https://github.com/llvm/llvm-project
+git clone --depth=1 https://github.com/llvm/llvm-project
 cd llvm-project
 mkdir build
 cd build
-cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/
-make -j $(nproc)
-export PATH=`pwd`/bin:$PATH
-export LLVM_CONFIG=`pwd`/bin/llvm-config
+
+# Add -G Ninja if ninja-build installed
+# "Building with ninja significantly improves your build time, especially with
+# incremental builds, and improves your memory usage."
+cmake \
+    -DCLANG_INCLUDE_DOCS="OFF" \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DLLVM_BINUTILS_INCDIR=/usr/include/ \
+    -DLLVM_BUILD_LLVM_DYLIB="ON" \
+    -DLLVM_ENABLE_BINDINGS="OFF" \
+    -DLLVM_ENABLE_PROJECTS='clang;compiler-rt;libcxx;libcxxabi;libunwind;lld' \
+    -DLLVM_ENABLE_WARNINGS="OFF" \
+    -DLLVM_INCLUDE_BENCHMARKS="OFF" \
+    -DLLVM_INCLUDE_DOCS="OFF" \
+    -DLLVM_INCLUDE_EXAMPLES="OFF" \
+    -DLLVM_INCLUDE_TESTS="OFF" \
+    -DLLVM_LINK_LLVM_DYLIB="ON" \
+    -DLLVM_TARGETS_TO_BUILD="host" \
+    ../llvm/
+cmake --build . -j4
+export PATH="$(pwd)/bin:$PATH"
+export LLVM_CONFIG="$(pwd)/bin/llvm-config"
+export LD_LIBRARY_PATH="$(llvm-config --libdir)${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
 cd /path/to/AFLplusplus/
 make
 sudo make install
diff --git a/instrumentation/README.neverzero.md b/instrumentation/README.neverzero.md
index 5c894d6e..49104e00 100644
--- a/instrumentation/README.neverzero.md
+++ b/instrumentation/README.neverzero.md
@@ -16,7 +16,7 @@ at a very little cost (one instruction per edge).
 (The alternative of saturated counters has been tested also and proved to be
 inferior in terms of path discovery.)
 
-This is implemented in afl-gcc, however for llvm_mode this is optional if
+This is implemented in afl-gcc and afl-gcc-fast, however for llvm_mode this is optional if
 the llvm version is below 9 - as there is a perfomance bug that is only fixed
 in version 9 and onwards.
 
diff --git a/instrumentation/README.ngram.md b/instrumentation/README.ngram.md
index de3ba432..da61ef32 100644
--- a/instrumentation/README.ngram.md
+++ b/instrumentation/README.ngram.md
@@ -10,8 +10,8 @@ by Jinghan Wang, et. al.
 Note that the original implementation (available
 [here](https://github.com/bitsecurerlab/afl-sensitive))
 is built on top of AFL's QEMU mode.
-This is essentially a port that uses LLVM vectorized instructions to achieve
-the same results when compiling source code.
+This is essentially a port that uses LLVM vectorized instructions (available from
+llvm versions 4.0.1 and higher) to achieve the same results when compiling source code.
 
 In math the branch coverage is performed as follows:
 `map[current_location ^ prev_location[0] >> 1 ^ prev_location[1] >> 1 ^ ... up to n-1`] += 1`
diff --git a/instrumentation/README.out_of_line.md b/instrumentation/README.out_of_line.md
index aad215b6..2264f91f 100644
--- a/instrumentation/README.out_of_line.md
+++ b/instrumentation/README.out_of_line.md
@@ -1,18 +1,16 @@
-===========================================
-Using afl++ without inlined instrumentation
-===========================================
+## Using afl++ without inlined instrumentation
 
   This file describes how you can disable inlining of instrumentation.
 
 
 By default, the GCC plugin will duplicate the effects of calling
-__afl_trace (see afl-gcc-rt.o.c) in instrumented code, instead of
+`__afl_trace` (see `afl-gcc-rt.o.c`) in instrumented code, instead of
 issuing function calls.
 
 The calls are presumed to be slower, more so because the rt file
 itself is not optimized by the compiler.
 
-Setting AFL_GCC_OUT_OF_LINE=1 in the environment while compiling code
+Setting `AFL_GCC_OUT_OF_LINE=1` in the environment while compiling code
 with the plugin will disable this inlining, issuing calls to the
 unoptimized runtime instead.
 
diff --git a/instrumentation/README.persistent_mode.md b/instrumentation/README.persistent_mode.md
index 2cf76adf..24f81ea0 100644
--- a/instrumentation/README.persistent_mode.md
+++ b/instrumentation/README.persistent_mode.md
@@ -16,7 +16,7 @@ Examples can be found in [utils/persistent_mode](../utils/persistent_mode).
 ## 2) TLDR;
 
 Example `fuzz_target.c`:
-```
+```c
 #include "what_you_need_for_your_target.h"
 
 __AFL_FUZZ_INIT();
@@ -60,14 +60,14 @@ The speed increase is usually x10 to x20.
 If you want to be able to compile the target without afl-clang-fast/lto then
 add this just after the includes:
 
-```
+```c
 #ifndef __AFL_FUZZ_TESTCASE_LEN
   ssize_t fuzz_len;
   #define __AFL_FUZZ_TESTCASE_LEN fuzz_len
   unsigned char fuzz_buf[1024000];
   #define __AFL_FUZZ_TESTCASE_BUF fuzz_buf
   #define __AFL_FUZZ_INIT() void sync(void);
-  #define __AFL_LOOP(x) ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ?
+  #define __AFL_LOOP(x) ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ? 1 : 0)
   #define __AFL_INIT() sync() 
 #endif
 ```
@@ -75,7 +75,7 @@ add this just after the includes:
 ## 3) Deferred initialization
 
 AFL tries to optimize performance by executing the targeted binary just once,
-stopping it just before main(), and then cloning this "main" process to get
+stopping it just before `main()`, and then cloning this "main" process to get
 a steady supply of targets to fuzz.
 
 Although this approach eliminates much of the OS-, linker- and libc-level
@@ -97,7 +97,7 @@ a location after:
   - The creation of any vital threads or child processes - since the forkserver
     can't clone them easily.
 
-  - The initialization of timers via setitimer() or equivalent calls.
+  - The initialization of timers via `setitimer()` or equivalent calls.
 
   - The creation of temporary files, network sockets, offset-sensitive file
     descriptors, and similar shared-state resources - but only provided that
@@ -150,9 +150,9 @@ the impact of memory leaks and similar glitches; 1000 is a good starting point,
 and going much higher increases the likelihood of hiccups without giving you
 any real performance benefits.
 
-A more detailed template is shown in ../utils/persistent_mode/.
-Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef
-guards can be used to suppress it when using other compilers.
+A more detailed template is shown in `../utils/persistent_mode/`.
+Similarly to the previous mode, the feature works only with afl-clang-fast; 
+`#ifdef` guards can be used to suppress it when using other compilers.
 
 Note that as with the previous mode, the feature is easy to misuse; if you
 do not fully reset the critical state, you may end up with false positives or
@@ -161,7 +161,7 @@ wary of memory leaks and of the state of file descriptors.
 
 PS. Because there are task switches still involved, the mode isn't as fast as
 "pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot
-faster than the normal fork() model, and compared to in-process fuzzing,
+faster than the normal `fork()` model, and compared to in-process fuzzing,
 should be a lot more robust.
 
 ## 5) Shared memory fuzzing
@@ -174,17 +174,17 @@ Setting this up is very easy:
 
 After the includes set the following macro:
 
-```
+```c
 __AFL_FUZZ_INIT();
 ```
 Directly at the start of main - or if you are using the deferred forkserver
-with `__AFL_INIT()`  then *after* `__AFL_INIT? :
-```
+with `__AFL_INIT()` then *after* `__AFL_INIT()`:
+```c
   unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
 ```
 
 Then as first line after the `__AFL_LOOP` while loop:
-```
+```c
   int len = __AFL_FUZZ_TESTCASE_LEN;
 ```
 and that is all!
diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc
index e3490847..6dd390e6 100644
--- a/instrumentation/SanitizerCoverageLTO.so.cc
+++ b/instrumentation/SanitizerCoverageLTO.so.cc
@@ -507,6 +507,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
   Zero = ConstantInt::get(Int8Tyi, 0);
   One = ConstantInt::get(Int8Tyi, 1);
 
+  initInstrumentList();
   scanForDangerousFunctions(&M);
   Mo = &M;
 
@@ -733,7 +734,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
                             Var->getInitializer())) {
 
                       HasStr2 = true;
-                      Str2 = Array->getAsString().str();
+                      Str2 = Array->getRawDataValues().str();
 
                     }
 
@@ -760,7 +761,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
                   if (literalLength + 1 == optLength) {
 
                     Str2.append("\0", 1);  // add null byte
-                    addedNull = true;
+                    // addedNull = true;
 
                   }
 
@@ -809,7 +810,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
                             Var->getInitializer())) {
 
                       HasStr1 = true;
-                      Str1 = Array->getAsString().str();
+                      Str1 = Array->getRawDataValues().str();
 
                     }
 
@@ -849,15 +850,18 @@ bool ModuleSanitizerCoverage::instrumentModule(
               thestring = Str2;
 
             optLen = thestring.length();
+            if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
 
             if (isMemcmp || isStrncmp || isStrncasecmp) {
 
               Value *      op2 = callInst->getArgOperand(2);
               ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+
               if (ilen) {
 
                 uint64_t literalLength = optLen;
                 optLen = ilen->getZExtValue();
+                if (optLen < 2) { continue; }
                 if (literalLength + 1 == optLen) {  // add null byte
                   thestring.append("\0", 1);
                   addedNull = true;
@@ -872,17 +876,21 @@ bool ModuleSanitizerCoverage::instrumentModule(
             // was not already added
             if (!isMemcmp) {
 
-              if (addedNull == false) {
+              if (addedNull == false && thestring[optLen - 1] != '\0') {
 
                 thestring.append("\0", 1);  // add null byte
                 optLen++;
 
               }
 
-              // ensure we do not have garbage
-              size_t offset = thestring.find('\0', 0);
-              if (offset + 1 < optLen) optLen = offset + 1;
-              thestring = thestring.substr(0, optLen);
+              if (!isStdString) {
+
+                // ensure we do not have garbage
+                size_t offset = thestring.find('\0', 0);
+                if (offset + 1 < optLen) optLen = offset + 1;
+                thestring = thestring.substr(0, optLen);
+
+              }
 
             }
 
@@ -1222,7 +1230,7 @@ void ModuleSanitizerCoverage::instrumentFunction(
 
   // afl++ START
   if (!F.size()) return;
-  if (isIgnoreFunction(&F)) return;
+  if (!isInInstrumentList(&F)) return;
   // afl++ END
 
   if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
@@ -1284,10 +1292,17 @@ GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
       *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
       Constant::getNullValue(ArrayTy), "__sancov_gen_");
 
+#if LLVM_VERSION_MAJOR > 12
+  if (TargetTriple.supportsCOMDAT() &&
+      (TargetTriple.isOSBinFormatELF() || !F.isInterposable()))
+    if (auto Comdat = getOrCreateFunctionComdat(F, TargetTriple))
+      Array->setComdat(Comdat);
+#else
   if (TargetTriple.supportsCOMDAT() && !F.isInterposable())
     if (auto Comdat =
             GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
       Array->setComdat(Comdat);
+#endif
   Array->setSection(getSectionName(Section));
   Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize()));
   GlobalsToAppendToUsed.push_back(Array);
diff --git a/instrumentation/SanitizerCoveragePCGUARD.so.cc b/instrumentation/SanitizerCoveragePCGUARD.so.cc
index 5d6d6703..09cda9e2 100644
--- a/instrumentation/SanitizerCoveragePCGUARD.so.cc
+++ b/instrumentation/SanitizerCoveragePCGUARD.so.cc
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/EHPersonalities.h"
@@ -34,11 +35,11 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SpecialCaseList.h"
 #if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
   #include "llvm/Support/VirtualFileSystem.h"
 #endif
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -47,65 +48,6 @@
 #include "debug.h"
 #include "afl-llvm-common.h"
 
-namespace llvm {
-
-/// This is the ModuleSanitizerCoverage pass used in the new pass manager. The
-/// pass instruments functions for coverage, adds initialization calls to the
-/// module for trace PC guards and 8bit counters if they are requested, and
-/// appends globals to llvm.compiler.used.
-class ModuleSanitizerCoveragePass
-    : public PassInfoMixin<ModuleSanitizerCoveragePass> {
-
- public:
-  explicit ModuleSanitizerCoveragePass(
-      SanitizerCoverageOptions        Options = SanitizerCoverageOptions(),
-      const std::vector<std::string> &AllowlistFiles =
-          std::vector<std::string>(),
-      const std::vector<std::string> &BlocklistFiles =
-          std::vector<std::string>())
-      : Options(Options) {
-
-    if (AllowlistFiles.size() > 0)
-      Allowlist = SpecialCaseList::createOrDie(AllowlistFiles
-#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
-                                               ,
-                                               *vfs::getRealFileSystem()
-#endif
-      );
-    if (BlocklistFiles.size() > 0)
-      Blocklist = SpecialCaseList::createOrDie(BlocklistFiles
-#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
-                                               ,
-                                               *vfs::getRealFileSystem()
-#endif
-      );
-
-  }
-
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-  static bool       isRequired() {
-
-    return true;
-
-  }
-
- private:
-  SanitizerCoverageOptions Options;
-
-  std::unique_ptr<SpecialCaseList> Allowlist;
-  std::unique_ptr<SpecialCaseList> Blocklist;
-
-};
-
-// Insert SanitizerCoverage instrumentation.
-ModulePass *createModuleSanitizerCoverageLegacyPassPass(
-    const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
-    const std::vector<std::string> &AllowlistFiles = std::vector<std::string>(),
-    const std::vector<std::string> &BlocklistFiles =
-        std::vector<std::string>());
-
-}  // namespace llvm
-
 using namespace llvm;
 
 #define DEBUG_TYPE "sancov"
@@ -156,96 +98,8 @@ static const char *const SanCovLowestStackName = "__sancov_lowest_stack";
 
 static char *skip_nozero;
 
-/*
-static cl::opt<int> ClCoverageLevel(
-    "sanitizer-coverage-level",
-    cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
-             "3: all blocks and critical edges"),
-    cl::Hidden, cl::init(3));
-
-static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc",
-                               cl::desc("Experimental pc tracing"), cl::Hidden,
-                               cl::init(false));
-
-static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard",
-                                    cl::desc("pc tracing with a guard"),
-                                    cl::Hidden, cl::init(true));
-
-// If true, we create a global variable that contains PCs of all instrumented
-// BBs, put this global into a named section, and pass this section's bounds
-// to __sanitizer_cov_pcs_init.
-// This way the coverage instrumentation does not need to acquire the PCs
-// at run-time. Works with trace-pc-guard, inline-8bit-counters, and
-// inline-bool-flag.
-static cl::opt<bool> ClCreatePCTable("sanitizer-coverage-pc-table",
-                                     cl::desc("create a static PC table"),
-                                     cl::Hidden, cl::init(false));
-
-static cl::opt<bool> ClInline8bitCounters(
-    "sanitizer-coverage-inline-8bit-counters",
-    cl::desc("increments 8-bit counter for every edge"), cl::Hidden,
-    cl::init(false));
-
-static cl::opt<bool> ClInlineBoolFlag(
-    "sanitizer-coverage-inline-bool-flag",
-    cl::desc("sets a boolean flag for every edge"), cl::Hidden,
-    cl::init(false));
-
-static cl::opt<bool> ClCMPTracing(
-    "sanitizer-coverage-trace-compares",
-    cl::desc("Tracing of CMP and similar instructions"), cl::Hidden,
-    cl::init(false));
-
-static cl::opt<bool> ClDIVTracing("sanitizer-coverage-trace-divs",
-                                  cl::desc("Tracing of DIV instructions"),
-                                  cl::Hidden, cl::init(false));
-
-static cl::opt<bool> ClGEPTracing("sanitizer-coverage-trace-geps",
-                                  cl::desc("Tracing of GEP instructions"),
-                                  cl::Hidden, cl::init(false));
-
-static cl::opt<bool> ClPruneBlocks(
-    "sanitizer-coverage-prune-blocks",
-    cl::desc("Reduce the number of instrumented blocks"), cl::Hidden,
-    cl::init(true));
-
-static cl::opt<bool> ClStackDepth("sanitizer-coverage-stack-depth",
-                                  cl::desc("max stack depth tracing"),
-                                  cl::Hidden, cl::init(false));
-*/
 namespace {
 
-/*
-SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) {
-
-  SanitizerCoverageOptions Res;
-  switch (LegacyCoverageLevel) {
-
-    case 0:
-      Res.CoverageType = SanitizerCoverageOptions::SCK_None;
-      break;
-    case 1:
-      Res.CoverageType = SanitizerCoverageOptions::SCK_Function;
-      break;
-    case 2:
-      Res.CoverageType = SanitizerCoverageOptions::SCK_BB;
-      break;
-    case 3:
-      Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
-      break;
-    case 4:
-      Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
-      Res.IndirectCalls = true;
-      break;
-
-  }
-
-  return Res;
-
-}
-
-*/
-
 SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
 
   // Sets CoverageType and IndirectCalls.
@@ -281,12 +135,14 @@ class ModuleSanitizerCoverage {
 
  public:
   ModuleSanitizerCoverage(
-      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
-      const SpecialCaseList *         Allowlist = nullptr,
-      const SpecialCaseList *         Blocklist = nullptr)
-      : Options(OverrideFromCL(Options)),
-        Allowlist(Allowlist),
-        Blocklist(Blocklist) {
+      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()
+#if LLVM_MAJOR > 10
+          ,
+      const SpecialCaseList *Allowlist = nullptr,
+      const SpecialCaseList *Blocklist = nullptr
+#endif
+      )
+      : Options(OverrideFromCL(Options)) {
 
   }
 
@@ -356,9 +212,6 @@ class ModuleSanitizerCoverage {
 
   SanitizerCoverageOptions Options;
 
-  const SpecialCaseList *Allowlist;
-  const SpecialCaseList *Blocklist;
-
   uint32_t        instr = 0;
   GlobalVariable *AFLMapPtr = NULL;
   ConstantInt *   One = NULL;
@@ -370,27 +223,17 @@ class ModuleSanitizerCoverageLegacyPass : public ModulePass {
 
  public:
   ModuleSanitizerCoverageLegacyPass(
-      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
+      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()
+#if LLVM_VERSION_MAJOR > 10
+          ,
       const std::vector<std::string> &AllowlistFiles =
           std::vector<std::string>(),
       const std::vector<std::string> &BlocklistFiles =
-          std::vector<std::string>())
+          std::vector<std::string>()
+#endif
+          )
       : ModulePass(ID), Options(Options) {
 
-    if (AllowlistFiles.size() > 0)
-      Allowlist = SpecialCaseList::createOrDie(AllowlistFiles
-#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
-                                               ,
-                                               *vfs::getRealFileSystem()
-#endif
-      );
-    if (BlocklistFiles.size() > 0)
-      Blocklist = SpecialCaseList::createOrDie(BlocklistFiles
-#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
-                                               ,
-                                               *vfs::getRealFileSystem()
-#endif
-      );
     initializeModuleSanitizerCoverageLegacyPassPass(
         *PassRegistry::getPassRegistry());
 
@@ -398,8 +241,12 @@ class ModuleSanitizerCoverageLegacyPass : public ModulePass {
 
   bool runOnModule(Module &M) override {
 
-    ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(),
-                                         Blocklist.get());
+    ModuleSanitizerCoverage ModuleSancov(Options
+#if LLVM_MAJOR > 10
+                                         ,
+                                         Allowlist.get(), Blocklist.get()
+#endif
+    );
     auto DTCallback = [this](Function &F) -> const DominatorTree * {
 
       return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
@@ -444,8 +291,12 @@ class ModuleSanitizerCoverageLegacyPass : public ModulePass {
 PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &               M,
                                                    ModuleAnalysisManager &MAM) {
 
-  ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(),
-                                       Blocklist.get());
+  ModuleSanitizerCoverage ModuleSancov(Options
+#if LLVM_MAJOR > 10
+                                       ,
+                                       Allowlist.get(), Blocklist.get()
+#endif
+  );
   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
   auto  DTCallback = [&FAM](Function &F) -> const DominatorTree * {
 
@@ -564,12 +415,6 @@ bool ModuleSanitizerCoverage::instrumentModule(
   }
 
   if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false;
-  if (Allowlist &&
-      !Allowlist->inSection("coverage", "src", M.getSourceFileName()))
-    return false;
-  if (Blocklist &&
-      Blocklist->inSection("coverage", "src", M.getSourceFileName()))
-    return false;
   C = &(M.getContext());
   DL = &M.getDataLayout();
   CurModule = &M;
@@ -842,9 +687,6 @@ void ModuleSanitizerCoverage::instrumentFunction(
   if (F.hasPersonalityFn() &&
       isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
     return;
-  if (Allowlist && !Allowlist->inSection("coverage", "fun", F.getName()))
-    return;
-  if (Blocklist && Blocklist->inSection("coverage", "fun", F.getName())) return;
   if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
     SplitAllCriticalEdges(
         F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests());
@@ -915,10 +757,18 @@ GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
       *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
       Constant::getNullValue(ArrayTy), "__sancov_gen_");
 
+#if LLVM_VERSION_MAJOR > 12
+  if (TargetTriple.supportsCOMDAT() &&
+      (TargetTriple.isOSBinFormatELF() || !F.isInterposable()))
+    if (auto Comdat = getOrCreateFunctionComdat(F, TargetTriple))
+      Array->setComdat(Comdat);
+#else
   if (TargetTriple.supportsCOMDAT() && !F.isInterposable())
     if (auto Comdat =
             GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
       Array->setComdat(Comdat);
+#endif
+
   Array->setSection(getSectionName(Section));
 #if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
   Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize()));
@@ -1088,7 +938,7 @@ void ModuleSanitizerCoverage::InjectTraceForSwitch(
 
       }
 
-      llvm::sort(Initializers.begin() + 2, Initializers.end(),
+      llvm::sort(drop_begin(Initializers, 2),
                  [](const Constant *A, const Constant *B) {
 
                    return cast<ConstantInt>(A)->getLimitedValue() <
@@ -1136,10 +986,10 @@ void ModuleSanitizerCoverage::InjectTraceForGep(
   for (auto GEP : GepTraceTargets) {
 
     IRBuilder<> IRB(GEP);
-    for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
-      if (!isa<ConstantInt>(*I) && (*I)->getType()->isIntegerTy())
+    for (Use &Idx : GEP->indices())
+      if (!isa<ConstantInt>(Idx) && Idx->getType()->isIntegerTy())
         IRB.CreateCall(SanCovTraceGepFunction,
-                       {IRB.CreateIntCast(*I, IntptrTy, true)});
+                       {IRB.CreateIntCast(Idx, IntptrTy, true)});
 
   }
 
@@ -1354,12 +1204,20 @@ INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov",
                     false)
 
 ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass(
-    const SanitizerCoverageOptions &Options,
+    const SanitizerCoverageOptions &Options
+#if LLVM_MAJOR > 10
+    ,
     const std::vector<std::string> &AllowlistFiles,
-    const std::vector<std::string> &BlocklistFiles) {
+    const std::vector<std::string> &BlocklistFiles
+#endif
+) {
 
-  return new ModuleSanitizerCoverageLegacyPass(Options, AllowlistFiles,
-                                               BlocklistFiles);
+  return new ModuleSanitizerCoverageLegacyPass(Options
+#if LLVM_MAJOR > 10
+                                               ,
+                                               AllowlistFiles, BlocklistFiles
+#endif
+  );
 
 }
 
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
index c24173af..f241447a 100644
--- a/instrumentation/afl-compiler-rt.o.c
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -20,7 +20,7 @@
 #include "config.h"
 #include "types.h"
 #include "cmplog.h"
-#include "llvm-ngram-coverage.h"
+#include "llvm-alternative-coverage.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -34,6 +34,7 @@
 #include <errno.h>
 
 #include <sys/mman.h>
+#include <sys/syscall.h>
 #ifndef __HAIKU__
   #include <sys/shm.h>
 #endif
@@ -70,7 +71,7 @@
    run. It will end up as .comm, so it shouldn't be too wasteful. */
 
 #if MAP_SIZE <= 65536
-  #define MAP_INITIAL_SIZE 256000
+  #define MAP_INITIAL_SIZE 2097152
 #else
   #define MAP_INITIAL_SIZE MAP_SIZE
 #endif
@@ -96,10 +97,12 @@ int __afl_selective_coverage_temp = 1;
 
 #if defined(__ANDROID__) || defined(__HAIKU__)
 PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX];
+PREV_LOC_T __afl_prev_caller[CTX_MAX_K];
 u32        __afl_prev_ctx;
 u32        __afl_cmp_counter;
 #else
 __thread PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX];
+__thread PREV_LOC_T __afl_prev_caller[CTX_MAX_K];
 __thread u32        __afl_prev_ctx;
 __thread u32        __afl_cmp_counter;
 #endif
@@ -122,6 +125,21 @@ static u8 is_persistent;
 
 static u8 _is_sancov;
 
+/* Debug? */
+
+static u32 __afl_debug;
+
+/* Already initialized markers */
+
+u32 __afl_already_initialized_shm;
+u32 __afl_already_initialized_forkserver;
+u32 __afl_already_initialized_first;
+u32 __afl_already_initialized_second;
+
+/* Dummy pipe for area_is_valid() */
+
+static int __afl_dummy_fd[2] = {2, 2};
+
 /* ensure we kill the child on termination */
 
 void at_exit(int signal) {
@@ -171,7 +189,7 @@ static void __afl_map_shm_fuzz() {
 
   char *id_str = getenv(SHM_FUZZ_ENV_VAR);
 
-  if (getenv("AFL_DEBUG")) {
+  if (__afl_debug) {
 
     fprintf(stderr, "DEBUG: fuzzcase shmem %s\n", id_str ? id_str : "none");
 
@@ -186,7 +204,7 @@ static void __afl_map_shm_fuzz() {
     int         shm_fd = -1;
 
     /* create the shared memory segment as if it was a file */
-    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    shm_fd = shm_open(shm_file_path, O_RDWR, DEFAULT_PERMISSION);
     if (shm_fd == -1) {
 
       fprintf(stderr, "shm_open() failed for fuzz\n");
@@ -217,7 +235,7 @@ static void __afl_map_shm_fuzz() {
     __afl_fuzz_len = (u32 *)map;
     __afl_fuzz_ptr = map + sizeof(u32);
 
-    if (getenv("AFL_DEBUG")) {
+    if (__afl_debug) {
 
       fprintf(stderr, "DEBUG: successfully got fuzzing shared memory\n");
 
@@ -237,6 +255,9 @@ static void __afl_map_shm_fuzz() {
 
 static void __afl_map_shm(void) {
 
+  if (__afl_already_initialized_shm) return;
+  __afl_already_initialized_shm = 1;
+
   // if we are not running in afl ensure the map exists
   if (!__afl_area_ptr) { __afl_area_ptr = __afl_area_ptr_dummy; }
 
@@ -244,8 +265,12 @@ static void __afl_map_shm(void) {
 
   if (__afl_final_loc) {
 
-    if (__afl_final_loc % 32)
-      __afl_final_loc = (((__afl_final_loc + 31) >> 5) << 5);
+    if (__afl_final_loc % 64) {
+
+      __afl_final_loc = (((__afl_final_loc + 63) >> 6) << 6);
+
+    }
+
     __afl_map_size = __afl_final_loc;
 
     if (__afl_final_loc > MAP_SIZE) {
@@ -290,18 +315,23 @@ static void __afl_map_shm(void) {
      early-stage __afl_area_initial region that is needed to allow some really
      hacky .init code to work correctly in projects such as OpenSSL. */
 
-  if (getenv("AFL_DEBUG"))
+  if (__afl_debug) {
+
     fprintf(stderr,
-            "DEBUG: id_str %s, __afl_area_ptr %p, __afl_area_initial %p, "
-            "__afl_map_addr 0x%llx, MAP_SIZE %u, __afl_final_loc %u, "
+            "DEBUG: (1) id_str %s, __afl_area_ptr %p, __afl_area_initial %p, "
+            "__afl_area_ptr_dummy 0x%p, __afl_map_addr 0x%llx, MAP_SIZE %u, "
+            "__afl_final_loc %u, "
             "max_size_forkserver %u/0x%x\n",
             id_str == NULL ? "<null>" : id_str, __afl_area_ptr,
-            __afl_area_initial, __afl_map_addr, MAP_SIZE, __afl_final_loc,
-            FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE);
+            __afl_area_initial, __afl_area_ptr_dummy, __afl_map_addr, MAP_SIZE,
+            __afl_final_loc, FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE);
+
+  }
 
   if (id_str) {
 
-    if (__afl_area_ptr && __afl_area_ptr != __afl_area_initial) {
+    if (__afl_area_ptr && __afl_area_ptr != __afl_area_initial &&
+        __afl_area_ptr != __afl_area_ptr_dummy) {
 
       if (__afl_map_addr) {
 
@@ -323,7 +353,7 @@ static void __afl_map_shm(void) {
     unsigned char *shm_base = NULL;
 
     /* create the shared memory segment as if it was a file */
-    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    shm_fd = shm_open(shm_file_path, O_RDWR, DEFAULT_PERMISSION);
     if (shm_fd == -1) {
 
       fprintf(stderr, "shm_open() failed\n");
@@ -346,17 +376,18 @@ static void __afl_map_shm(void) {
 
     }
 
-    if (shm_base == MAP_FAILED) {
+    close(shm_fd);
+    shm_fd = -1;
 
-      close(shm_fd);
-      shm_fd = -1;
+    if (shm_base == MAP_FAILED) {
 
       fprintf(stderr, "mmap() failed\n");
+      perror("mmap for map");
+
       if (__afl_map_addr)
         send_forkserver_error(FS_ERROR_MAP_ADDR);
       else
         send_forkserver_error(FS_ERROR_MMAP);
-      perror("mmap for map");
 
       exit(2);
 
@@ -368,8 +399,8 @@ static void __afl_map_shm(void) {
 
     if (__afl_map_size && __afl_map_size > MAP_SIZE) {
 
-      u8 *map_env = getenv("AFL_MAP_SIZE");
-      if (!map_env || atoi(map_env) < MAP_SIZE) {
+      u8 *map_env = (u8 *)getenv("AFL_MAP_SIZE");
+      if (!map_env || atoi((char *)map_env) < MAP_SIZE) {
 
         send_forkserver_error(FS_ERROR_MAP_SIZE);
         _exit(1);
@@ -378,7 +409,7 @@ static void __afl_map_shm(void) {
 
     }
 
-    __afl_area_ptr = shmat(shm_id, (void *)__afl_map_addr, 0);
+    __afl_area_ptr = (u8 *)shmat(shm_id, (void *)__afl_map_addr, 0);
 
     /* Whooooops. */
 
@@ -405,9 +436,9 @@ static void __afl_map_shm(void) {
 
              __afl_map_addr) {
 
-    __afl_area_ptr =
-        mmap((void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE,
-             MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+    __afl_area_ptr = (u8 *)mmap(
+        (void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE,
+        MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 
     if (__afl_area_ptr == MAP_FAILED) {
 
@@ -425,7 +456,7 @@ static void __afl_map_shm(void) {
 
     if (__afl_final_loc > MAP_INITIAL_SIZE) {
 
-      __afl_area_ptr = malloc(__afl_final_loc);
+      __afl_area_ptr = (u8 *)malloc(__afl_final_loc);
 
     }
 
@@ -435,11 +466,24 @@ static void __afl_map_shm(void) {
 
   __afl_area_ptr_backup = __afl_area_ptr;
 
+  if (__afl_debug) {
+
+    fprintf(stderr,
+            "DEBUG: (2) id_str %s, __afl_area_ptr %p, __afl_area_initial %p, "
+            "__afl_area_ptr_dummy 0x%p, __afl_map_addr 0x%llx, MAP_SIZE "
+            "%u, __afl_final_loc %u, "
+            "max_size_forkserver %u/0x%x\n",
+            id_str == NULL ? "<null>" : id_str, __afl_area_ptr,
+            __afl_area_initial, __afl_area_ptr_dummy, __afl_map_addr, MAP_SIZE,
+            __afl_final_loc, FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE);
+
+  }
+
   if (__afl_selective_coverage) {
 
     if (__afl_map_size > MAP_INITIAL_SIZE) {
 
-      __afl_area_ptr_dummy = malloc(__afl_map_size);
+      __afl_area_ptr_dummy = (u8 *)malloc(__afl_map_size);
 
       if (__afl_area_ptr_dummy) {
 
@@ -463,7 +507,7 @@ static void __afl_map_shm(void) {
 
   id_str = getenv(CMPLOG_SHM_ENV_VAR);
 
-  if (getenv("AFL_DEBUG")) {
+  if (__afl_debug) {
 
     fprintf(stderr, "DEBUG: cmplog id_str %s\n",
             id_str == NULL ? "<null>" : id_str);
@@ -472,13 +516,19 @@ static void __afl_map_shm(void) {
 
   if (id_str) {
 
+    if ((__afl_dummy_fd[1] = open("/dev/null", O_WRONLY)) < 0) {
+
+      if (pipe(__afl_dummy_fd) < 0) { __afl_dummy_fd[1] = 1; }
+
+    }
+
 #ifdef USEMMAP
     const char *    shm_file_path = id_str;
     int             shm_fd = -1;
     struct cmp_map *shm_base = NULL;
 
     /* create the shared memory segment as if it was a file */
-    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    shm_fd = shm_open(shm_file_path, O_RDWR, DEFAULT_PERMISSION);
     if (shm_fd == -1) {
 
       perror("shm_open() failed\n");
@@ -505,7 +555,7 @@ static void __afl_map_shm(void) {
 #else
     u32 shm_id = atoi(id_str);
 
-    __afl_cmp_map = shmat(shm_id, NULL, 0);
+    __afl_cmp_map = (struct cmp_map *)shmat(shm_id, NULL, 0);
 #endif
 
     __afl_cmp_map_backup = __afl_cmp_map;
@@ -522,6 +572,58 @@ static void __afl_map_shm(void) {
 
 }
 
+/* unmap SHM. */
+
+static void __afl_unmap_shm(void) {
+
+  if (!__afl_already_initialized_shm) return;
+
+  char *id_str = getenv(SHM_ENV_VAR);
+
+  if (id_str) {
+
+#ifdef USEMMAP
+
+    munmap((void *)__afl_area_ptr, __afl_map_size);
+
+#else
+
+    shmdt((void *)__afl_area_ptr);
+
+#endif
+
+  } else if ((!__afl_area_ptr || __afl_area_ptr == __afl_area_initial) &&
+
+             __afl_map_addr) {
+
+    munmap((void *)__afl_map_addr, __afl_map_size);
+
+  }
+
+  __afl_area_ptr = __afl_area_ptr_dummy;
+
+  id_str = getenv(CMPLOG_SHM_ENV_VAR);
+
+  if (id_str) {
+
+#ifdef USEMMAP
+
+    munmap((void *)__afl_cmp_map, __afl_map_size);
+
+#else
+
+    shmdt((void *)__afl_cmp_map);
+
+#endif
+
+    __afl_cmp_map = NULL;
+
+  }
+
+  __afl_already_initialized_shm = 0;
+
+}
+
 #ifdef __linux__
 static void __afl_start_snapshots(void) {
 
@@ -550,7 +652,7 @@ static void __afl_start_snapshots(void) {
 
     if (read(FORKSRV_FD, &was_killed, 4) != 4) { _exit(1); }
 
-    if (getenv("AFL_DEBUG")) {
+    if (__afl_debug) {
 
       fprintf(stderr, "target forkserver recv: %08x\n", was_killed);
 
@@ -627,7 +729,7 @@ static void __afl_start_snapshots(void) {
       static uint32_t counter = 0;
       char            fn[32];
       sprintf(fn, "%09u:forkserver", counter);
-      s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+      s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
       if (fd_doc >= 0) {
 
         if (write(fd_doc, __afl_fuzz_ptr, *__afl_fuzz_len) != *__afl_fuzz_len) {
@@ -727,6 +829,9 @@ static void __afl_start_snapshots(void) {
 
 static void __afl_start_forkserver(void) {
 
+  if (__afl_already_initialized_forkserver) return;
+  __afl_already_initialized_forkserver = 1;
+
   struct sigaction orig_action;
   sigaction(SIGTERM, NULL, &orig_action);
   old_sigterm_handler = orig_action.sa_handler;
@@ -777,7 +882,7 @@ static void __afl_start_forkserver(void) {
 
     if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
 
-    if (getenv("AFL_DEBUG")) {
+    if (__afl_debug) {
 
       fprintf(stderr, "target forkserver recv: %08x\n", was_killed);
 
@@ -855,7 +960,7 @@ static void __afl_start_forkserver(void) {
       static uint32_t counter = 0;
       char            fn[32];
       sprintf(fn, "%09u:forkserver", counter);
-      s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+      s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
       if (fd_doc >= 0) {
 
         if (write(fd_doc, __afl_fuzz_ptr, *__afl_fuzz_len) != *__afl_fuzz_len) {
@@ -1012,11 +1117,14 @@ void __afl_manual_init(void) {
     __afl_sharedmem_fuzzing = 0;
     if (__afl_area_ptr == NULL) __afl_area_ptr = __afl_area_ptr_dummy;
 
-    if (getenv("AFL_DEBUG"))
+    if (__afl_debug) {
+
       fprintf(stderr,
               "DEBUG: disabled instrumentation because of "
               "AFL_DISABLE_LLVM_INSTRUMENTATION\n");
 
+    }
+
   }
 
   if (!init_done) {
@@ -1056,6 +1164,11 @@ __attribute__((constructor(CTOR_PRIO))) void __afl_auto_early(void) {
 
 __attribute__((constructor(1))) void __afl_auto_second(void) {
 
+  if (__afl_already_initialized_second) return;
+  __afl_already_initialized_second = 1;
+
+  if (getenv("AFL_DEBUG")) { __afl_debug = 1; }
+
   if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
   u8 *ptr;
 
@@ -1080,17 +1193,18 @@ __attribute__((constructor(1))) void __afl_auto_second(void) {
 
   }
 
-}
+}  // ptr memleak report is a false positive
 
 /* preset __afl_area_ptr #1 - at constructor level 0 global variables have
    not been set */
 
 __attribute__((constructor(0))) void __afl_auto_first(void) {
 
-  if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
-  u8 *ptr;
+  if (__afl_already_initialized_first) return;
+  __afl_already_initialized_first = 1;
 
-  ptr = (u8 *)malloc(1024000);
+  if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
+  u8 *ptr = (u8 *)malloc(MAP_INITIAL_SIZE);
 
   if (ptr && (ssize_t)ptr != -1) {
 
@@ -1099,7 +1213,7 @@ __attribute__((constructor(0))) void __afl_auto_first(void) {
 
   }
 
-}
+}  // ptr memleak report is a false positive
 
 /* The following stuff deals with supporting -fsanitize-coverage=trace-pc-guard.
    It remains non-operational in the traditional, plugin-backed LLVM mode.
@@ -1167,11 +1281,13 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   _is_sancov = 1;
 
-  if (getenv("AFL_DEBUG")) {
+  if (__afl_debug) {
 
     fprintf(stderr,
-            "Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges)\n",
-            start, stop, stop - start);
+            "Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges) "
+            "after_fs=%u\n",
+            start, stop, (unsigned long)(stop - start),
+            __afl_already_initialized_forkserver);
 
   }
 
@@ -1187,6 +1303,40 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   }
 
+  /* instrumented code is loaded *after* our forkserver is up. this is a
+     problem. We cannot prevent collisions then :( */
+  if (__afl_already_initialized_forkserver &&
+      __afl_final_loc + 1 + stop - start > __afl_map_size) {
+
+    if (__afl_debug) {
+
+      fprintf(stderr, "Warning: new instrumented code after the forkserver!\n");
+
+    }
+
+    __afl_final_loc = 2;
+
+    if (1 + stop - start > __afl_map_size) {
+
+      *(start++) = ++__afl_final_loc;
+
+      while (start < stop) {
+
+        if (R(100) < inst_ratio)
+          *start = ++__afl_final_loc % __afl_map_size;
+        else
+          *start = 0;
+
+        start++;
+
+      }
+
+      return;
+
+    }
+
+  }
+
   /* Make sure that the first element in the range is always set - we use that
      to avoid duplicate calls (which can happen as an artifact of the underlying
      implementation in LLVM). */
@@ -1204,6 +1354,28 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   }
 
+  if (__afl_debug) {
+
+    fprintf(stderr,
+            "Done __sanitizer_cov_trace_pc_guard_init: __afl_final_loc = %u\n",
+            __afl_final_loc);
+
+  }
+
+  if (__afl_already_initialized_shm && __afl_final_loc > __afl_map_size) {
+
+    if (__afl_debug) {
+
+      fprintf(stderr, "Reinit shm necessary (+%u)\n",
+              __afl_final_loc - __afl_map_size);
+
+    }
+
+    __afl_unmap_shm();
+    __afl_map_shm();
+
+  }
+
 }
 
 ///// CmpLog instrumentation
@@ -1455,24 +1627,48 @@ void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2) {
 
 }
 
+void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2) {
+
+  __cmplog_ins_hook1(arg1, arg2, 0);
+
+}
+
 void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2) {
 
   __cmplog_ins_hook2(arg1, arg2, 0);
 
 }
 
+void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2) {
+
+  __cmplog_ins_hook2(arg1, arg2, 0);
+
+}
+
 void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) {
 
   __cmplog_ins_hook4(arg1, arg2, 0);
 
 }
 
+void __sanitizer_cov_trace_const_cmp4(uint32_t arg1, uint32_t arg2) {
+
+  __cmplog_ins_hook4(arg1, arg2, 0);  // same handling as trace_cmp4
+
+}  // SanitizerCoverage callback name is "const_cmp4" (was typo "cost")
+
 void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2) {
 
   __cmplog_ins_hook8(arg1, arg2, 0);
 
 }
 
+void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2) {
+
+  __cmplog_ins_hook8(arg1, arg2, 0);
+
+}
+
 #ifdef WORD_SIZE_64
 void __sanitizer_cov_trace_cmp16(uint128_t arg1, uint128_t arg2) {
 
@@ -1480,6 +1676,12 @@ void __sanitizer_cov_trace_cmp16(uint128_t arg1, uint128_t arg2) {
 
 }
 
+void __sanitizer_cov_trace_const_cmp16(uint128_t arg1, uint128_t arg2) {
+
+  __cmplog_ins_hook16(arg1, arg2, 0);
+
+}
+
 #endif
 
 void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
@@ -1523,17 +1725,43 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
 
 }
 
+__attribute__((weak)) void *__asan_region_is_poisoned(void *beg, size_t size) {
+
+  return NULL;
+
+}
+
 // POSIX shenanigan to see if an area is mapped.
 // If it is mapped as X-only, we have a problem, so maybe we should add a check
 // to avoid to call it on .text addresses
-static int area_is_mapped(void *ptr, size_t len) {
+static int area_is_valid(void *ptr, size_t len) {
+
+  if (unlikely(!ptr || __asan_region_is_poisoned(ptr, len))) { return 0; }
+
+  long r = syscall(SYS_write, __afl_dummy_fd[1], ptr, len);
+
+  if (r <= 0 || r > len) return 0;
+
+  // even if the write succeeds this can be a false positive if we cross
+  // a page boundary. who knows why.
+
+  char *p = (char *)ptr;
+  long  page_size = sysconf(_SC_PAGE_SIZE);
+  char *page = (char *)((uintptr_t)p & ~(page_size - 1)) + page_size;
 
-  char *p = ptr;
-  char *page = (char *)((uintptr_t)p & ~(sysconf(_SC_PAGE_SIZE) - 1));
+  if (page > p + len) {
 
-  int r = msync(page, (p - page) + len, MS_ASYNC);
-  if (r < 0) return errno != ENOMEM;
-  return 1;
+    // no, not crossing a page boundary
+    return (int)r;
+
+  } else {
+
+    // yes it crosses a boundary, hence we can only return the length of
+    // the rest of the first page; we cannot detect if the next page is
+    // valid or not, neither by SYS_write nor msync() :-(
+    return (int)(page - p);
+
+  }
 
 }
 
@@ -1541,20 +1769,25 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
 
   /*
     u32 i;
-    if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return;
+    if (area_is_valid(ptr1, 32) <= 0 || area_is_valid(ptr2, 32) <= 0) return;
     fprintf(stderr, "rtn arg0=");
-    for (i = 0; i < 24; i++)
+    for (i = 0; i < 32; i++)
       fprintf(stderr, "%02x", ptr1[i]);
     fprintf(stderr, " arg1=");
-    for (i = 0; i < 24; i++)
+    for (i = 0; i < 32; i++)
       fprintf(stderr, "%02x", ptr2[i]);
     fprintf(stderr, "\n");
   */
 
   if (unlikely(!__afl_cmp_map)) return;
+  // fprintf(stderr, "RTN1 %p %p\n", ptr1, ptr2);
+  int l1, l2;
+  if ((l1 = area_is_valid(ptr1, 32)) <= 0 ||
+      (l2 = area_is_valid(ptr2, 32)) <= 0)
+    return;
+  int len = MIN(l1, l2);
 
-  if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return;
-
+  // fprintf(stderr, "RTN2 %u\n", len);
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
   k = (k >> 4) ^ (k << 8);
   k &= CMP_MAP_W - 1;
@@ -1564,17 +1797,17 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
   if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
 
     __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
-    hits = 0;
     __afl_cmp_map->headers[k].hits = 1;
-    __afl_cmp_map->headers[k].shape = 31;
+    __afl_cmp_map->headers[k].shape = len - 1;
+    hits = 0;
 
   } else {
 
     hits = __afl_cmp_map->headers[k].hits++;
 
-    if (__afl_cmp_map->headers[k].shape < 31) {
+    if (__afl_cmp_map->headers[k].shape < len) {
 
-      __afl_cmp_map->headers[k].shape = 31;
+      __afl_cmp_map->headers[k].shape = len - 1;
 
     }
 
@@ -1582,9 +1815,10 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
 
   hits &= CMP_MAP_RTN_H - 1;
   __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0,
-                   ptr1, 32);
+                   ptr1, len);
   __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1,
-                   ptr2, 32);
+                   ptr2, len);
+  // fprintf(stderr, "RTN3\n");
 
 }
 
@@ -1629,12 +1863,20 @@ static u8 *get_llvm_stdstring(u8 *string) {
 
 void __cmplog_rtn_gcc_stdstring_cstring(u8 *stdstring, u8 *cstring) {
 
+  if (unlikely(!__afl_cmp_map)) return;
+  if (area_is_valid(stdstring, 32) <= 0 || area_is_valid(cstring, 32) <= 0)
+    return;
+
   __cmplog_rtn_hook(get_gcc_stdstring(stdstring), cstring);
 
 }
 
 void __cmplog_rtn_gcc_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) {
 
+  if (unlikely(!__afl_cmp_map)) return;
+  if (area_is_valid(stdstring1, 32) <= 0 || area_is_valid(stdstring2, 32) <= 0)
+    return;
+
   __cmplog_rtn_hook(get_gcc_stdstring(stdstring1),
                     get_gcc_stdstring(stdstring2));
 
@@ -1642,12 +1884,20 @@ void __cmplog_rtn_gcc_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) {
 
 void __cmplog_rtn_llvm_stdstring_cstring(u8 *stdstring, u8 *cstring) {
 
+  if (unlikely(!__afl_cmp_map)) return;
+  if (area_is_valid(stdstring, 32) <= 0 || area_is_valid(cstring, 32) <= 0)
+    return;
+
   __cmplog_rtn_hook(get_llvm_stdstring(stdstring), cstring);
 
 }
 
 void __cmplog_rtn_llvm_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) {
 
+  if (unlikely(!__afl_cmp_map)) return;
+  if (area_is_valid(stdstring1, 32) <= 0 || area_is_valid(stdstring2, 32) <= 0)
+    return;
+
   __cmplog_rtn_hook(get_llvm_stdstring(stdstring1),
                     get_llvm_stdstring(stdstring2));
 
diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc
index a27c4069..74943fb2 100644
--- a/instrumentation/afl-llvm-common.cc
+++ b/instrumentation/afl-llvm-common.cc
@@ -60,20 +60,25 @@ bool isIgnoreFunction(const llvm::Function *F) {
       "asan.",
       "llvm.",
       "sancov.",
-      "__ubsan_",
+      "__ubsan",
       "ign.",
-      "__afl_",
+      "__afl",
       "_fini",
-      "__libc_csu",
+      "__libc_",
       "__asan",
       "__msan",
       "__cmplog",
       "__sancov",
+      "__san",
+      "__cxx_",
+      "__decide_deferred",
+      "_GLOBAL",
+      "_ZZN6__asan",
+      "_ZZN6__lsan",
       "msan.",
       "LLVMFuzzerM",
       "LLVMFuzzerC",
       "LLVMFuzzerI",
-      "__decide_deferred",
       "maybe_duplicate_stderr",
       "discard_output",
       "close_stdout",
@@ -89,6 +94,20 @@ bool isIgnoreFunction(const llvm::Function *F) {
 
   }
 
+  static const char *ignoreSubstringList[] = {
+
+      "__asan",       "__msan",     "__ubsan", "__lsan",
+      "__san",        "__sanitize", "__cxx",   "_GLOBAL__",
+      "DebugCounter", "DwarfDebug", "DebugLoc"
+
+  };
+
+  for (auto const &ignoreListFunc : ignoreSubstringList) {
+
+    if (F->getName().contains(ignoreListFunc)) { return true; }
+
+  }
+
   return false;
 
 }
@@ -351,7 +370,7 @@ static std::string getSourceName(llvm::Function *F) {
 
     if (cDILoc) { instFilename = cDILoc->getFilename(); }
 
-    if (instFilename.str().empty()) {
+    if (instFilename.str().empty() && cDILoc) {
 
       /* If the original location is empty, try using the inlined location
        */
diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc
index a4b33732..c954054b 100644
--- a/instrumentation/afl-llvm-dict2file.so.cc
+++ b/instrumentation/afl-llvm-dict2file.so.cc
@@ -90,7 +90,7 @@ void dict2file(int fd, u8 *mem, u32 len) {
   j = 1;
   for (i = 0; i < len; i++) {
 
-    if (isprint(mem[i])) {
+    if (isprint(mem[i]) && mem[i] != '\\' && mem[i] != '"') {
 
       line[j++] = mem[i];
 
@@ -357,6 +357,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
           StringRef   TmpStr;
           bool        HasStr1;
           getConstantStringInfo(Str1P, TmpStr);
+
           if (TmpStr.empty()) {
 
             HasStr1 = false;
@@ -403,7 +404,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
                           dyn_cast<ConstantDataArray>(Var->getInitializer())) {
 
                     HasStr2 = true;
-                    Str2 = Array->getAsString().str();
+                    Str2 = Array->getRawDataValues().str();
 
                   }
 
@@ -430,7 +431,6 @@ bool AFLdict2filePass::runOnModule(Module &M) {
                 if (literalLength + 1 == optLength) {
 
                   Str2.append("\0", 1);  // add null byte
-                  addedNull = true;
 
                 }
 
@@ -480,7 +480,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
                           dyn_cast<ConstantDataArray>(Var->getInitializer())) {
 
                     HasStr1 = true;
-                    Str1 = Array->getAsString().str();
+                    Str1 = Array->getRawDataValues().str();
 
                   }
 
@@ -521,14 +521,18 @@ bool AFLdict2filePass::runOnModule(Module &M) {
 
           optLen = thestring.length();
 
+          if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
+
           if (isMemcmp || isStrncmp || isStrncasecmp) {
 
             Value *      op2 = callInst->getArgOperand(2);
             ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+
             if (ilen) {
 
               uint64_t literalLength = optLen;
               optLen = ilen->getZExtValue();
+              if (optLen < 2) { continue; }
               if (literalLength + 1 == optLen) {  // add null byte
                 thestring.append("\0", 1);
                 addedNull = true;
@@ -543,17 +547,21 @@ bool AFLdict2filePass::runOnModule(Module &M) {
           // was not already added
           if (!isMemcmp) {
 
-            if (addedNull == false) {
+            if (addedNull == false && thestring[optLen - 1] != '\0') {
 
               thestring.append("\0", 1);  // add null byte
               optLen++;
 
             }
 
-            // ensure we do not have garbage
-            size_t offset = thestring.find('\0', 0);
-            if (offset + 1 < optLen) optLen = offset + 1;
-            thestring = thestring.substr(0, optLen);
+            if (!isStdString) {
+
+              // ensure we do not have garbage
+              size_t offset = thestring.find('\0', 0);
+              if (offset + 1 < optLen) optLen = offset + 1;
+              thestring = thestring.substr(0, optLen);
+
+            }
 
           }
 
diff --git a/instrumentation/afl-llvm-lto-instrumentation.so.cc b/instrumentation/afl-llvm-lto-instrumentation.so.cc
index fa494f44..50306224 100644
--- a/instrumentation/afl-llvm-lto-instrumentation.so.cc
+++ b/instrumentation/afl-llvm-lto-instrumentation.so.cc
@@ -69,7 +69,8 @@ class AFLLTOPass : public ModulePass {
 
     if (getenv("AFL_DEBUG")) debug = 1;
     if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL)
-      if ((afl_global_id = (uint32_t)atoi(ptr)) < 0 || afl_global_id >= MAP_SIZE)
+      if ((afl_global_id = (uint32_t)atoi(ptr)) < 0 ||
+          afl_global_id >= MAP_SIZE)
         FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is not between 0 and %u\n",
               ptr, MAP_SIZE - 1);
 
@@ -518,7 +519,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
                             Var->getInitializer())) {
 
                       HasStr2 = true;
-                      Str2 = Array->getAsString().str();
+                      Str2 = Array->getRawDataValues().str();
 
                     }
 
@@ -545,7 +546,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
                   if (literalLength + 1 == optLength) {
 
                     Str2.append("\0", 1);  // add null byte
-                    addedNull = true;
+                    // addedNull = true;
 
                   }
 
@@ -594,7 +595,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
                             Var->getInitializer())) {
 
                       HasStr1 = true;
-                      Str1 = Array->getAsString().str();
+                      Str1 = Array->getRawDataValues().str();
 
                     }
 
@@ -634,15 +635,18 @@ bool AFLLTOPass::runOnModule(Module &M) {
               thestring = Str2;
 
             optLen = thestring.length();
+            if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
 
             if (isMemcmp || isStrncmp || isStrncasecmp) {
 
               Value *      op2 = callInst->getArgOperand(2);
               ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+
               if (ilen) {
 
                 uint64_t literalLength = optLen;
                 optLen = ilen->getZExtValue();
+                if (optLen < 2) { continue; }
                 if (literalLength + 1 == optLen) {  // add null byte
                   thestring.append("\0", 1);
                   addedNull = true;
@@ -657,17 +661,21 @@ bool AFLLTOPass::runOnModule(Module &M) {
             // was not already added
             if (!isMemcmp) {
 
-              if (addedNull == false) {
+              if (addedNull == false && thestring[optLen - 1] != '\0') {
 
                 thestring.append("\0", 1);  // add null byte
                 optLen++;
 
               }
 
-              // ensure we do not have garbage
-              size_t offset = thestring.find('\0', 0);
-              if (offset + 1 < optLen) optLen = offset + 1;
-              thestring = thestring.substr(0, optLen);
+              if (!isStdString) {
+
+                // ensure we do not have garbage
+                size_t offset = thestring.find('\0', 0);
+                if (offset + 1 < optLen) optLen = offset + 1;
+                thestring = thestring.substr(0, optLen);
+
+              }
 
             }
 
@@ -923,9 +931,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
     if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) {
 
-      uint32_t write_loc = afl_global_id;
-
-      if (afl_global_id % 32) write_loc = (((afl_global_id + 32) >> 4) << 4);
+      uint32_t write_loc = (((afl_global_id + 63) >> 6) << 6);
 
       GlobalVariable *AFLFinalLoc = new GlobalVariable(
           M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc");
diff --git a/instrumentation/afl-llvm-pass.so.cc b/instrumentation/afl-llvm-pass.so.cc
index 57ff3b47..0f773aba 100644
--- a/instrumentation/afl-llvm-pass.so.cc
+++ b/instrumentation/afl-llvm-pass.so.cc
@@ -62,7 +62,7 @@ typedef long double max_align_t;
 #endif
 
 #include "afl-llvm-common.h"
-#include "llvm-ngram-coverage.h"
+#include "llvm-alternative-coverage.h"
 
 using namespace llvm;
 
@@ -82,9 +82,10 @@ class AFLCoverage : public ModulePass {
 
  protected:
   uint32_t ngram_size = 0;
+  uint32_t ctx_k = 0;
   uint32_t map_size = MAP_SIZE;
   uint32_t function_minimum_size = 1;
-  char *   ctx_str = NULL, *skip_nozero = NULL;
+  char *   ctx_str = NULL, *caller_str = NULL, *skip_nozero = NULL;
 
 };
 
@@ -183,10 +184,16 @@ bool AFLCoverage::runOnModule(Module &M) {
   skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");
 
   unsigned PrevLocSize = 0;
+  unsigned PrevCallerSize = 0;
 
   char *ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
   if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
+  char *ctx_k_str = getenv("AFL_LLVM_CTX_K");
+  if (!ctx_k_str) ctx_k_str = getenv("AFL_CTX_K");
   ctx_str = getenv("AFL_LLVM_CTX");
+  caller_str = getenv("AFL_LLVM_CALLER");
+
+  bool instrument_ctx = ctx_str || caller_str;
 
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
   /* Decide previous location vector size (must be a power of two) */
@@ -204,6 +211,31 @@ bool AFLCoverage::runOnModule(Module &M) {
   if (ngram_size)
     PrevLocSize = ngram_size - 1;
   else
+    PrevLocSize = 1;
+
+  /* Decide K-ctx vector size (must be a power of two) */
+  VectorType *PrevCallerTy = NULL;
+
+  if (ctx_k_str)
+    if (sscanf(ctx_k_str, "%u", &ctx_k) != 1 || ctx_k < 1 || ctx_k > CTX_MAX_K)
+      FATAL("Bad value of AFL_CTX_K (must be between 1 and CTX_MAX_K (%u))",
+            CTX_MAX_K);
+
+  if (ctx_k == 1) {
+
+    ctx_k = 0;
+    instrument_ctx = true;
+    caller_str = ctx_k_str;  // Enable CALLER instead
+
+  }
+
+  if (ctx_k) {
+
+    PrevCallerSize = ctx_k;
+    instrument_ctx = true;
+
+  }
+
 #else
   if (ngram_size_str)
   #ifndef LLVM_VERSION_PATCH
@@ -217,8 +249,20 @@ bool AFLCoverage::runOnModule(Module &M) {
         "%d.%d.%d!",
         LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH);
   #endif
+  if (ctx_k_str)
+  #ifndef LLVM_VERSION_PATCH
+    FATAL(
+        "Sorry, K-CTX branch coverage is not supported with llvm version "
+        "%d.%d.%d!",
+        LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, 0);
+  #else
+    FATAL(
+        "Sorry, K-CTX branch coverage is not supported with llvm version "
+        "%d.%d.%d!",
+        LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH);
+  #endif
+  PrevLocSize = 1;
 #endif
-    PrevLocSize = 1;
 
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
   int PrevLocVecSize = PowerOf2Ceil(PrevLocSize);
@@ -231,6 +275,17 @@ bool AFLCoverage::runOnModule(Module &M) {
     );
 #endif
 
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+  int PrevCallerVecSize = PowerOf2Ceil(PrevCallerSize);
+  if (ctx_k)
+    PrevCallerTy = VectorType::get(IntLocTy, PrevCallerVecSize
+  #if LLVM_VERSION_MAJOR >= 12
+                                   ,
+                                   false
+  #endif
+    );
+#endif
+
   /* Get globals for the SHM region and the previous location. Note that
      __afl_prev_loc is thread-local. */
 
@@ -238,9 +293,10 @@ bool AFLCoverage::runOnModule(Module &M) {
       new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
                          GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
   GlobalVariable *AFLPrevLoc;
+  GlobalVariable *AFLPrevCaller;
   GlobalVariable *AFLContext = NULL;
 
-  if (ctx_str)
+  if (ctx_str || caller_str)
 #if defined(__ANDROID__) || defined(__HAIKU__)
     AFLContext = new GlobalVariable(
         M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx");
@@ -275,6 +331,31 @@ bool AFLCoverage::runOnModule(Module &M) {
 #endif
 
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
+  if (ctx_k)
+  #if defined(__ANDROID__) || defined(__HAIKU__)
+    AFLPrevCaller = new GlobalVariable(
+        M, PrevCallerTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+        /* Initializer */ nullptr, "__afl_prev_caller");
+  #else
+    AFLPrevCaller = new GlobalVariable(
+        M, PrevCallerTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+        /* Initializer */ nullptr, "__afl_prev_caller",
+        /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel,
+        /* AddressSpace */ 0, /* IsExternallyInitialized */ false);
+  #endif
+  else
+#endif
+#if defined(__ANDROID__) || defined(__HAIKU__)
+    AFLPrevCaller =
+        new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, 0,
+                           "__afl_prev_caller");
+#else
+  AFLPrevCaller = new GlobalVariable(
+      M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_caller",
+      0, GlobalVariable::GeneralDynamicTLSModel, 0, false);
+#endif
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
   /* Create the vector shuffle mask for updating the previous block history.
      Note that the first element of the vector will store cur_loc, so just set
      it to undef to allow the optimizer to do its thing. */
@@ -288,13 +369,30 @@ bool AFLCoverage::runOnModule(Module &M) {
     PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize));
 
   Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle);
+
+  Constant *                  PrevCallerShuffleMask = NULL;
+  SmallVector<Constant *, 32> PrevCallerShuffle = {UndefValue::get(Int32Ty)};
+
+  if (ctx_k) {
+
+    for (unsigned I = 0; I < PrevCallerSize - 1; ++I)
+      PrevCallerShuffle.push_back(ConstantInt::get(Int32Ty, I));
+
+    for (int I = PrevCallerSize; I < PrevCallerVecSize; ++I)
+      PrevCallerShuffle.push_back(ConstantInt::get(Int32Ty, PrevCallerSize));
+
+    PrevCallerShuffleMask = ConstantVector::get(PrevCallerShuffle);
+
+  }
+
 #endif
 
   // other constants we need
   ConstantInt *Zero = ConstantInt::get(Int8Ty, 0);
   ConstantInt *One = ConstantInt::get(Int8Ty, 1);
 
-  LoadInst *PrevCtx = NULL;  // CTX sensitive coverage
+  Value *   PrevCtx = NULL;     // CTX sensitive coverage
+  LoadInst *PrevCaller = NULL;  // K-CTX coverage
 
   /* Instrument all the things! */
 
@@ -318,12 +416,30 @@ bool AFLCoverage::runOnModule(Module &M) {
       IRBuilder<>          IRB(&(*IP));
 
       // Context sensitive coverage
-      if (ctx_str && &BB == &F.getEntryBlock()) {
+      if (instrument_ctx && &BB == &F.getEntryBlock()) {
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+        if (ctx_k) {
+
+          PrevCaller = IRB.CreateLoad(AFLPrevCaller);
+          PrevCaller->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
+          PrevCtx =
+              IRB.CreateZExt(IRB.CreateXorReduce(PrevCaller), IRB.getInt32Ty());
+
+        } else
+
+#endif
+        {
 
-        // load the context ID of the previous function and write to to a local
-        // variable on the stack
-        PrevCtx = IRB.CreateLoad(AFLContext);
-        PrevCtx->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+          // load the context ID of the previous function and write it to a
+          // local variable on the stack
+          LoadInst *PrevCtxLoad = IRB.CreateLoad(AFLContext);
+          PrevCtxLoad->setMetadata(M.getMDKindID("nosanitize"),
+                                   MDNode::get(C, None));
+          PrevCtx = PrevCtxLoad;
+
+        }
 
         // does the function have calls? and is any of the calls larger than one
         // basic block?
@@ -354,10 +470,32 @@ bool AFLCoverage::runOnModule(Module &M) {
         // if yes we store a context ID for this function in the global var
         if (has_calls) {
 
-          ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size));
-          StoreInst *  StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
-          StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
-                                MDNode::get(C, None));
+          Value *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size));
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+          if (ctx_k) {
+
+            Value *ShuffledPrevCaller = IRB.CreateShuffleVector(
+                PrevCaller, UndefValue::get(PrevCallerTy),
+                PrevCallerShuffleMask);
+            Value *UpdatedPrevCaller = IRB.CreateInsertElement(
+                ShuffledPrevCaller, NewCtx, (uint64_t)0);
+
+            StoreInst *Store =
+                IRB.CreateStore(UpdatedPrevCaller, AFLPrevCaller);
+            Store->setMetadata(M.getMDKindID("nosanitize"),
+                               MDNode::get(C, None));
+
+          } else
+
+#endif
+          {
+
+            if (ctx_str) NewCtx = IRB.CreateXor(PrevCtx, NewCtx);
+            StoreInst *StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
+            StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
+
+          }
 
         }
 
@@ -411,13 +549,20 @@ bool AFLCoverage::runOnModule(Module &M) {
 
         // in CTX mode we have to restore the original context for the caller -
         // she might be calling other functions which need the correct CTX
-        if (ctx_str && has_calls) {
+        if (instrument_ctx && has_calls) {
 
           Instruction *Inst = BB.getTerminator();
           if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
 
             IRBuilder<> Post_IRB(Inst);
-            StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+
+            StoreInst *RestoreCtx;
+  #ifdef AFL_HAVE_VECTOR_INTRINSICS
+            if (ctx_k)
+              RestoreCtx = IRB.CreateStore(PrevCaller, AFLPrevCaller);
+            else
+  #endif
+              RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
             RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
                                     MDNode::get(C, None));
 
@@ -458,7 +603,7 @@ bool AFLCoverage::runOnModule(Module &M) {
 #endif
         PrevLocTrans = PrevLoc;
 
-      if (ctx_str)
+      if (instrument_ctx)
         PrevLocTrans =
             IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty);
       else
@@ -538,19 +683,27 @@ bool AFLCoverage::runOnModule(Module &M) {
 
         Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1),
                                 AFLPrevLoc);
+        Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
       }
 
       // in CTX mode we have to restore the original context for the caller -
       // she might be calling other functions which need the correct CTX.
       // Currently this is only needed for the Ubuntu clang-6.0 bug
-      if (ctx_str && has_calls) {
+      if (instrument_ctx && has_calls) {
 
         Instruction *Inst = BB.getTerminator();
         if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
 
           IRBuilder<> Post_IRB(Inst);
-          StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+
+          StoreInst *RestoreCtx;
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+          if (ctx_k)
+            RestoreCtx = IRB.CreateStore(PrevCaller, AFLPrevCaller);
+          else
+#endif
+            RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
           RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
                                   MDNode::get(C, None));
 
diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc
index b5cc1882..ad334d3b 100644
--- a/instrumentation/cmplog-instructions-pass.cc
+++ b/instrumentation/cmplog-instructions-pass.cc
@@ -19,12 +19,13 @@
 #include <stdlib.h>
 #include <unistd.h>
 
+#include <iostream>
 #include <list>
 #include <string>
 #include <fstream>
 #include <sys/time.h>
-#include "llvm/Config/llvm-config.h"
 
+#include "llvm/Config/llvm-config.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -265,13 +266,20 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
       unsigned int  max_size = Val->getType()->getIntegerBitWidth(), cast_size;
       unsigned char do_cast = 0;
 
-      if (!SI->getNumCases() || max_size < 16 || max_size % 8) {
+      if (!SI->getNumCases() || max_size < 16) {
 
         // if (!be_quiet) errs() << "skip trivial switch..\n";
         continue;
 
       }
 
+      if (max_size % 8) {
+
+        max_size = (((max_size / 8) + 1) * 8);
+        do_cast = 1;
+
+      }
+
       IRBuilder<> IRB(SI->getParent());
       IRB.SetInsertPoint(SI);
 
@@ -310,36 +318,8 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 
       if (do_cast) {
 
-        ConstantInt *cint = dyn_cast<ConstantInt>(Val);
-        if (cint) {
-
-          uint64_t val = cint->getZExtValue();
-          // fprintf(stderr, "ConstantInt: %lu\n", val);
-          switch (cast_size) {
-
-            case 8:
-              CompareTo = ConstantInt::get(Int8Ty, val);
-              break;
-            case 16:
-              CompareTo = ConstantInt::get(Int16Ty, val);
-              break;
-            case 32:
-              CompareTo = ConstantInt::get(Int32Ty, val);
-              break;
-            case 64:
-              CompareTo = ConstantInt::get(Int64Ty, val);
-              break;
-            case 128:
-              CompareTo = ConstantInt::get(Int128Ty, val);
-              break;
-
-          }
-
-        } else {
-
-          CompareTo = IRB.CreateBitCast(Val, IntegerType::get(C, cast_size));
-
-        }
+        CompareTo =
+            IRB.CreateIntCast(CompareTo, IntegerType::get(C, cast_size), false);
 
       }
 
@@ -361,27 +341,8 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 
           if (do_cast) {
 
-            uint64_t val = cint->getZExtValue();
-            // fprintf(stderr, "ConstantInt: %lu\n", val);
-            switch (cast_size) {
-
-              case 8:
-                new_param = ConstantInt::get(Int8Ty, val);
-                break;
-              case 16:
-                new_param = ConstantInt::get(Int16Ty, val);
-                break;
-              case 32:
-                new_param = ConstantInt::get(Int32Ty, val);
-                break;
-              case 64:
-                new_param = ConstantInt::get(Int64Ty, val);
-                break;
-              case 128:
-                new_param = ConstantInt::get(Int128Ty, val);
-                break;
-
-            }
+            new_param =
+                IRB.CreateIntCast(cint, IntegerType::get(C, cast_size), false);
 
           }
 
@@ -457,7 +418,7 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
       IntegerType *        intTyOp0 = NULL;
       IntegerType *        intTyOp1 = NULL;
       unsigned             max_size = 0, cast_size = 0;
-      unsigned char        attr = 0, do_cast = 0;
+      unsigned char        attr = 0;
       std::vector<Value *> args;
 
       CmpInst *cmpInst = dyn_cast<CmpInst>(selectcmpInst);
@@ -523,7 +484,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
           max_size = 128;
 
         attr += 8;
-        do_cast = 1;
 
       } else {
 
@@ -540,7 +500,9 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 
       }
 
-      if (!max_size || max_size % 8 || max_size < 16) { continue; }
+      if (!max_size || max_size < 16) { continue; }
+
+      if (max_size % 8) { max_size = (((max_size / 8) + 1) * 8); }
 
       if (max_size > 128) {
 
@@ -553,7 +515,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
         }
 
         max_size = 128;
-        do_cast = 1;
 
       }
 
@@ -569,92 +530,30 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
           break;
         default:
           cast_size = 128;
-          do_cast = 1;
 
       }
 
-      if (do_cast) {
-
-        // F*cking LLVM optimized out any kind of bitcasts of ConstantInt values
-        // creating illegal calls. WTF. So we have to work around this.
-
-        ConstantInt *cint = dyn_cast<ConstantInt>(op0);
-        if (cint) {
-
-          uint64_t val = cint->getZExtValue();
-          // fprintf(stderr, "ConstantInt: %lu\n", val);
-          ConstantInt *new_param = NULL;
-          switch (cast_size) {
-
-            case 8:
-              new_param = ConstantInt::get(Int8Ty, val);
-              break;
-            case 16:
-              new_param = ConstantInt::get(Int16Ty, val);
-              break;
-            case 32:
-              new_param = ConstantInt::get(Int32Ty, val);
-              break;
-            case 64:
-              new_param = ConstantInt::get(Int64Ty, val);
-              break;
-            case 128:
-              new_param = ConstantInt::get(Int128Ty, val);
-              break;
-
-          }
-
-          if (!new_param) { continue; }
-          args.push_back(new_param);
-
-        } else {
-
-          Value *V0 = IRB.CreateBitCast(op0, IntegerType::get(C, cast_size));
-          args.push_back(V0);
-
-        }
-
-        cint = dyn_cast<ConstantInt>(op1);
-        if (cint) {
-
-          uint64_t     val = cint->getZExtValue();
-          ConstantInt *new_param = NULL;
-          switch (cast_size) {
-
-            case 8:
-              new_param = ConstantInt::get(Int8Ty, val);
-              break;
-            case 16:
-              new_param = ConstantInt::get(Int16Ty, val);
-              break;
-            case 32:
-              new_param = ConstantInt::get(Int32Ty, val);
-              break;
-            case 64:
-              new_param = ConstantInt::get(Int64Ty, val);
-              break;
-            case 128:
-              new_param = ConstantInt::get(Int128Ty, val);
-              break;
-
-          }
-
-          if (!new_param) { continue; }
-          args.push_back(new_param);
-
-        } else {
-
-          Value *V1 = IRB.CreateBitCast(op1, IntegerType::get(C, cast_size));
-          args.push_back(V1);
-
-        }
-
-      } else {
-
-        args.push_back(op0);
-        args.push_back(op1);
-
-      }
+      // errs() << "[CMPLOG] cmp  " << *cmpInst << "(in function " <<
+      // cmpInst->getFunction()->getName() << ")\n";
+
+      // first bitcast to integer type of the same bitsize as the original
+      // type (this is a nop, if already integer)
+      Value *op0_i = IRB.CreateBitCast(
+          op0, IntegerType::get(C, op0->getType()->getPrimitiveSizeInBits()));
+      // then create an int cast, which does zext, trunc or bitcast. In our case
+      // usually zext to the next larger supported type (this is a nop if
+      // already the right type)
+      Value *V0 =
+          IRB.CreateIntCast(op0_i, IntegerType::get(C, cast_size), false);
+      args.push_back(V0);
+      Value *op1_i = IRB.CreateBitCast(
+          op1, IntegerType::get(C, op1->getType()->getPrimitiveSizeInBits()));
+      Value *V1 =
+          IRB.CreateIntCast(op1_i, IntegerType::get(C, cast_size), false);
+      args.push_back(V1);
+
+      // errs() << "[CMPLOG] casted parameters:\n0: " << *V0 << "\n1: " << *V1
+      // << "\n";
 
       ConstantInt *attribute = ConstantInt::get(Int8Ty, attr);
       args.push_back(attribute);
@@ -667,7 +566,7 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
       }
 
       // fprintf(stderr, "_ExtInt(%u) castTo %u with attr %u didcast %u\n",
-      //         max_size, cast_size, attr, do_cast);
+      //         max_size, cast_size, attr);
 
       switch (cast_size) {
 
diff --git a/instrumentation/compare-transform-pass.so.cc b/instrumentation/compare-transform-pass.so.cc
index da5cf7e9..3ecba4e6 100644
--- a/instrumentation/compare-transform-pass.so.cc
+++ b/instrumentation/compare-transform-pass.so.cc
@@ -229,9 +229,9 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
                           dyn_cast<ConstantDataArray>(Var->getInitializer())) {
 
                     HasStr2 = true;
-                    Str2 = Array->getAsString();
+                    Str2 = Array->getRawDataValues();
                     valueMap[Str2P] = new std::string(Str2.str());
-                    fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
+                    // fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
 
                   }
 
@@ -254,7 +254,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
                             Var->getInitializer())) {
 
                       HasStr1 = true;
-                      Str1 = Array->getAsString();
+                      Str1 = Array->getRawDataValues();
                       valueMap[Str1P] = new std::string(Str1.str());
                       // fprintf(stderr, "glo1 %s\n", Str1.str().c_str());
 
@@ -316,7 +316,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
               uint64_t len = ilen->getZExtValue();
               // if len is zero this is a pointless call but allow real
               // implementation to worry about that
-              if (!len) continue;
+              if (len < 2) continue;
 
               if (isMemcmp) {
 
@@ -362,19 +362,22 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
     bool        HasStr1 = getConstantStringInfo(Str1P, Str1);
     bool        HasStr2 = getConstantStringInfo(Str2P, Str2);
     uint64_t    constStrLen, unrollLen, constSizedLen = 0;
-    bool        isMemcmp =
-        !callInst->getCalledFunction()->getName().compare(StringRef("memcmp"));
-    bool isSizedcmp = isMemcmp ||
-                      !callInst->getCalledFunction()->getName().compare(
-                          StringRef("strncmp")) ||
-                      !callInst->getCalledFunction()->getName().compare(
-                          StringRef("strncasecmp"));
+    bool        isMemcmp = false;
+    bool        isSizedcmp = false;
+    bool        isCaseInsensitive = false;
+    Function *  Callee = callInst->getCalledFunction();
+    if (Callee) {
+
+      isMemcmp = Callee->getName().compare("memcmp") == 0;
+      isSizedcmp = isMemcmp || Callee->getName().compare("strncmp") == 0 ||
+                   Callee->getName().compare("strncasecmp") == 0;
+      isCaseInsensitive = Callee->getName().compare("strcasecmp") == 0 ||
+                          Callee->getName().compare("strncasecmp") == 0;
+
+    }
+
     Value *sizedValue = isSizedcmp ? callInst->getArgOperand(2) : NULL;
     bool   isConstSized = sizedValue && isa<ConstantInt>(sizedValue);
-    bool isCaseInsensitive = !callInst->getCalledFunction()->getName().compare(
-                                 StringRef("strcasecmp")) ||
-                             !callInst->getCalledFunction()->getName().compare(
-                                 StringRef("strncasecmp"));
 
     if (!(HasStr1 || HasStr2)) {
 
@@ -391,7 +394,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
         if (val && !val->empty()) {
 
           Str2 = StringRef(*val);
-          HasStr2 = true;
+          // HasStr2 = true;
 
         }
 
@@ -417,15 +420,29 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
     }
 
+    if (TmpConstStr.length() < 2 ||
+        (TmpConstStr.length() == 2 && !TmpConstStr[1])) {
+
+      continue;
+
+    }
+
     // add null termination character implicit in c strings
-    TmpConstStr.append("\0", 1);
+    if (!isMemcmp && TmpConstStr[TmpConstStr.length() - 1]) {
+
+      TmpConstStr.append("\0", 1);
+
+    }
 
     // in the unusual case the const str has embedded null
     // characters, the string comparison functions should terminate
     // at the first null
-    if (!isMemcmp)
+    if (!isMemcmp) {
+
       TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1);
 
+    }
+
     constStrLen = TmpConstStr.length();
     // prefer use of StringRef (in comparison to std::string a StringRef has
     // built-in runtime bounds checking, which makes debugging easier)
@@ -436,15 +453,6 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
     else
       unrollLen = constStrLen;
 
-    /*
-        if (!be_quiet)
-          errs() << callInst->getCalledFunction()->getName() << ": unroll len "
-                 << unrollLen
-                 << ((isSizedcmp && !isConstSized) ? ", variable n" : "") << ":
-       "
-                 << ConstStr << "\n";
-    */
-
     /* split before the call instruction */
     BasicBlock *bb = callInst->getParent();
     BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(callInst));
diff --git a/instrumentation/llvm-ngram-coverage.h b/instrumentation/llvm-alternative-coverage.h
index 666839c8..0d7b3957 100644
--- a/instrumentation/llvm-ngram-coverage.h
+++ b/instrumentation/llvm-alternative-coverage.h
@@ -14,5 +14,8 @@ typedef u64 PREV_LOC_T;
 /* Maximum ngram size */
 #define NGRAM_SIZE_MAX 16U
 
+/* Maximum K for top-K context sensitivity */
+#define CTX_MAX_K 32U
+
 #endif
 
diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc
index b6d8c466..b02a89fb 100644
--- a/instrumentation/split-compares-pass.so.cc
+++ b/instrumentation/split-compares-pass.so.cc
@@ -149,8 +149,11 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) {
     auto op1 = FcmpInst->getOperand(1);
 
     /* find out what the new predicate is going to be */
-    auto               pred = dyn_cast<CmpInst>(FcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(FcmpInst);
+    if (!cmp_inst) { continue; }
+    auto               pred = cmp_inst->getPredicate();
     CmpInst::Predicate new_pred;
+
     switch (pred) {
 
       case CmpInst::FCMP_UGE:
@@ -276,8 +279,11 @@ bool SplitComparesTransform::simplifyCompares(Module &M) {
     auto op1 = IcmpInst->getOperand(1);
 
     /* find out what the new predicate is going to be */
-    auto               pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+    if (!cmp_inst) { continue; }
+    auto               pred = cmp_inst->getPredicate();
     CmpInst::Predicate new_pred;
+
     switch (pred) {
 
       case CmpInst::ICMP_UGE:
@@ -407,12 +413,16 @@ bool SplitComparesTransform::simplifyIntSignedness(Module &M) {
     auto op1 = IcmpInst->getOperand(1);
 
     IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
+    if (!intTyOp0) { continue; }
     unsigned     bitw = intTyOp0->getBitWidth();
     IntegerType *IntType = IntegerType::get(C, bitw);
 
     /* get the new predicate */
-    auto               pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+    if (!cmp_inst) { continue; }
+    auto               pred = cmp_inst->getPredicate();
     CmpInst::Predicate new_pred;
+
     if (pred == CmpInst::ICMP_SGT) {
 
       new_pred = CmpInst::ICMP_UGT;
@@ -602,12 +612,16 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
     if (op_size != op1->getType()->getPrimitiveSizeInBits()) { continue; }
 
     const unsigned int sizeInBits = op0->getType()->getPrimitiveSizeInBits();
+
+    // BUG FIXME TODO: u64 does not work for > 64 bit ... e.g. 80 and 128 bit
+    if (sizeInBits > 64) { continue; }
+
     const unsigned int precision = sizeInBits == 32    ? 24
                                    : sizeInBits == 64  ? 53
                                    : sizeInBits == 128 ? 113
                                    : sizeInBits == 16  ? 11
-                                                      /* sizeInBits == 80 */
-                                                      : 65;
+                                   : sizeInBits == 80  ? 65
+                                                       : sizeInBits - 8;
 
     const unsigned           shiftR_exponent = precision - 1;
     const unsigned long long mask_fraction =
@@ -1111,7 +1125,9 @@ size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned bitw) {
     auto op0 = IcmpInst->getOperand(0);
     auto op1 = IcmpInst->getOperand(1);
 
-    auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+    if (!cmp_inst) { continue; }
+    auto pred = cmp_inst->getPredicate();
 
     BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));
 
@@ -1300,12 +1316,9 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 
     case 64:
       count += splitIntCompares(M, bitw);
-      /*
-            if (!be_quiet)
-              errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
-         count
-                     << " split\n";
-      */
+      if (debug)
+        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+               << " split\n";
       bitw >>= 1;
 #if LLVM_VERSION_MAJOR > 3 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
@@ -1313,12 +1326,9 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 #endif
     case 32:
       count += splitIntCompares(M, bitw);
-      /*
-            if (!be_quiet)
-              errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
-         count
-                     << " split\n";
-      */
+      if (debug)
+        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+               << " split\n";
       bitw >>= 1;
 #if LLVM_VERSION_MAJOR > 3 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
@@ -1326,13 +1336,10 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 #endif
     case 16:
       count += splitIntCompares(M, bitw);
-      /*
-            if (!be_quiet)
-              errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
-         count
-                     << " split\n";
-      */
-      bitw >>= 1;
+      if (debug)
+        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+               << " split\n";
+      // bitw >>= 1;
       break;
 
     default: