about summary refs log tree commit diff
path: root/llvm_mode
diff options
context:
space:
mode:
authorvan Hauser <vh@thc.org>2020-05-06 00:58:13 +0200
committerGitHub <noreply@github.com>2020-05-06 00:58:13 +0200
commitdf5215783414ddda7d9f371ccef5acb2235f66d0 (patch)
tree52ca748f7a90c9deb09d9380c19f8220f0f45105 /llvm_mode
parentc7de368dc20078116bcb2e34b0f2237127802841 (diff)
parenta13958b32b6a1d8cba6f82b0d1ad03801721e3ef (diff)
downloadafl++-df5215783414ddda7d9f371ccef5acb2235f66d0.tar.gz
Merge pull request #352 from AFLplusplus/dev
Pull to master because of crash in string compare transform
Diffstat (limited to 'llvm_mode')
-rw-r--r--llvm_mode/GNUmakefile115
-rw-r--r--llvm_mode/LLVMInsTrim.so.cc267
-rw-r--r--llvm_mode/README.lto.md2
-rw-r--r--llvm_mode/README.md21
-rw-r--r--llvm_mode/README.neverzero.md10
-rw-r--r--llvm_mode/afl-clang-fast.c192
-rw-r--r--llvm_mode/afl-llvm-lto-instrumentation.so.cc30
-rw-r--r--llvm_mode/afl-llvm-pass.so.cc156
-rw-r--r--llvm_mode/afl-llvm-rt.o.c15
-rw-r--r--llvm_mode/compare-transform-pass.so.cc55
10 files changed, 599 insertions, 264 deletions
diff --git a/llvm_mode/GNUmakefile b/llvm_mode/GNUmakefile
index 69b0875e..93886e47 100644
--- a/llvm_mode/GNUmakefile
+++ b/llvm_mode/GNUmakefile
@@ -38,7 +38,7 @@ else
   LLVM_CONFIG ?= llvm-config
 endif
 
-LLVMVER  = $(shell $(LLVM_CONFIG) --version 2>/dev/null )
+LLVMVER  = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/git//' )
 LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 )
 LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 )
 LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//')
@@ -82,30 +82,79 @@ endif
 # this seems to be busted on some distros, so using the one in $PATH is
 # probably better.
 
-CC         ?= $(LLVM_BINDIR)/clang
-CXX        ?= $(LLVM_BINDIR)/clang++
+CC         = $(LLVM_BINDIR)/clang
+CXX        = $(LLVM_BINDIR)/clang++
 
+# llvm-config --bindir may not providing a valid path, so ...
 ifeq "$(shell test -e $(CC) || echo 1 )" "1"
-  # llvm-config --bindir may not providing a valid path, so ...
-  ifeq "$(shell test -e '$(BIN_DIR)/clang' && echo 1)" "1"
-    # we found one in the local install directory, lets use these
-    CC         = $(BIN_DIR)/clang
-    CXX        = $(BIN_DIR)/clang++
-  else
-    # hope for the best
-    $(warning we have trouble finding clang/clang++ - llvm-config is not helping us)
-    CC         = clang
-    CXX        = clang++
+  # however we must ensure that this is not a "CC=gcc make"
+  ifeq "$(shell command -v $(CC) 2> /dev/null)" ""
+    # we do not have a valid CC variable so we try alternatives
+    ifeq "$(shell test -e '$(BIN_DIR)/clang' && echo 1)" "1"
+      # we found one in the local install directory, lets use these
+      CC         = $(BIN_DIR)/clang
+    else
+      # hope for the best
+      $(warning we have trouble finding clang - llvm-config is not helping us)
+      CC         = clang
+    endif
+  endif
+endif
+# llvm-config --bindir may not providing a valid path, so ...
+ifeq "$(shell test -e $(CXX) || echo 1 )" "1"
+  # however we must ensure that this is not a "CC=gcc make"
+  ifeq "$(shell command -v $(CXX) 2> /dev/null)" ""
+    # we do not have a valid CC variable so we try alternatives
+    ifeq "$(shell test -e '$(BIN_DIR)/clang++' && echo 1)" "1"
+      # we found one in the local install directory, lets use these
+      CXX        = $(BIN_DIR)/clang++
+    else
+      # hope for the best
+      $(warning we have trouble finding clang++ - llvm-config is not helping us)
+      CXX        = clang++
+    endif
   endif
 endif
 
 # sanity check.
 # Are versions of clang --version and llvm-config --version equal?
-CLANGVER = $(shell $(CC) --version | sed -E -ne '/^.*version\ ([0-9]\.[0-9]\.[0-9]).*/s//\1/p')
+CLANGVER = $(shell $(CC) --version | sed -E -ne '/^.*version\ (1?[0-9]\.[0-9]\.[0-9]).*/s//\1/p')
 
-ifneq "$(CLANGVER)" "$(LLVMVER)"
-  CC = $(shell $(LLVM_CONFIG) --bindir)/clang
-  CXX = $(shell $(LLVM_CONFIG) --bindir)/clang++
+# I disable this because it does not make sense with what we did before (marc)
+# We did exactly set these 26 lines above with these values, and it would break
+# "CC=gcc make" etc. usages
+ifeq "$(findstring clang, $(shell $(CC) --version 2>/dev/null))" ""
+  CC_SAVE := $(LLVM_BINDIR)/clang
+else
+  CC_SAVE := $(CC)
+endif
+ifeq "$(findstring clang, $(shell $(CXX) --version 2>/dev/null))" ""
+  CXX_SAVE := $(LLVM_BINDIR)/clang++
+else
+  CXX_SAVE := $(CXX)
+endif
+
+CLANG_BIN := $(CC_SAVE)
+CLANGPP_BIN := $(CXX_SAVE)
+
+ifeq "$(CC_SAVE)" "$(LLVM_BINDIR)/clang"
+  USE_BINDIR = 1
+else
+  ifeq "$(CXX_SAVE)" "$(LLVM_BINDIR)/clang++"
+    USE_BINDIR = 1
+  else
+    USE_BINDIR = 0
+  endif
+endif
+
+# On old platform we cannot compile with clang because std++ libraries are too
+# old. For these we need to use gcc/g++, so if we find REAL_CC and REAL_CXX
+# variable we override the compiler variables here
+ifneq "$(REAL_CC)" ""
+CC         = $(REAL_CC)
+endif
+ifneq "$(REAL_CXX)" ""
+CXX        = $(REAL_CXX)
 endif
 
 # After we set CC/CXX we can start makefile magic tests
@@ -114,13 +163,13 @@ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .te
 	CFLAGS_OPT = -march=native
 endif
 
-ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+ifeq "$(shell echo 'int main() {return 0; }' | $(CLANG_BIN) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
         AFL_CLANG_FLTO ?= -flto=full
 else
- ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=thin -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+ ifeq "$(shell echo 'int main() {return 0; }' | $(CLANG_BIN) -x c - -flto=thin -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
         AFL_CLANG_FLTO ?= -flto=thin
  else
-  ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+  ifeq "$(shell echo 'int main() {return 0; }' | $(CLANG_BIN) -x c - -flto -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
         AFL_CLANG_FLTO ?= -flto
   endif
  endif
@@ -141,24 +190,11 @@ endif
 
 AFL_CLANG_FUSELD=
 ifneq "$(AFL_CLANG_FLTO)" ""
-ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -fuse-ld=`command -v ld` -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+ifeq "$(shell echo 'int main() {return 0; }' | $(CLANG_BIN) -x c - -fuse-ld=`command -v ld` -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
   AFL_CLANG_FUSELD=1
 endif
 endif
 
-CLANG_BIN = $(basename $(CC))
-CLANGPP_BIN = $(basename $(CXX))
-ifeq "$(shell test -e $(CLANG_BIN) || echo 1 )" "1"
-  CLANG_BIN = $(CC)
-  CLANGPP_BIN = $(CXX)
-endif
-
-ifeq "$(CC)" "$(LLVM_BINDIR)/clang"
-  USE_BINDIR = 1
-else
-  USE_BINDIR = 0
-endif
-
 CFLAGS          ?= -O3 -funroll-loops -D_FORTIFY_SOURCE=2
 override CFLAGS += -Wall \
                -g -Wno-pointer-sign -I ../include/ \
@@ -166,7 +202,7 @@ override CFLAGS += -Wall \
                -DLLVM_BINDIR=\"$(LLVM_BINDIR)\" -DVERSION=\"$(VERSION)\" \
                -DLLVM_VERSION=\"$(LLVMVER)\"  -DAFL_CLANG_FLTO=\"$(AFL_CLANG_FLTO)\" \
                -DAFL_REAL_LD=\"$(AFL_REAL_LD)\" -DAFL_CLANG_FUSELD=\"$(AFL_CLANG_FUSELD)\" \
-               -DCLANG_BIN=\"$(CC)\" -DCLANGPP_BIN=\"$(CXX)\" -DUSE_BINDIR=$(USE_BINDIR) -Wno-unused-function
+               -DCLANG_BIN=\"$(CLANG_BIN)\" -DCLANGPP_BIN=\"$(CLANGPP_BIN)\" -DUSE_BINDIR=$(USE_BINDIR) -Wno-unused-function
 ifdef AFL_TRACE_PC
   $(info Compile option AFL_TRACE_PC is deprecated, just set AFL_LLVM_INSTRUMENT=PCGUARD to activate when compiling targets )
 endif
@@ -249,7 +285,6 @@ test_deps:
 	@echo "[*] Checking for matching versions of '$(CC)' and '$(LLVM_CONFIG)'"
 ifneq "$(CLANGVER)" "$(LLVMVER)"
 	@echo "[!] WARNING: we have llvm-config version $(LLVMVER) and a clang version $(CLANGVER)"
-	@echo "[!] Retrying with the clang compiler from llvm: CC=`llvm-config --bindir`/clang"
 else
 	@echo "[*] We have llvm-config version $(LLVMVER) with a clang version $(CLANGVER), good."
 endif
@@ -290,9 +325,9 @@ endif
 ../afl-llvm-lto-instrumentation.so: afl-llvm-lto-instrumentation.so.cc afl-llvm-common.o
 ifeq "$(LLVM_LTO)" "1"
 	$(CXX) $(CLANG_CFL) -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) afl-llvm-common.o
-	$(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto.o
-	@$(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -m64 -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto-64.o 2>/dev/null; if [ "$$?" = "0" ]; then : ; fi
-	@$(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -m32 -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto-32.o 2>/dev/null; if [ "$$?" = "0" ]; then : ; fi
+	$(CLANG_BIN) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto.o
+	@$(CLANG_BIN) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -m64 -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto-64.o 2>/dev/null; if [ "$$?" = "0" ]; then : ; fi
+	@$(CLANG_BIN) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -m32 -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto-32.o 2>/dev/null; if [ "$$?" = "0" ]; then : ; fi
 endif
 
 # laf
@@ -323,7 +358,7 @@ endif
 
 test_build: $(PROGS)
 	@echo "[*] Testing the CC wrapper and instrumentation output..."
-	unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_CC=$(CC) AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS)
+	unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS)
 	ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
 	echo 1 | ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr1 ./test-instr
 	@rm -f test-instr
diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc
index 98263ef1..ad046a8b 100644
--- a/llvm_mode/LLVMInsTrim.so.cc
+++ b/llvm_mode/LLVMInsTrim.so.cc
@@ -36,11 +36,12 @@ typedef long double max_align_t;
 #include <string>
 #include <fstream>
 
-#include "config.h"
-#include "debug.h"
-
 #include "MarkNodes.h"
 #include "afl-llvm-common.h"
+#include "llvm-ngram-coverage.h"
+
+#include "config.h"
+#include "debug.h"
 
 using namespace llvm;
 
@@ -56,6 +57,7 @@ struct InsTrim : public ModulePass {
  protected:
   uint32_t function_minimum_size = 1;
   uint32_t debug = 0;
+  char *   skip_nozero = NULL;
 
  private:
   std::mt19937 generator;
@@ -93,9 +95,15 @@ struct InsTrim : public ModulePass {
 
   }
 
+#if LLVM_VERSION_MAJOR >= 4 || \
+    (LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_PATCH >= 1)
+#define AFL_HAVE_VECTOR_INTRINSICS 1
+#endif
+
   bool runOnModule(Module &M) override {
 
     char be_quiet = 0;
+    int  ngram_size = 0;
 
     if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {
 
@@ -107,11 +115,17 @@ struct InsTrim : public ModulePass {
 
     if (getenv("AFL_DEBUG") != NULL) debug = 1;
 
+    LLVMContext &C = M.getContext();
+
+    IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
+    IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
+
 #if LLVM_VERSION_MAJOR < 9
     char *neverZero_counters_str;
     if ((neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO")) != NULL)
       if (!be_quiet) OKF("LLVM neverZero activated (by hexcoder)\n");
 #endif
+    skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");
 
     if (getenv("AFL_LLVM_INSTRIM_LOOPHEAD") != NULL ||
         getenv("LOOPHEAD") != NULL) {
@@ -123,24 +137,107 @@ struct InsTrim : public ModulePass {
     if (getenv("AFL_LLVM_INSTRIM_SKIPSINGLEBLOCK") != NULL)
       function_minimum_size = 2;
 
-    // this is our default
-    MarkSetOpt = true;
+    unsigned PrevLocSize = 0;
+    char *   ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
+    if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
+    char *ctx_str = getenv("AFL_LLVM_CTX");
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+    /* Decide previous location vector size (must be a power of two) */
+    VectorType *PrevLocTy;
+
+    if (ngram_size_str)
+      if (sscanf(ngram_size_str, "%u", &ngram_size) != 1 || ngram_size < 2 ||
+          ngram_size > NGRAM_SIZE_MAX)
+        FATAL(
+            "Bad value of AFL_NGRAM_SIZE (must be between 2 and NGRAM_SIZE_MAX "
+            "(%u))",
+            NGRAM_SIZE_MAX);
+
+    if (ngram_size)
+      PrevLocSize = ngram_size - 1;
+    else
+#else
+    if (ngram_size_str)
+      FATAL(
+          "Sorry, NGRAM branch coverage is not supported with llvm version %s!",
+          LLVM_VERSION_STRING);
+#endif
+      PrevLocSize = 1;
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+    // IntegerType *Int64Ty = IntegerType::getInt64Ty(C);
+    uint64_t     PrevLocVecSize = PowerOf2Ceil(PrevLocSize);
+    IntegerType *IntLocTy =
+        IntegerType::getIntNTy(C, sizeof(PREV_LOC_T) * CHAR_BIT);
+    if (ngram_size) PrevLocTy = VectorType::get(IntLocTy, PrevLocVecSize);
+#endif
 
-    LLVMContext &C = M.getContext();
-    IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
-    IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
+    /* Get globals for the SHM region and the previous location. Note that
+       __afl_prev_loc is thread-local. */
 
-    GlobalVariable *CovMapPtr = new GlobalVariable(
-        M, PointerType::getUnqual(Int8Ty), false, GlobalValue::ExternalLinkage,
-        nullptr, "__afl_area_ptr");
+    GlobalVariable *AFLMapPtr =
+        new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
+                           GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
+    GlobalVariable *AFLPrevLoc;
+    GlobalVariable *AFLContext;
+    LoadInst *      PrevCtx = NULL;  // for CTX sensitive coverage
 
-    GlobalVariable *OldPrev = new GlobalVariable(
+    if (ctx_str)
+#ifdef __ANDROID__
+      AFLContext = new GlobalVariable(
+          M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx");
+#else
+      AFLContext = new GlobalVariable(
+          M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx",
+          0, GlobalVariable::GeneralDynamicTLSModel, 0, false);
+#endif
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+    if (ngram_size)
+#ifdef __ANDROID__
+      AFLPrevLoc = new GlobalVariable(
+          M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+          /* Initializer */ nullptr, "__afl_prev_loc");
+#else
+      AFLPrevLoc = new GlobalVariable(
+          M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+          /* Initializer */ nullptr, "__afl_prev_loc",
+          /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel,
+          /* AddressSpace */ 0, /* IsExternallyInitialized */ false);
+#endif
+    else
+#endif
+#ifdef __ANDROID__
+      AFLPrevLoc = new GlobalVariable(
+          M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc");
+#else
+    AFLPrevLoc = new GlobalVariable(
         M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0,
         GlobalVariable::GeneralDynamicTLSModel, 0, false);
+#endif
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+    /* Create the vector shuffle mask for updating the previous block history.
+       Note that the first element of the vector will store cur_loc, so just set
+       it to undef to allow the optimizer to do its thing. */
+
+    SmallVector<Constant *, 32> PrevLocShuffle = {UndefValue::get(Int32Ty)};
+
+    for (unsigned I = 0; I < PrevLocSize - 1; ++I)
+      PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, I));
+
+    for (unsigned I = PrevLocSize; I < PrevLocVecSize; ++I)
+      PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize));
+
+    Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle);
+#endif
+
+    // this is our default
+    MarkSetOpt = true;
 
     ConstantInt *Zero = ConstantInt::get(Int8Ty, 0);
     ConstantInt *One = ConstantInt::get(Int8Ty, 1);
-    ConstantInt *One32 = ConstantInt::get(Int32Ty, 1);
 
     u64 total_rs = 0;
     u64 total_hs = 0;
@@ -238,13 +335,30 @@ struct InsTrim : public ModulePass {
 
         }
 
-        if (function_minimum_size < 2) {
+        for (BasicBlock &BB : F) {
+
+          if (MS.find(&BB) == MS.end()) { continue; }
+          IRBuilder<> IRB(&*BB.getFirstInsertionPt());
+
+          if (ngram_size) {
+
+            LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc);
+            PrevLoc->setMetadata(M.getMDKindID("nosanitize"),
+                                 MDNode::get(C, None));
 
-          for (BasicBlock &BB : F) {
+            Value *ShuffledPrevLoc = IRB.CreateShuffleVector(
+                PrevLoc, UndefValue::get(PrevLocTy), PrevLocShuffleMask);
+            Value *UpdatedPrevLoc = IRB.CreateInsertElement(
+                ShuffledPrevLoc, ConstantInt::get(Int32Ty, genLabel()),
+                (uint64_t)0);
 
-            if (MS.find(&BB) == MS.end()) { continue; }
-            IRBuilder<> IRB(&*BB.getFirstInsertionPt());
-            IRB.CreateStore(ConstantInt::get(Int32Ty, genLabel()), OldPrev);
+            IRB.CreateStore(UpdatedPrevLoc, AFLPrevLoc)
+                ->setMetadata(M.getMDKindID("nosanitize"),
+                              MDNode::get(C, None));
+
+          } else {
+
+            IRB.CreateStore(ConstantInt::get(Int32Ty, genLabel()), AFLPrevLoc);
 
           }
 
@@ -252,18 +366,67 @@ struct InsTrim : public ModulePass {
 
       }
 
+      int has_calls = 0;
       for (BasicBlock &BB : F) {
 
-        if (MarkSetOpt && MS.find(&BB) == MS.end()) { continue; }
+        auto         PI = pred_begin(&BB);
+        auto         PE = pred_end(&BB);
+        IRBuilder<>  IRB(&*BB.getFirstInsertionPt());
+        Value *      L = NULL;
+        unsigned int cur_loc;
+
+        // Context sensitive coverage
+        if (ctx_str && &BB == &F.getEntryBlock()) {
+
+          PrevCtx = IRB.CreateLoad(AFLContext);
+          PrevCtx->setMetadata(M.getMDKindID("nosanitize"),
+                               MDNode::get(C, None));
+
+          // does the function have calls? and is any of the calls larger than
+          // one basic block?
+          has_calls = 0;
+          for (auto &BB : F) {
+
+            if (has_calls) break;
+            for (auto &IN : BB) {
+
+              CallInst *callInst = nullptr;
+              if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+                Function *Callee = callInst->getCalledFunction();
+                if (!Callee || Callee->size() < 2)
+                  continue;
+                else {
+
+                  has_calls = 1;
+                  break;
+
+                }
+
+              }
 
-        auto        PI = pred_begin(&BB);
-        auto        PE = pred_end(&BB);
-        IRBuilder<> IRB(&*BB.getFirstInsertionPt());
-        Value *     L = NULL;
+            }
+
+          }
+
+          // if yes we store a context ID for this function in the global var
+          if (has_calls) {
+
+            ConstantInt *NewCtx = ConstantInt::get(Int32Ty, genLabel());
+            StoreInst *  StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
+            StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
+
+          }
+
+        }  // END of ctx_str
+
+        if (MarkSetOpt && MS.find(&BB) == MS.end()) { continue; }
 
-        if (function_minimum_size < 2 && PI == PE) {
+        if (PI == PE) {
 
-          L = ConstantInt::get(Int32Ty, genLabel());
+          cur_loc = genLabel();
+          L = ConstantInt::get(Int32Ty, cur_loc);
 
         } else {
 
@@ -274,6 +437,7 @@ struct InsTrim : public ModulePass {
             BasicBlock *PBB = *PI;
             auto        It = PredMap.insert({PBB, genLabel()});
             unsigned    Label = It.first->second;
+            cur_loc = Label;
             PN->addIncoming(ConstantInt::get(Int32Ty, Label), PBB);
 
           }
@@ -283,15 +447,37 @@ struct InsTrim : public ModulePass {
         }
 
         /* Load prev_loc */
-        LoadInst *PrevLoc = IRB.CreateLoad(OldPrev);
+        LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc);
         PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
-        Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
+        Value *PrevLocTrans;
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+        /* "For efficiency, we propose to hash the tuple as a key into the
+           hit_count map as (prev_block_trans << 1) ^ curr_block_trans, where
+           prev_block_trans = (block_trans_1 ^ ... ^ block_trans_(n-1)" */
+
+        if (ngram_size)
+          PrevLocTrans =
+              IRB.CreateZExt(IRB.CreateXorReduce(PrevLoc), IRB.getInt32Ty());
+        else
+#endif
+          PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
+
+        if (ctx_str)
+          PrevLocTrans =
+              IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty);
 
         /* Load SHM pointer */
-        LoadInst *MapPtr = IRB.CreateLoad(CovMapPtr);
+        LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
         MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
-        Value *MapPtrIdx =
-            IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, L));
+        Value *MapPtrIdx;
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+        if (ngram_size)
+          MapPtrIdx = IRB.CreateGEP(
+              MapPtr, IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, L), Int32Ty));
+        else
+#endif
+          MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocTrans, L));
 
         /* Update bitmap */
         LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
@@ -304,8 +490,7 @@ struct InsTrim : public ModulePass {
             NULL)  // with llvm 9 we make this the default as the bug in llvm is
                    // then fixed
 #else
-        if (1)  // with llvm 9 we make this the default as the bug in llvm is
-                // then fixed
+        if (!skip_nozero)
 #endif
         {
 
@@ -328,12 +513,20 @@ struct InsTrim : public ModulePass {
         IRB.CreateStore(Incr, MapPtrIdx)
             ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
-        // save the actually location ID to OldPrev if function_minimum_size > 1
-        if (function_minimum_size > 1) {
+        if (ctx_str && has_calls) {
+
+          // in CTX mode we have to restore the original context for the
+          // caller - she might be calling other functions which need the
+          // correct CTX
+          Instruction *Inst = BB.getTerminator();
+          if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
 
-          Value *Shr = IRB.CreateLShr(L, One32);
-          IRB.CreateStore(Shr, OldPrev)
-              ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+            IRBuilder<> Post_IRB(Inst);
+            StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+            RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+                                    MDNode::get(C, None));
+
+          }
 
         }
 
diff --git a/llvm_mode/README.lto.md b/llvm_mode/README.lto.md
index bb66b5e7..d8e4766d 100644
--- a/llvm_mode/README.lto.md
+++ b/llvm_mode/README.lto.md
@@ -65,7 +65,7 @@ $ cd build
 $ cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/
 $ make -j $(nproc)
 $ export PATH=`pwd`/bin:$PATH
-$ export LLVM_CONFIG=`pwd`/bin/llcm-config
+$ export LLVM_CONFIG=`pwd`/bin/llvm-config
 $ cd /path/to/AFLplusplus/
 $ make
 $ cd llvm_mode
diff --git a/llvm_mode/README.md b/llvm_mode/README.md
index 607350fb..0bff1ff1 100644
--- a/llvm_mode/README.md
+++ b/llvm_mode/README.md
@@ -37,7 +37,26 @@ co-exists with the original code.
 
 The idea and much of the implementation comes from Laszlo Szekeres.
 
-## 2) How to use this
+## 2a) How to use this - short
+
+Set the `LLVM_CONFIG` variable to the clang version you want to use, e.g.
+```
+LLVM_CONFIG=llvm-config-9 make
+```
+In case you have your own compiled llvm version specify the full path:
+```
+LLVM_CONFIG=~/llvm-project/build/bin/llvm-config make
+```
+If you try to use a new llvm version on an old Linux this can fail because of
+old c++ libraries. In this case usually switching to gcc/g++ to compile
+llvm_mode will work:
+```
+LLVM_CONFIG=llvm-config-7 REAL_CC=gcc REAL_CXX=g++ make
+```
+It is highly recommended to use the newest clang version you can put your
+hands on :)
+
+## 2b) How to use this - long
 
 In order to leverage this mechanism, you need to have clang installed on your
 system. You should also make sure that the llvm-config tool is in your path
diff --git a/llvm_mode/README.neverzero.md b/llvm_mode/README.neverzero.md
index 1e406560..903e5bd3 100644
--- a/llvm_mode/README.neverzero.md
+++ b/llvm_mode/README.neverzero.md
@@ -20,8 +20,16 @@ This is implemented in afl-gcc, however for llvm_mode this is optional if
 the llvm version is below 9 - as there is a perfomance bug that is only fixed
 in version 9 and onwards.
 
-If you want to enable this for llvm < 9 then set
+If you want to enable this for llvm versions below 9 then set
 
 ```
 export AFL_LLVM_NOT_ZERO=1
 ```
+
+In case you are on llvm 9 or greater and you do not want this behaviour then
+you can set:
+```
+AFL_LLVM_SKIP_NEVERZERO=1
+```
+If the target does not have extensive loops or functions that are called
+a lot then this can give a small performance boost.
diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c
index 3de5fd7d..2d1b427c 100644
--- a/llvm_mode/afl-clang-fast.c
+++ b/llvm_mode/afl-clang-fast.c
@@ -43,7 +43,7 @@ static u8 * obj_path;                  /* Path to runtime libraries         */
 static u8 **cc_params;                 /* Parameters passed to the real CC  */
 static u32  cc_par_cnt = 1;            /* Param count, including argv0      */
 static u8   llvm_fullpath[PATH_MAX];
-static u8   instrument_mode;
+static u8   instrument_mode, instrument_opt_mode, ngram_size;
 static u8 * lto_flag = AFL_CLANG_FLTO;
 static u8 * march_opt = CFLAGS_OPT;
 static u8   debug;
@@ -60,14 +60,15 @@ enum {
   INSTRUMENT_INSTRIM = 2,
   INSTRUMENT_CFG = 2,
   INSTRUMENT_LTO = 3,
-  INSTRUMENT_CTX = 4,
-  INSTRUMENT_NGRAM = 5  // + ngram value of 2-16 = 7 - 21
+  INSTRUMENT_OPT_CTX = 4,
+  INSTRUMENT_OPT_NGRAM = 8
 
 };
 
-char instrument_mode_string[6][16] = {
+char instrument_mode_string[10][16] = {
 
-    "DEFAULT", "PCGUARD", "CFG", "LTO", "CTX",
+    "CLASSIC", "PCGUARD", "CFG",   "LTO", "CTX", "",
+    "",        "",        "NGRAM", ""
 
 };
 
@@ -562,54 +563,6 @@ int main(int argc, char **argv, char **envp) {
   instrument_mode = INSTRUMENT_PCGUARD;
 #endif
 
-  if ((ptr = getenv("AFL_LLVM_INSTRUMENT")) != NULL) {
-
-    if (strncasecmp(ptr, "default", strlen("default")) == 0 ||
-        strncasecmp(ptr, "afl", strlen("afl")) == 0 ||
-        strncasecmp(ptr, "classic", strlen("classic")) == 0)
-      instrument_mode = INSTRUMENT_DEFAULT;
-    if (strncasecmp(ptr, "cfg", strlen("cfg")) == 0 ||
-        strncasecmp(ptr, "instrim", strlen("instrim")) == 0)
-      instrument_mode = INSTRUMENT_CFG;
-    else if (strncasecmp(ptr, "pc-guard", strlen("pc-guard")) == 0 ||
-             strncasecmp(ptr, "pcguard", strlen("pcgard")) == 0)
-      instrument_mode = INSTRUMENT_PCGUARD;
-    else if (strncasecmp(ptr, "lto", strlen("lto")) == 0)
-      instrument_mode = INSTRUMENT_LTO;
-    else if (strncasecmp(ptr, "ctx", strlen("ctx")) == 0) {
-
-      instrument_mode = INSTRUMENT_CTX;
-      setenv("AFL_LLVM_CTX", "1", 1);
-
-    } else if (strncasecmp(ptr, "ngram", strlen("ngram")) == 0) {
-
-      ptr += strlen("ngram");
-      while (*ptr && (*ptr < '0' || *ptr > '9'))
-        ptr++;
-      if (!*ptr)
-        if ((ptr = getenv("AFL_LLVM_NGRAM_SIZE")) != NULL)
-          FATAL(
-              "you must set the NGRAM size with (e.g. for value 2) "
-              "AFL_LLVM_INSTRUMENT=ngram-2");
-      instrument_mode = INSTRUMENT_NGRAM + atoi(ptr);
-      if (instrument_mode < INSTRUMENT_NGRAM + 2 ||
-          instrument_mode > INSTRUMENT_NGRAM + NGRAM_SIZE_MAX)
-        FATAL(
-            "NGRAM instrumentation mode must be between 2 and NGRAM_SIZE_MAX "
-            "(%u)",
-            NGRAM_SIZE_MAX);
-
-      ptr = alloc_printf("%u", instrument_mode - INSTRUMENT_NGRAM);
-      setenv("AFL_LLVM_NGRAM_SIZE", ptr, 1);
-
-    } else if (strncasecmp(ptr, "classic", strlen("classic")) != 0 ||
-
-               strncasecmp(ptr, "default", strlen("default")) != 0 ||
-               strncasecmp(ptr, "afl", strlen("afl")) != 0)
-      FATAL("unknown AFL_LLVM_INSTRUMENT value: %s", ptr);
-
-  }
-
   if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") ||
       getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) {
 
@@ -631,39 +584,116 @@ int main(int argc, char **argv, char **envp) {
 
   }
 
-  if (getenv("AFL_LLVM_CTX")) {
+  if (getenv("AFL_LLVM_CTX")) instrument_opt_mode |= INSTRUMENT_OPT_CTX;
 
-    if (instrument_mode == 0)
-      instrument_mode = INSTRUMENT_CTX;
-    else if (instrument_mode != INSTRUMENT_CTX)
-      FATAL("you can not set AFL_LLVM_INSTRUMENT and AFL_LLVM_CTX together");
+  if (getenv("AFL_LLVM_NGRAM_SIZE")) {
+
+    instrument_opt_mode |= INSTRUMENT_OPT_NGRAM;
+    ngram_size = atoi(getenv("AFL_LLVM_NGRAM_SIZE"));
+    if (ngram_size < 2 || ngram_size > NGRAM_SIZE_MAX)
+      FATAL(
+          "NGRAM instrumentation mode must be between 2 and NGRAM_SIZE_MAX "
+          "(%u)",
+          NGRAM_SIZE_MAX);
 
   }
 
-  if (getenv("AFL_LLVM_NGRAM_SIZE")) {
+  if (getenv("AFL_LLVM_INSTRUMENT")) {
 
-    if (instrument_mode == 0) {
+    u8 *ptr = strtok(getenv("AFL_LLVM_INSTRUMENT"), ":,;");
 
-      instrument_mode = INSTRUMENT_NGRAM + atoi(getenv("AFL_LLVM_NGRAM_SIZE"));
-      if (instrument_mode < INSTRUMENT_NGRAM + 2 ||
-          instrument_mode > INSTRUMENT_NGRAM + NGRAM_SIZE_MAX)
-        FATAL(
-            "NGRAM instrumentation mode must be between 2 and NGRAM_SIZE_MAX "
-            "(%u)",
-            NGRAM_SIZE_MAX);
+    while (ptr) {
 
-    } else if (instrument_mode != INSTRUMENT_NGRAM)
+      if (strncasecmp(ptr, "default", strlen("default")) == 0 ||
+          strncasecmp(ptr, "afl", strlen("afl")) == 0 ||
+          strncasecmp(ptr, "classic", strlen("classic")) == 0) {
 
-      FATAL(
-          "you can not set AFL_LLVM_INSTRUMENT and AFL_LLVM_NGRAM_SIZE "
-          "together");
+        if (!instrument_mode || instrument_mode == INSTRUMENT_DEFAULT)
+          instrument_mode = INSTRUMENT_DEFAULT;
+        else
+          FATAL("main instrumentation mode already set with %s",
+                instrument_mode_string[instrument_mode]);
+
+      }
+
+      if (strncasecmp(ptr, "pc-guard", strlen("pc-guard")) == 0 ||
+          strncasecmp(ptr, "pcguard", strlen("pcgard")) == 0) {
+
+        if (!instrument_mode || instrument_mode == INSTRUMENT_PCGUARD)
+          instrument_mode = INSTRUMENT_PCGUARD;
+        else
+          FATAL("main instrumentation mode already set with %s",
+                instrument_mode_string[instrument_mode]);
+
+      }
+
+      if (strncasecmp(ptr, "cfg", strlen("cfg")) == 0 ||
+          strncasecmp(ptr, "instrim", strlen("instrim")) == 0) {
+
+        if (!instrument_mode || instrument_mode == INSTRUMENT_CFG)
+          instrument_mode = INSTRUMENT_CFG;
+        else
+          FATAL("main instrumentation mode already set with %s",
+                instrument_mode_string[instrument_mode]);
+
+      }
+
+      if (strncasecmp(ptr, "lto", strlen("lto")) == 0) {
+
+        if (!instrument_mode || instrument_mode == INSTRUMENT_LTO)
+          instrument_mode = INSTRUMENT_LTO;
+        else
+          FATAL("main instrumentation mode already set with %s",
+                instrument_mode_string[instrument_mode]);
+
+      }
+
+      if (strncasecmp(ptr, "ctx", strlen("ctx")) == 0) {
+
+        instrument_opt_mode |= INSTRUMENT_OPT_CTX;
+        setenv("AFL_LLVM_CTX", "1", 1);
+
+      }
+
+      if (strncasecmp(ptr, "ngram", strlen("ngram")) == 0) {
+
+        ptr += strlen("ngram");
+        while (*ptr && (*ptr < '0' || *ptr > '9'))
+          ptr++;
+        if (!*ptr)
+          if ((ptr = getenv("AFL_LLVM_NGRAM_SIZE")) != NULL)
+            FATAL(
+                "you must set the NGRAM size with (e.g. for value 2) "
+                "AFL_LLVM_INSTRUMENT=ngram-2");
+        ngram_size = atoi(ptr);
+        if (ngram_size < 2 || ngram_size > NGRAM_SIZE_MAX)
+          FATAL(
+              "NGRAM instrumentation option must be between 2 and "
+              "NGRAM_SIZE_MAX "
+              "(%u)",
+              NGRAM_SIZE_MAX);
+        instrument_opt_mode |= (INSTRUMENT_OPT_NGRAM);
+        ptr = alloc_printf("%u", ngram_size);
+        setenv("AFL_LLVM_NGRAM_SIZE", ptr, 1);
+
+      }
+
+      ptr = strtok(NULL, ":,;");
+
+    }
 
   }
 
-  if (instrument_mode < INSTRUMENT_NGRAM)
+  if (!instrument_opt_mode)
     ptr = instrument_mode_string[instrument_mode];
+  else if (instrument_opt_mode == INSTRUMENT_OPT_CTX)
+    ptr = alloc_printf("%s + CTX", instrument_mode_string[instrument_mode]);
+  else if (instrument_opt_mode == INSTRUMENT_OPT_NGRAM)
+    ptr = alloc_printf("%s + NGRAM-%u", instrument_mode_string[instrument_mode],
+                       ngram_size);
   else
-    ptr = alloc_printf("NGRAM-%u", instrument_mode - INSTRUMENT_NGRAM);
+    ptr = alloc_printf("%s + CTX + NGRAM-%u",
+                       instrument_mode_string[instrument_mode], ngram_size);
 
   if (strstr(argv[0], "afl-clang-lto") != NULL) {
 
@@ -685,9 +715,22 @@ int main(int argc, char **argv, char **envp) {
 
 #ifndef AFL_CLANG_FLTO
   if (instrument_mode == INSTRUMENT_LTO)
-    FATAL("instrumentation mode LTO specified but LLVM support not available");
+    FATAL(
+        "instrumentation mode LTO specified but LLVM support not available "
+        "(requires LLVM 11 or higher)");
 #endif
 
+  if (instrument_opt_mode && instrument_mode != INSTRUMENT_CLASSIC &&
+      instrument_mode != INSTRUMENT_CFG)
+    FATAL(
+        "CTX and NGRAM instrumentation options can only be used with CFG "
+        "(recommended) and CLASSIC instrumentation modes!");
+
+  if (getenv("AFL_LLVM_SKIP_NEVERZERO") && getenv("AFL_LLVM_NOT_ZERO"))
+    FATAL(
+        "AFL_LLVM_NOT_ZERO and AFL_LLVM_SKIP_NEVERZERO can not be set "
+        "together");
+
   if (argc < 2 || strcmp(argv[1], "-h") == 0) {
 
     if (instrument_mode != INSTRUMENT_LTO)
@@ -726,6 +769,7 @@ int main(int argc, char **argv, char **envp) {
         "AFL_HARDEN: adds code hardening to catch memory bugs\n"
         "AFL_INST_RATIO: percentage of branches to instrument\n"
         "AFL_LLVM_NOT_ZERO: use cycling trace counters that skip zero\n"
+        "AFL_LLVM_SKIP_NEVERZERO: do not skip zero on trace counters\n"
         "AFL_LLVM_LAF_SPLIT_COMPARES: enable cascaded comparisons\n"
         "AFL_LLVM_LAF_SPLIT_FLOATS: transform floating point comp. to "
         "cascaded "
diff --git a/llvm_mode/afl-llvm-lto-instrumentation.so.cc b/llvm_mode/afl-llvm-lto-instrumentation.so.cc
index 118ada52..838e45af 100644
--- a/llvm_mode/afl-llvm-lto-instrumentation.so.cc
+++ b/llvm_mode/afl-llvm-lto-instrumentation.so.cc
@@ -78,6 +78,8 @@ class AFLLTOPass : public ModulePass {
         FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is not between 0 and %d\n",
               ptr, MAP_SIZE - 1);
 
+    skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");
+
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -111,6 +113,7 @@ class AFLLTOPass : public ModulePass {
   int      afl_global_id = 1, debug = 0, autodictionary = 0;
   uint32_t be_quiet = 0, inst_blocks = 0, inst_funcs = 0, total_instr = 0;
   uint64_t map_addr = 0x10000;
+  char *   skip_nozero = NULL;
 
 };
 
@@ -614,9 +617,14 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
           Value *Incr = IRB.CreateAdd(Counter, One);
 
-          auto cf = IRB.CreateICmpEQ(Incr, Zero);
-          auto carry = IRB.CreateZExt(cf, Int8Ty);
-          Incr = IRB.CreateAdd(Incr, carry);
+          if (skip_nozero) {
+
+            auto cf = IRB.CreateICmpEQ(Incr, Zero);
+            auto carry = IRB.CreateZExt(cf, Int8Ty);
+            Incr = IRB.CreateAdd(Incr, carry);
+
+          }
+
           IRB.CreateStore(Incr, MapPtrIdx)
               ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
@@ -682,8 +690,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
     if (map_addr) {
 
       GlobalVariable *AFLMapAddrFixed = new GlobalVariable(
-          M, Int64Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_map_addr",
-          0, GlobalVariable::GeneralDynamicTLSModel, 0, false);
+          M, Int64Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_map_addr");
       ConstantInt *MapAddr = ConstantInt::get(Int64Ty, map_addr);
       StoreInst *  StoreMapAddr = IRB.CreateStore(MapAddr, AFLMapAddrFixed);
       StoreMapAddr->setMetadata(M.getMDKindID("nosanitize"),
@@ -698,8 +705,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
       if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3);
 
       GlobalVariable *AFLFinalLoc = new GlobalVariable(
-          M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc",
-          0, GlobalVariable::GeneralDynamicTLSModel, 0, false);
+          M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc");
       ConstantInt *const_loc = ConstantInt::get(Int32Ty, write_loc);
       StoreInst *  StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc);
       StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"),
@@ -748,10 +754,9 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
         }
 
-        GlobalVariable *AFLDictionaryLen = new GlobalVariable(
-            M, Int32Ty, false, GlobalValue::ExternalLinkage, 0,
-            "__afl_dictionary_len", 0, GlobalVariable::GeneralDynamicTLSModel,
-            0, false);
+        GlobalVariable *AFLDictionaryLen =
+            new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
+                               0, "__afl_dictionary_len");
         ConstantInt *const_len = ConstantInt::get(Int32Ty, offset);
         StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen);
         StoreDictLen->setMetadata(M.getMDKindID("nosanitize"),
@@ -762,8 +767,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
             M, ArrayTy, true, GlobalValue::ExternalLinkage,
             ConstantDataArray::get(C,
                                    *(new ArrayRef<char>((char *)ptr, offset))),
-            "__afl_internal_dictionary", 0,
-            GlobalVariable::GeneralDynamicTLSModel, 0, false);
+            "__afl_internal_dictionary");
         AFLInternalDictionary->setInitializer(ConstantDataArray::get(
             C, *(new ArrayRef<char>((char *)ptr, offset))));
         AFLInternalDictionary->setConstant(true);
diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc
index 9314c3d1..0d9e0aba 100644
--- a/llvm_mode/afl-llvm-pass.so.cc
+++ b/llvm_mode/afl-llvm-pass.so.cc
@@ -84,7 +84,7 @@ class AFLCoverage : public ModulePass {
   uint32_t ngram_size = 0;
   uint32_t debug = 0;
   uint32_t map_size = MAP_SIZE;
-  char *   ctx_str = NULL;
+  char *   ctx_str = NULL, *skip_nozero = NULL;
 
 };
 
@@ -180,8 +180,9 @@ bool AFLCoverage::runOnModule(Module &M) {
 #if LLVM_VERSION_MAJOR < 9
   char *neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO");
 #endif
+  skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");
 
-  unsigned PrevLocSize;
+  unsigned PrevLocSize = 0;
 
   char *ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
   if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
@@ -215,9 +216,6 @@ bool AFLCoverage::runOnModule(Module &M) {
   if (ngram_size) PrevLocTy = VectorType::get(IntLocTy, PrevLocVecSize);
 #endif
 
-  if (ctx_str && ngram_size_str)
-    FATAL("you must decide between NGRAM and CTX instrumentation");
-
   /* Get globals for the SHM region and the previous location. Note that
      __afl_prev_loc is thread-local. */
 
@@ -296,70 +294,52 @@ bool AFLCoverage::runOnModule(Module &M) {
 
     if (!isInWhitelist(&F)) continue;
 
-    if (ctx_str && F.size() > 1) {  // Context sensitive coverage
-      // load the context ID of the previous function and write to to a local
-      // variable on the stack
-      auto                 bb = &F.getEntryBlock();
-      BasicBlock::iterator IP = bb->getFirstInsertionPt();
-      IRBuilder<>          IRB(&(*IP));
-      PrevCtx = IRB.CreateLoad(AFLContext);
-      PrevCtx->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
-
-      // does the function have calls? and is any of the calls larger than one
-      // basic block?
-      has_calls = 0;
-      for (auto &BB : F) {
-
-        if (has_calls) break;
-        for (auto &IN : BB) {
-
-          CallInst *callInst = nullptr;
-          if ((callInst = dyn_cast<CallInst>(&IN))) {
-
-            Function *Callee = callInst->getCalledFunction();
-            if (!Callee || Callee->size() < 2)
-              continue;
-            else {
+    for (auto &BB : F) {
 
-              has_calls = 1;
-              break;
+      BasicBlock::iterator IP = BB.getFirstInsertionPt();
+      IRBuilder<>          IRB(&(*IP));
 
-            }
+      // Context sensitive coverage
+      if (ctx_str && &BB == &F.getEntryBlock() && F.size() > 1) {
 
-          }
+        // load the context ID of the previous function and write to to a local
+        // variable on the stack
+        PrevCtx = IRB.CreateLoad(AFLContext);
+        PrevCtx->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
-        }
+        // does the function have calls? and is any of the calls larger than one
+        // basic block?
+        for (auto &BB : F) {
 
-      }
+          if (has_calls) break;
+          for (auto &IN : BB) {
 
-      // if yes we store a context ID for this function in the global var
-      if (has_calls) {
+            CallInst *callInst = nullptr;
+            if ((callInst = dyn_cast<CallInst>(&IN))) {
 
-        ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size));
-        StoreInst *  StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
-        StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
-                              MDNode::get(C, None));
+              Function *Callee = callInst->getCalledFunction();
+              if (!Callee || Callee->size() < 2)
+                continue;
+              else {
 
-      }
+                has_calls = 1;
+                break;
 
-    }
+              }
 
-    for (auto &BB : F) {
+            }
 
-      BasicBlock::iterator IP = BB.getFirstInsertionPt();
-      IRBuilder<>          IRB(&(*IP));
+          }
 
-      // in CTX mode we have to restore the original context for the caller -
-      // she might be calling other functions which need the correct CTX
-      if (ctx_str && has_calls) {
+        }
 
-        Instruction *Inst = BB.getTerminator();
-        if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
+        // if yes we store a context ID for this function in the global var
+        if (has_calls) {
 
-          IRBuilder<> Post_IRB(Inst);
-          StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
-          RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
-                                  MDNode::get(C, None));
+          ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size));
+          StoreInst *  StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
+          StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+                                MDNode::get(C, None));
 
         }
 
@@ -409,7 +389,28 @@ bool AFLCoverage::runOnModule(Module &M) {
       }
 
       // fprintf(stderr, " == %d\n", more_than_one);
-      if (more_than_one != 1) continue;
+      if (more_than_one != 1) {
+
+        // in CTX mode we have to restore the original context for the caller -
+        // she might be calling other functions which need the correct CTX
+        if (ctx_str && has_calls && F.size() > 1) {
+
+          Instruction *Inst = BB.getTerminator();
+          if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
+
+            IRBuilder<> Post_IRB(Inst);
+            StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+            RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+                                    MDNode::get(C, None));
+
+          }
+
+        }
+
+        continue;
+
+      }
+
 #endif
 
       ConstantInt *CurLoc;
@@ -433,13 +434,17 @@ bool AFLCoverage::runOnModule(Module &M) {
          prev_block_trans = (block_trans_1 ^ ... ^ block_trans_(n-1)" */
 
       if (ngram_size)
-        PrevLocTrans = IRB.CreateXorReduce(PrevLoc);
+        PrevLocTrans =
+            IRB.CreateZExt(IRB.CreateXorReduce(PrevLoc), IRB.getInt32Ty());
       else
 #endif
-          if (ctx_str)
-        PrevLocTrans = IRB.CreateZExt(IRB.CreateXor(PrevLoc, PrevCtx), Int32Ty);
+        PrevLocTrans = PrevLoc;
+
+      if (ctx_str)
+        PrevLocTrans =
+            IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty);
       else
-        PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
+        PrevLocTrans = IRB.CreateZExt(PrevLocTrans, IRB.getInt32Ty());
 
       /* Load SHM pointer */
 
@@ -451,7 +456,9 @@ bool AFLCoverage::runOnModule(Module &M) {
       if (ngram_size)
         MapPtrIdx = IRB.CreateGEP(
             MapPtr,
-            IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, CurLoc), Int32Ty));
+            IRB.CreateZExt(
+                IRB.CreateXor(PrevLocTrans, IRB.CreateZExt(CurLoc, Int32Ty)),
+                Int32Ty));
       else
 #endif
         MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocTrans, CurLoc));
@@ -467,6 +474,9 @@ bool AFLCoverage::runOnModule(Module &M) {
       if (neverZero_counters_str !=
           NULL) {  // with llvm 9 we make this the default as the bug in llvm is
                    // then fixed
+#else
+      if (!skip_nozero) {
+
 #endif
         /* hexcoder: Realize a counter that skips zero during overflow.
          * Once this counter reaches its maximum value, it next increments to 1
@@ -482,12 +492,8 @@ bool AFLCoverage::runOnModule(Module &M) {
         auto carry = IRB.CreateZExt(cf, Int8Ty);
         Incr = IRB.CreateAdd(Incr, carry);
 
-#if LLVM_VERSION_MAJOR < 9
-
       }
 
-#endif
-
       IRB.CreateStore(Incr, MapPtrIdx)
           ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
@@ -517,6 +523,23 @@ bool AFLCoverage::runOnModule(Module &M) {
 
       }
 
+      // in CTX mode we have to restore the original context for the caller -
+      // she might be calling other functions which need the correct CTX.
+      // Currently this is only needed for the Ubuntu clang-6.0 bug
+      if (ctx_str && has_calls && F.size() > 1) {
+
+        Instruction *Inst = BB.getTerminator();
+        if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
+
+          IRBuilder<> Post_IRB(Inst);
+          StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+          RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
+
+        }
+
+      }
+
       inst_blocks++;
 
     }
@@ -560,8 +583,7 @@ bool AFLCoverage::runOnModule(Module &M) {
 
         GlobalVariable *AFLFinalLoc = new GlobalVariable(
             M, Int32Ty, true, GlobalValue::ExternalLinkage, 0,
-            "__afl_final_loc", 0, GlobalVariable::GeneralDynamicTLSModel, 0,
-            false);
+            "__afl_final_loc");
         ConstantInt *const_loc = ConstantInt::get(Int32Ty, map_size);
         StoreInst *  StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc);
         StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"),
diff --git a/llvm_mode/afl-llvm-rt.o.c b/llvm_mode/afl-llvm-rt.o.c
index 8867ae36..56038f7a 100644
--- a/llvm_mode/afl-llvm-rt.o.c
+++ b/llvm_mode/afl-llvm-rt.o.c
@@ -67,22 +67,19 @@ u8  __afl_area_initial[MAP_SIZE];
 u8 *__afl_area_ptr = __afl_area_initial;
 u8 *__afl_dictionary;
 
+u32 __afl_final_loc;
+u32 __afl_map_size = MAP_SIZE;
+u32 __afl_dictionary_len;
+u64 __afl_map_addr;
+
 #ifdef __ANDROID__
 PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX];
-u32        __afl_final_loc;
 u32        __afl_prev_ctx;
 u32        __afl_cmp_counter;
-u32        __afl_dictionary_len;
-u32        __afl_map_size = MAP_SIZE;
-u64        __afl_map_addr;
 #else
 __thread PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX];
-__thread u32        __afl_final_loc;
 __thread u32        __afl_prev_ctx;
 __thread u32        __afl_cmp_counter;
-__thread u32        __afl_dictionary_len;
-__thread u32        __afl_map_size = MAP_SIZE;
-__thread u64        __afl_map_addr;
 #endif
 
 struct cmp_map *__afl_cmp_map;
@@ -152,7 +149,7 @@ static void __afl_map_shm(void) {
 
   if (getenv("AFL_DEBUG"))
     fprintf(stderr,
-            "DEBUG: id_str %s, __afl_map_addr 0x%x, MAP_SIZE %u, "
+            "DEBUG: id_str %s, __afl_map_addr 0x%llx, MAP_SIZE %u, "
             "__afl_final_loc %u, max_size_forkserver %u/0x%x\n",
             id_str == NULL ? "<null>" : id_str, __afl_map_addr, MAP_SIZE,
             __afl_final_loc, FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE);
diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc
index c871c1c4..2111b646 100644
--- a/llvm_mode/compare-transform-pass.so.cc
+++ b/llvm_mode/compare-transform-pass.so.cc
@@ -211,20 +211,24 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
           }
 
           // not literal? maybe global or local variable
-          if (!(HasStr1 ^ HasStr2)) {
+          if (!(HasStr1 || HasStr2)) {
 
             auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
             if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
 
               if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
 
-                if (auto *Array =
-                        dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+                if (Var->hasInitializer()) {
 
-                  HasStr2 = true;
-                  Str2 = Array->getAsString();
-                  valueMap[Str2P] = new std::string(Str2.str());
-                  // fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
+                  if (auto *Array =
+                          dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+
+                    HasStr2 = true;
+                    Str2 = Array->getAsString();
+                    valueMap[Str2P] = new std::string(Str2.str());
+                    fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
+
+                  }
 
                 }
 
@@ -239,13 +243,17 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
                 if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
 
-                  if (auto *Array =
-                          dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+                  if (Var->hasInitializer()) {
+
+                    if (auto *Array = dyn_cast<ConstantDataArray>(
+                            Var->getInitializer())) {
 
-                    HasStr1 = true;
-                    Str1 = Array->getAsString();
-                    valueMap[Str1P] = new std::string(Str1.str());
-                    // fprintf(stderr, "glo1 %s\n", Str1.str().c_str());
+                      HasStr1 = true;
+                      Str1 = Array->getAsString();
+                      valueMap[Str1P] = new std::string(Str1.str());
+                      // fprintf(stderr, "glo1 %s\n", Str1.str().c_str());
+
+                    }
 
                   }
 
@@ -260,13 +268,13 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
             }
 
-            if ((HasStr1 ^ HasStr2)) indirect = true;
+            if ((HasStr1 || HasStr2)) indirect = true;
 
           }
 
           if (isIntMemcpy) continue;
 
-          if (!(HasStr1 ^ HasStr2)) {
+          if (!(HasStr1 || HasStr2)) {
 
             // do we have a saved local variable initialization?
             std::string *val = valueMap[Str1P];
@@ -294,7 +302,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
           }
 
           /* handle cases of one string is const, one string is variable */
-          if (!(HasStr1 ^ HasStr2)) continue;
+          if (!(HasStr1 || HasStr2)) continue;
 
           if (isMemcmp || isStrncmp || isStrncasecmp) {
 
@@ -359,7 +367,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
     }
 
-    if (!(HasStr1 ^ HasStr2)) {
+    if (!(HasStr1 || HasStr2)) {
 
       // do we have a saved local or global variable initialization?
       std::string *val = valueMap[Str1P];
@@ -386,13 +394,13 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
       TmpConstStr = Str1.str();
       VarStr = Str2P;
-      constLen = isMemcmp ? sizedLen : GetStringLength(Str1P);
+      constLen = isMemcmp ? sizedLen : TmpConstStr.length();
 
     } else {
 
       TmpConstStr = Str2.str();
       VarStr = Str1P;
-      constLen = isMemcmp ? sizedLen : GetStringLength(Str2P);
+      constLen = isMemcmp ? sizedLen : TmpConstStr.length();
 
     }
 
@@ -400,9 +408,14 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
      * the StringRef (in comparison to std::string a StringRef has built-in
      * runtime bounds checking, which makes debugging easier) */
     TmpConstStr.append("\0", 1);
+    if (!sizedLen) constLen++;
     ConstStr = StringRef(TmpConstStr);
-
-    if (isSizedcmp && constLen > sizedLen) { constLen = sizedLen; }
+    // fprintf(stderr, "issized: %d, const > sized ? %u > %u\n", isSizedcmp,
+    // constLen, sizedLen);
+    if (isSizedcmp && constLen > sizedLen && sizedLen) constLen = sizedLen;
+    if (constLen > TmpConstStr.length()) constLen = TmpConstStr.length();
+    if (!constLen) constLen = TmpConstStr.length();
+    if (!constLen) continue;
 
     if (!be_quiet)
       errs() << callInst->getCalledFunction()->getName() << ": len " << constLen