about summary refs log tree commit diff
path: root/instrumentation
diff options
context:
space:
mode:
authorvan Hauser <vh@thc.org>2021-03-06 18:47:58 +0100
committerGitHub <noreply@github.com>2021-03-06 18:47:58 +0100
commit976cb3e36c130dc31fb189e9bb4f036730fca7ee (patch)
tree94143e3775e23597abe00b1ad9373c6c90b62632 /instrumentation
parentbd0a23de73011a390714b9f3836a46443054fdd5 (diff)
parent9b3d8c327d33191b181219ffce411b40bdbe8902 (diff)
downloadafl++-976cb3e36c130dc31fb189e9bb4f036730fca7ee.tar.gz
Merge pull request #778 from AFLplusplus/dev
This fixes 3 different crash issues
Diffstat (limited to 'instrumentation')
-rw-r--r--instrumentation/LLVMInsTrim.so.cc29
-rw-r--r--instrumentation/README.ctx.md22
-rw-r--r--instrumentation/SanitizerCoverageLTO.so.cc21
-rw-r--r--instrumentation/afl-compiler-rt.o.c251
-rw-r--r--instrumentation/afl-llvm-dict2file.so.cc23
-rw-r--r--instrumentation/afl-llvm-lto-instrumentation.so.cc21
-rw-r--r--instrumentation/afl-llvm-pass.so.cc18
-rw-r--r--instrumentation/cmplog-instructions-pass.cc14
-rw-r--r--instrumentation/compare-transform-pass.so.cc26
-rw-r--r--instrumentation/split-compares-pass.so.cc26
10 files changed, 342 insertions, 109 deletions
diff --git a/instrumentation/LLVMInsTrim.so.cc b/instrumentation/LLVMInsTrim.so.cc
index 948f8f3a..f0de6536 100644
--- a/instrumentation/LLVMInsTrim.so.cc
+++ b/instrumentation/LLVMInsTrim.so.cc
@@ -135,7 +135,7 @@ struct InsTrim : public ModulePass {
     unsigned int PrevLocSize = 0;
     char *       ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
     if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
-    char *ctx_str = getenv("AFL_LLVM_CTX");
+    char *caller_str = getenv("AFL_LLVM_CALLER");
 
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
     unsigned int ngram_size = 0;
@@ -197,9 +197,9 @@ struct InsTrim : public ModulePass {
                            GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
     GlobalVariable *AFLPrevLoc;
     GlobalVariable *AFLContext = NULL;
-    LoadInst *      PrevCtx = NULL;  // for CTX sensitive coverage
+    LoadInst *      PrevCaller = NULL;  // for CALLER sensitive coverage
 
-    if (ctx_str)
+    if (caller_str)
 #if defined(__ANDROID__) || defined(__HAIKU__)
       AFLContext = new GlobalVariable(
           M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx");
@@ -398,11 +398,11 @@ struct InsTrim : public ModulePass {
         unsigned int cur_loc;
 
         // Context sensitive coverage
-        if (ctx_str && &BB == &F.getEntryBlock()) {
+        if (caller_str && &BB == &F.getEntryBlock()) {
 
-          PrevCtx = IRB.CreateLoad(AFLContext);
-          PrevCtx->setMetadata(M.getMDKindID("nosanitize"),
-                               MDNode::get(C, None));
+          PrevCaller = IRB.CreateLoad(AFLContext);
+          PrevCaller->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
 
           // does the function have calls? and is any of the calls larger than
           // one basic block?
@@ -441,7 +441,7 @@ struct InsTrim : public ModulePass {
 
           }
 
-        }  // END of ctx_str
+        }  // END of caller_str
 
         if (MarkSetOpt && MS.find(&BB) == MS.end()) { continue; }
 
@@ -485,9 +485,9 @@ struct InsTrim : public ModulePass {
 #endif
           PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
 
-        if (ctx_str)
+        if (caller_str)
           PrevLocTrans =
-              IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty);
+              IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCaller), Int32Ty);
 
         /* Load SHM pointer */
         LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
@@ -535,16 +535,17 @@ struct InsTrim : public ModulePass {
         IRB.CreateStore(Incr, MapPtrIdx)
             ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
-        if (ctx_str && has_calls) {
+        if (caller_str && has_calls) {
 
-          // in CTX mode we have to restore the original context for the
+          // in CALLER mode we have to restore the original context for the
           // caller - she might be calling other functions which need the
-          // correct CTX
+          // correct CALLER
           Instruction *Inst = BB.getTerminator();
           if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
 
             IRBuilder<> Post_IRB(Inst);
-            StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+            StoreInst * RestoreCtx =
+                Post_IRB.CreateStore(PrevCaller, AFLContext);
             RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
                                     MDNode::get(C, None));
 
diff --git a/instrumentation/README.ctx.md b/instrumentation/README.ctx.md
index caf2c09a..577b3e5f 100644
--- a/instrumentation/README.ctx.md
+++ b/instrumentation/README.ctx.md
@@ -4,14 +4,19 @@
 
 This is an LLVM-based implementation of the context sensitive branch coverage.
 
-Basically every function gets its own ID and that ID is combined with the
-edges of the called functions.
+Basically every function gets its own ID and, every time when an edge is logged,
+all the IDs in the callstack are hashed and combined with the edge transition
+hash to augment the classic edge coverage with the information about the
+calling context.
 
 So if both function A and function B call a function C, the coverage
 collected in C will be different.
 
 In math the coverage is collected as follows:
-`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1`
+`map[current_location_ID ^ previous_location_ID >> 1 ^ hash_callstack_IDs] += 1`
+
+The callstack hash is produced XOR-ing the function IDs to avoid explosion with
+recursive functions.
 
 ## Usage
 
@@ -20,3 +25,14 @@ Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable.
 It is highly recommended to increase the MAP_SIZE_POW2 definition in
 config.h to at least 18 and maybe up to 20 for this as otherwise too
 many map collisions occur.
+
+## Caller Branch Coverage
+
+If the context sensitive coverage introduces too may collisions and becoming
+decremental, the user can choose to augment edge coverage with just the
+called function ID, instead of the entire callstack hash.
+
+In math the coverage is collected as follows:
+`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1`
+
+Set the `AFL_LLVM_INSTRUMENT=CALLER` or `AFL_LLVM_CALLER=1` environment variable.
diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc
index 3026abc8..13a5e5fd 100644
--- a/instrumentation/SanitizerCoverageLTO.so.cc
+++ b/instrumentation/SanitizerCoverageLTO.so.cc
@@ -733,7 +733,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
                             Var->getInitializer())) {
 
                       HasStr2 = true;
-                      Str2 = Array->getAsString().str();
+                      Str2 = Array->getRawDataValues().str();
 
                     }
 
@@ -809,7 +809,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
                             Var->getInitializer())) {
 
                       HasStr1 = true;
-                      Str1 = Array->getAsString().str();
+                      Str1 = Array->getRawDataValues().str();
 
                     }
 
@@ -849,15 +849,18 @@ bool ModuleSanitizerCoverage::instrumentModule(
               thestring = Str2;
 
             optLen = thestring.length();
+            if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
 
             if (isMemcmp || isStrncmp || isStrncasecmp) {
 
               Value *      op2 = callInst->getArgOperand(2);
               ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+
               if (ilen) {
 
                 uint64_t literalLength = optLen;
                 optLen = ilen->getZExtValue();
+                if (optLen < 2) { continue; }
                 if (literalLength + 1 == optLen) {  // add null byte
                   thestring.append("\0", 1);
                   addedNull = true;
@@ -872,17 +875,21 @@ bool ModuleSanitizerCoverage::instrumentModule(
             // was not already added
             if (!isMemcmp) {
 
-              if (addedNull == false) {
+              if (addedNull == false && thestring[optLen - 1] != '\0') {
 
                 thestring.append("\0", 1);  // add null byte
                 optLen++;
 
               }
 
-              // ensure we do not have garbage
-              size_t offset = thestring.find('\0', 0);
-              if (offset + 1 < optLen) optLen = offset + 1;
-              thestring = thestring.substr(0, optLen);
+              if (!isStdString) {
+
+                // ensure we do not have garbage
+                size_t offset = thestring.find('\0', 0);
+                if (offset + 1 < optLen) optLen = offset + 1;
+                thestring = thestring.substr(0, optLen);
+
+              }
 
             }
 
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
index ecb94cab..a702ec39 100644
--- a/instrumentation/afl-compiler-rt.o.c
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -34,6 +34,7 @@
 #include <errno.h>
 
 #include <sys/mman.h>
+#include <sys/syscall.h>
 #ifndef __HAIKU__
   #include <sys/shm.h>
 #endif
@@ -122,6 +123,21 @@ static u8 is_persistent;
 
 static u8 _is_sancov;
 
+/* Debug? */
+
+static u32 __afl_debug;
+
+/* Already initialized markers */
+
+u32 __afl_already_initialized_shm;
+u32 __afl_already_initialized_forkserver;
+u32 __afl_already_initialized_first;
+u32 __afl_already_initialized_second;
+
+/* Dummy pipe for area_is_valid() */
+
+static int __afl_dummy_fd[2] = {2, 2};
+
 /* ensure we kill the child on termination */
 
 void at_exit(int signal) {
@@ -171,7 +187,7 @@ static void __afl_map_shm_fuzz() {
 
   char *id_str = getenv(SHM_FUZZ_ENV_VAR);
 
-  if (getenv("AFL_DEBUG")) {
+  if (__afl_debug) {
 
     fprintf(stderr, "DEBUG: fuzzcase shmem %s\n", id_str ? id_str : "none");
 
@@ -217,7 +233,7 @@ static void __afl_map_shm_fuzz() {
     __afl_fuzz_len = (u32 *)map;
     __afl_fuzz_ptr = map + sizeof(u32);
 
-    if (getenv("AFL_DEBUG")) {
+    if (__afl_debug) {
 
       fprintf(stderr, "DEBUG: successfully got fuzzing shared memory\n");
 
@@ -237,6 +253,9 @@ static void __afl_map_shm_fuzz() {
 
 static void __afl_map_shm(void) {
 
+  if (__afl_already_initialized_shm) return;
+  __afl_already_initialized_shm = 1;
+
   // if we are not running in afl ensure the map exists
   if (!__afl_area_ptr) { __afl_area_ptr = __afl_area_ptr_dummy; }
 
@@ -294,7 +313,7 @@ static void __afl_map_shm(void) {
      early-stage __afl_area_initial region that is needed to allow some really
      hacky .init code to work correctly in projects such as OpenSSL. */
 
-  if (getenv("AFL_DEBUG"))
+  if (__afl_debug)
     fprintf(stderr,
             "DEBUG: id_str %s, __afl_area_ptr %p, __afl_area_initial %p, "
             "__afl_map_addr 0x%llx, MAP_SIZE %u, __afl_final_loc %u, "
@@ -350,17 +369,18 @@ static void __afl_map_shm(void) {
 
     }
 
-    if (shm_base == MAP_FAILED) {
+    close(shm_fd);
+    shm_fd = -1;
 
-      close(shm_fd);
-      shm_fd = -1;
+    if (shm_base == MAP_FAILED) {
 
       fprintf(stderr, "mmap() failed\n");
+      perror("mmap for map");
+
       if (__afl_map_addr)
         send_forkserver_error(FS_ERROR_MAP_ADDR);
       else
         send_forkserver_error(FS_ERROR_MMAP);
-      perror("mmap for map");
 
       exit(2);
 
@@ -467,7 +487,7 @@ static void __afl_map_shm(void) {
 
   id_str = getenv(CMPLOG_SHM_ENV_VAR);
 
-  if (getenv("AFL_DEBUG")) {
+  if (__afl_debug) {
 
     fprintf(stderr, "DEBUG: cmplog id_str %s\n",
             id_str == NULL ? "<null>" : id_str);
@@ -476,6 +496,12 @@ static void __afl_map_shm(void) {
 
   if (id_str) {
 
+    if ((__afl_dummy_fd[1] = open("/dev/null", O_WRONLY)) < 0) {
+
+      if (pipe(__afl_dummy_fd) < 0) { __afl_dummy_fd[1] = 1; }
+
+    }
+
 #ifdef USEMMAP
     const char *    shm_file_path = id_str;
     int             shm_fd = -1;
@@ -526,6 +552,58 @@ static void __afl_map_shm(void) {
 
 }
 
+/* unmap SHM. */
+
+static void __afl_unmap_shm(void) {
+
+  if (!__afl_already_initialized_shm) return;
+
+  char *id_str = getenv(SHM_ENV_VAR);
+
+  if (id_str) {
+
+#ifdef USEMMAP
+
+    munmap((void *)__afl_area_ptr, __afl_map_size);
+
+#else
+
+    shmdt((void *)__afl_area_ptr);
+
+#endif
+
+  } else if ((!__afl_area_ptr || __afl_area_ptr == __afl_area_initial) &&
+
+             __afl_map_addr) {
+
+    munmap((void *)__afl_map_addr, __afl_map_size);
+
+  }
+
+  __afl_area_ptr = __afl_area_ptr_dummy;
+
+  id_str = getenv(CMPLOG_SHM_ENV_VAR);
+
+  if (id_str) {
+
+#ifdef USEMMAP
+
+    munmap((void *)__afl_cmp_map, __afl_map_size);
+
+#else
+
+    shmdt((void *)__afl_cmp_map);
+
+#endif
+
+    __afl_cmp_map = NULL;
+
+  }
+
+  __afl_already_initialized_shm = 0;
+
+}
+
 #ifdef __linux__
 static void __afl_start_snapshots(void) {
 
@@ -554,7 +632,7 @@ static void __afl_start_snapshots(void) {
 
     if (read(FORKSRV_FD, &was_killed, 4) != 4) { _exit(1); }
 
-    if (getenv("AFL_DEBUG")) {
+    if (__afl_debug) {
 
       fprintf(stderr, "target forkserver recv: %08x\n", was_killed);
 
@@ -731,6 +809,9 @@ static void __afl_start_snapshots(void) {
 
 static void __afl_start_forkserver(void) {
 
+  if (__afl_already_initialized_forkserver) return;
+  __afl_already_initialized_forkserver = 1;
+
   struct sigaction orig_action;
   sigaction(SIGTERM, NULL, &orig_action);
   old_sigterm_handler = orig_action.sa_handler;
@@ -781,7 +862,7 @@ static void __afl_start_forkserver(void) {
 
     if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
 
-    if (getenv("AFL_DEBUG")) {
+    if (__afl_debug) {
 
       fprintf(stderr, "target forkserver recv: %08x\n", was_killed);
 
@@ -1016,7 +1097,7 @@ void __afl_manual_init(void) {
     __afl_sharedmem_fuzzing = 0;
     if (__afl_area_ptr == NULL) __afl_area_ptr = __afl_area_ptr_dummy;
 
-    if (getenv("AFL_DEBUG"))
+    if (__afl_debug)
       fprintf(stderr,
               "DEBUG: disabled instrumentation because of "
               "AFL_DISABLE_LLVM_INSTRUMENTATION\n");
@@ -1060,6 +1141,11 @@ __attribute__((constructor(CTOR_PRIO))) void __afl_auto_early(void) {
 
 __attribute__((constructor(1))) void __afl_auto_second(void) {
 
+  if (__afl_already_initialized_second) return;
+  __afl_already_initialized_second = 1;
+
+  if (getenv("AFL_DEBUG")) { __afl_debug = 1; }
+
   if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
   u8 *ptr;
 
@@ -1084,17 +1170,18 @@ __attribute__((constructor(1))) void __afl_auto_second(void) {
 
   }
 
-}
+}  // ptr memleak report is a false positive
 
 /* preset __afl_area_ptr #1 - at constructor level 0 global variables have
    not been set */
 
 __attribute__((constructor(0))) void __afl_auto_first(void) {
 
-  if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
-  u8 *ptr;
+  if (__afl_already_initialized_first) return;
+  __afl_already_initialized_first = 1;
 
-  ptr = (u8 *)malloc(MAP_INITIAL_SIZE);
+  if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
+  u8 *ptr = (u8 *)malloc(MAP_INITIAL_SIZE);
 
   if (ptr && (ssize_t)ptr != -1) {
 
@@ -1103,7 +1190,7 @@ __attribute__((constructor(0))) void __afl_auto_first(void) {
 
   }
 
-}
+}  // ptr memleak report is a false positive
 
 /* The following stuff deals with supporting -fsanitize-coverage=trace-pc-guard.
    It remains non-operational in the traditional, plugin-backed LLVM mode.
@@ -1171,11 +1258,13 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   _is_sancov = 1;
 
-  if (getenv("AFL_DEBUG")) {
+  if (__afl_debug) {
 
     fprintf(stderr,
-            "Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges)\n",
-            start, stop, (unsigned long)(stop - start));
+            "Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges) "
+            "after_fs=%u\n",
+            start, stop, (unsigned long)(stop - start),
+            __afl_already_initialized_forkserver);
 
   }
 
@@ -1191,6 +1280,36 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   }
 
+  /* instrumented code is loaded *after* our forkserver is up. this is a
+     problem. We cannot prevent collisions then :( */
+  if (__afl_already_initialized_forkserver &&
+      __afl_final_loc + 1 + stop - start > __afl_map_size) {
+
+    if (__afl_debug)
+      fprintf(stderr, "Warning: new instrumneted code after the forkserver!\n");
+    __afl_final_loc = 2;
+
+    if (1 + stop - start > __afl_map_size) {
+
+      *(start++) = ++__afl_final_loc;
+
+      while (start < stop) {
+
+        if (R(100) < inst_ratio)
+          *start = ++__afl_final_loc % __afl_map_size;
+        else
+          *start = 0;
+
+        start++;
+
+      }
+
+      return;
+
+    }
+
+  }
+
   /* Make sure that the first element in the range is always set - we use that
      to avoid duplicate calls (which can happen as an artifact of the underlying
      implementation in LLVM). */
@@ -1208,6 +1327,28 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   }
 
+  if (__afl_debug) {
+
+    fprintf(stderr,
+            "Done __sanitizer_cov_trace_pc_guard_init: __afl_final_loc = %u\n",
+            __afl_final_loc);
+
+  }
+
+  if (__afl_already_initialized_shm && __afl_final_loc > __afl_map_size) {
+
+    if (__afl_debug) {
+
+      fprintf(stderr, "Reinit shm necessary (+%u)\n",
+              __afl_final_loc - __afl_map_size);
+
+    }
+
+    __afl_unmap_shm();
+    __afl_map_shm();
+
+  }
+
 }
 
 ///// CmpLog instrumentation
@@ -1551,17 +1692,42 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
 
 }
 
+__attribute__((weak)) void *__asan_region_is_poisoned(void *beg, size_t size) {
+
+  return NULL;
+
+}
+
 // POSIX shenanigan to see if an area is mapped.
 // If it is mapped as X-only, we have a problem, so maybe we should add a check
 // to avoid to call it on .text addresses
-static int area_is_mapped(void *ptr, size_t len) {
+static int area_is_valid(void *ptr, size_t len) {
 
-  char *p = (char *)ptr;
-  char *page = (char *)((uintptr_t)p & ~(sysconf(_SC_PAGE_SIZE) - 1));
+  if (unlikely(__asan_region_is_poisoned(ptr, len))) { return 0; }
 
-  int r = msync(page, (p - page) + len, MS_ASYNC);
-  if (r < 0) return errno != ENOMEM;
-  return 1;
+  long r = syscall(__afl_dummy_fd[1], SYS_write, ptr, len);
+
+  if (unlikely(r <= 0 || r > len)) {  // fail - maybe hitting asan boundary?
+
+    char *p = (char *)ptr;
+    long  page_size = sysconf(_SC_PAGE_SIZE);
+    char *page = (char *)((uintptr_t)p & ~(page_size - 1)) + page_size;
+    if (page < p + len) { return 0; }  // no isnt, return fail
+    len -= (p + len - page);
+    r = syscall(__afl_dummy_fd[1], SYS_write, p, len);
+
+  }
+
+  // partial writes - we return what was written.
+  if (likely(r >= 0 && r <= len)) {
+
+    return (int)r;
+
+  } else {
+
+    return 0;
+
+  }
 
 }
 
@@ -1569,19 +1735,22 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
 
   /*
     u32 i;
-    if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return;
+    if (area_is_valid(ptr1, 32) <= 0 || area_is_valid(ptr2, 32) <= 0) return;
     fprintf(stderr, "rtn arg0=");
-    for (i = 0; i < 24; i++)
+    for (i = 0; i < 32; i++)
       fprintf(stderr, "%02x", ptr1[i]);
     fprintf(stderr, " arg1=");
-    for (i = 0; i < 24; i++)
+    for (i = 0; i < 32; i++)
       fprintf(stderr, "%02x", ptr2[i]);
     fprintf(stderr, "\n");
   */
 
   if (unlikely(!__afl_cmp_map)) return;
-
-  if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return;
+  int l1, l2;
+  if ((l1 = area_is_valid(ptr1, 32)) <= 0 ||
+      (l2 = area_is_valid(ptr2, 32)) <= 0)
+    return;
+  int len = MIN(l1, l2);
 
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
   k = (k >> 4) ^ (k << 8);
@@ -1592,17 +1761,17 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
   if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
 
     __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
-    hits = 0;
     __afl_cmp_map->headers[k].hits = 1;
-    __afl_cmp_map->headers[k].shape = 31;
+    __afl_cmp_map->headers[k].shape = len - 1;
+    hits = 0;
 
   } else {
 
     hits = __afl_cmp_map->headers[k].hits++;
 
-    if (__afl_cmp_map->headers[k].shape < 31) {
+    if (__afl_cmp_map->headers[k].shape < len) {
 
-      __afl_cmp_map->headers[k].shape = 31;
+      __afl_cmp_map->headers[k].shape = len;
 
     }
 
@@ -1610,9 +1779,9 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
 
   hits &= CMP_MAP_RTN_H - 1;
   __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0,
-                   ptr1, 32);
+                   ptr1, len);
   __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1,
-                   ptr2, 32);
+                   ptr2, len);
 
 }
 
@@ -1658,7 +1827,8 @@ static u8 *get_llvm_stdstring(u8 *string) {
 void __cmplog_rtn_gcc_stdstring_cstring(u8 *stdstring, u8 *cstring) {
 
   if (unlikely(!__afl_cmp_map)) return;
-  if (!area_is_mapped(stdstring, 32) || !area_is_mapped(cstring, 32)) return;
+  if (area_is_valid(stdstring, 32) <= 0 || area_is_valid(cstring, 32) <= 0)
+    return;
 
   __cmplog_rtn_hook(get_gcc_stdstring(stdstring), cstring);
 
@@ -1667,7 +1837,7 @@ void __cmplog_rtn_gcc_stdstring_cstring(u8 *stdstring, u8 *cstring) {
 void __cmplog_rtn_gcc_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) {
 
   if (unlikely(!__afl_cmp_map)) return;
-  if (!area_is_mapped(stdstring1, 32) || !area_is_mapped(stdstring2, 32))
+  if (area_is_valid(stdstring1, 32) <= 0 || area_is_valid(stdstring2, 32) <= 0)
     return;
 
   __cmplog_rtn_hook(get_gcc_stdstring(stdstring1),
@@ -1678,7 +1848,8 @@ void __cmplog_rtn_gcc_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) {
 void __cmplog_rtn_llvm_stdstring_cstring(u8 *stdstring, u8 *cstring) {
 
   if (unlikely(!__afl_cmp_map)) return;
-  if (!area_is_mapped(stdstring, 32) || !area_is_mapped(cstring, 32)) return;
+  if (area_is_valid(stdstring, 32) <= 0 || area_is_valid(cstring, 32) <= 0)
+    return;
 
   __cmplog_rtn_hook(get_llvm_stdstring(stdstring), cstring);
 
@@ -1687,7 +1858,7 @@ void __cmplog_rtn_llvm_stdstring_cstring(u8 *stdstring, u8 *cstring) {
 void __cmplog_rtn_llvm_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) {
 
   if (unlikely(!__afl_cmp_map)) return;
-  if (!area_is_mapped(stdstring1, 32) || !area_is_mapped(stdstring2, 32))
+  if (area_is_valid(stdstring1, 32) <= 0 || area_is_valid(stdstring2, 32) <= 0)
     return;
 
   __cmplog_rtn_hook(get_llvm_stdstring(stdstring1),
diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc
index 19ef15f7..c954054b 100644
--- a/instrumentation/afl-llvm-dict2file.so.cc
+++ b/instrumentation/afl-llvm-dict2file.so.cc
@@ -357,6 +357,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
           StringRef   TmpStr;
           bool        HasStr1;
           getConstantStringInfo(Str1P, TmpStr);
+
           if (TmpStr.empty()) {
 
             HasStr1 = false;
@@ -403,7 +404,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
                           dyn_cast<ConstantDataArray>(Var->getInitializer())) {
 
                     HasStr2 = true;
-                    Str2 = Array->getAsString().str();
+                    Str2 = Array->getRawDataValues().str();
 
                   }
 
@@ -479,7 +480,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
                           dyn_cast<ConstantDataArray>(Var->getInitializer())) {
 
                     HasStr1 = true;
-                    Str1 = Array->getAsString().str();
+                    Str1 = Array->getRawDataValues().str();
 
                   }
 
@@ -520,14 +521,18 @@ bool AFLdict2filePass::runOnModule(Module &M) {
 
           optLen = thestring.length();
 
+          if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
+
           if (isMemcmp || isStrncmp || isStrncasecmp) {
 
             Value *      op2 = callInst->getArgOperand(2);
             ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+
             if (ilen) {
 
               uint64_t literalLength = optLen;
               optLen = ilen->getZExtValue();
+              if (optLen < 2) { continue; }
               if (literalLength + 1 == optLen) {  // add null byte
                 thestring.append("\0", 1);
                 addedNull = true;
@@ -542,17 +547,21 @@ bool AFLdict2filePass::runOnModule(Module &M) {
           // was not already added
           if (!isMemcmp) {
 
-            if (addedNull == false) {
+            if (addedNull == false && thestring[optLen - 1] != '\0') {
 
               thestring.append("\0", 1);  // add null byte
               optLen++;
 
             }
 
-            // ensure we do not have garbage
-            size_t offset = thestring.find('\0', 0);
-            if (offset + 1 < optLen) optLen = offset + 1;
-            thestring = thestring.substr(0, optLen);
+            if (!isStdString) {
+
+              // ensure we do not have garbage
+              size_t offset = thestring.find('\0', 0);
+              if (offset + 1 < optLen) optLen = offset + 1;
+              thestring = thestring.substr(0, optLen);
+
+            }
 
           }
 
diff --git a/instrumentation/afl-llvm-lto-instrumentation.so.cc b/instrumentation/afl-llvm-lto-instrumentation.so.cc
index 137bae2c..50306224 100644
--- a/instrumentation/afl-llvm-lto-instrumentation.so.cc
+++ b/instrumentation/afl-llvm-lto-instrumentation.so.cc
@@ -519,7 +519,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
                             Var->getInitializer())) {
 
                       HasStr2 = true;
-                      Str2 = Array->getAsString().str();
+                      Str2 = Array->getRawDataValues().str();
 
                     }
 
@@ -595,7 +595,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
                             Var->getInitializer())) {
 
                       HasStr1 = true;
-                      Str1 = Array->getAsString().str();
+                      Str1 = Array->getRawDataValues().str();
 
                     }
 
@@ -635,15 +635,18 @@ bool AFLLTOPass::runOnModule(Module &M) {
               thestring = Str2;
 
             optLen = thestring.length();
+            if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
 
             if (isMemcmp || isStrncmp || isStrncasecmp) {
 
               Value *      op2 = callInst->getArgOperand(2);
               ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+
               if (ilen) {
 
                 uint64_t literalLength = optLen;
                 optLen = ilen->getZExtValue();
+                if (optLen < 2) { continue; }
                 if (literalLength + 1 == optLen) {  // add null byte
                   thestring.append("\0", 1);
                   addedNull = true;
@@ -658,17 +661,21 @@ bool AFLLTOPass::runOnModule(Module &M) {
             // was not already added
             if (!isMemcmp) {
 
-              if (addedNull == false) {
+              if (addedNull == false && thestring[optLen - 1] != '\0') {
 
                 thestring.append("\0", 1);  // add null byte
                 optLen++;
 
               }
 
-              // ensure we do not have garbage
-              size_t offset = thestring.find('\0', 0);
-              if (offset + 1 < optLen) optLen = offset + 1;
-              thestring = thestring.substr(0, optLen);
+              if (!isStdString) {
+
+                // ensure we do not have garbage
+                size_t offset = thestring.find('\0', 0);
+                if (offset + 1 < optLen) optLen = offset + 1;
+                thestring = thestring.substr(0, optLen);
+
+              }
 
             }
 
diff --git a/instrumentation/afl-llvm-pass.so.cc b/instrumentation/afl-llvm-pass.so.cc
index 16fd9c94..33898aec 100644
--- a/instrumentation/afl-llvm-pass.so.cc
+++ b/instrumentation/afl-llvm-pass.so.cc
@@ -84,7 +84,7 @@ class AFLCoverage : public ModulePass {
   uint32_t ngram_size = 0;
   uint32_t map_size = MAP_SIZE;
   uint32_t function_minimum_size = 1;
-  char *   ctx_str = NULL, *skip_nozero = NULL;
+  char *   ctx_str = NULL, *caller_str = NULL, *skip_nozero = NULL;
 
 };
 
@@ -187,6 +187,7 @@ bool AFLCoverage::runOnModule(Module &M) {
   char *ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
   if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
   ctx_str = getenv("AFL_LLVM_CTX");
+  caller_str = getenv("AFL_LLVM_CALLER");
 
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
   /* Decide previous location vector size (must be a power of two) */
@@ -240,7 +241,7 @@ bool AFLCoverage::runOnModule(Module &M) {
   GlobalVariable *AFLPrevLoc;
   GlobalVariable *AFLContext = NULL;
 
-  if (ctx_str)
+  if (ctx_str || caller_str)
 #if defined(__ANDROID__) || defined(__HAIKU__)
     AFLContext = new GlobalVariable(
         M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx");
@@ -318,7 +319,7 @@ bool AFLCoverage::runOnModule(Module &M) {
       IRBuilder<>          IRB(&(*IP));
 
       // Context sensitive coverage
-      if (ctx_str && &BB == &F.getEntryBlock()) {
+      if ((ctx_str || caller_str) && &BB == &F.getEntryBlock()) {
 
         // load the context ID of the previous function and write to to a local
         // variable on the stack
@@ -354,8 +355,9 @@ bool AFLCoverage::runOnModule(Module &M) {
         // if yes we store a context ID for this function in the global var
         if (has_calls) {
 
-          ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size));
-          StoreInst *  StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
+          Value *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size));
+          if (ctx_str) NewCtx = IRB.CreateXor(PrevCtx, NewCtx);
+          StoreInst *StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
           StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
                                 MDNode::get(C, None));
 
@@ -411,7 +413,7 @@ bool AFLCoverage::runOnModule(Module &M) {
 
         // in CTX mode we have to restore the original context for the caller -
         // she might be calling other functions which need the correct CTX
-        if (ctx_str && has_calls) {
+        if ((ctx_str || caller_str) && has_calls) {
 
           Instruction *Inst = BB.getTerminator();
           if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
@@ -458,7 +460,7 @@ bool AFLCoverage::runOnModule(Module &M) {
 #endif
         PrevLocTrans = PrevLoc;
 
-      if (ctx_str)
+      if (ctx_str || caller_str)
         PrevLocTrans =
             IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty);
       else
@@ -545,7 +547,7 @@ bool AFLCoverage::runOnModule(Module &M) {
       // in CTX mode we have to restore the original context for the caller -
       // she might be calling other functions which need the correct CTX.
       // Currently this is only needed for the Ubuntu clang-6.0 bug
-      if (ctx_str && has_calls) {
+      if ((ctx_str || caller_str) && has_calls) {
 
         Instruction *Inst = BB.getTerminator();
         if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc
index dbca9afa..ad334d3b 100644
--- a/instrumentation/cmplog-instructions-pass.cc
+++ b/instrumentation/cmplog-instructions-pass.cc
@@ -418,7 +418,7 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
       IntegerType *        intTyOp0 = NULL;
       IntegerType *        intTyOp1 = NULL;
       unsigned             max_size = 0, cast_size = 0;
-      unsigned char        attr = 0, do_cast = 0;
+      unsigned char        attr = 0;
       std::vector<Value *> args;
 
       CmpInst *cmpInst = dyn_cast<CmpInst>(selectcmpInst);
@@ -484,7 +484,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
           max_size = 128;
 
         attr += 8;
-        do_cast = 1;
 
       } else {
 
@@ -503,12 +502,7 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 
       if (!max_size || max_size < 16) { continue; }
 
-      if (max_size % 8) {
-
-        max_size = (((max_size / 8) + 1) * 8);
-        do_cast = 1;
-
-      }
+      if (max_size % 8) { max_size = (((max_size / 8) + 1) * 8); }
 
       if (max_size > 128) {
 
@@ -521,7 +515,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
         }
 
         max_size = 128;
-        do_cast = 1;
 
       }
 
@@ -537,7 +530,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
           break;
         default:
           cast_size = 128;
-          do_cast = 1;
 
       }
 
@@ -574,7 +566,7 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
       }
 
       // fprintf(stderr, "_ExtInt(%u) castTo %u with attr %u didcast %u\n",
-      //         max_size, cast_size, attr, do_cast);
+      //         max_size, cast_size, attr);
 
       switch (cast_size) {
 
diff --git a/instrumentation/compare-transform-pass.so.cc b/instrumentation/compare-transform-pass.so.cc
index bd524a69..3ecba4e6 100644
--- a/instrumentation/compare-transform-pass.so.cc
+++ b/instrumentation/compare-transform-pass.so.cc
@@ -229,9 +229,9 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
                           dyn_cast<ConstantDataArray>(Var->getInitializer())) {
 
                     HasStr2 = true;
-                    Str2 = Array->getAsString();
+                    Str2 = Array->getRawDataValues();
                     valueMap[Str2P] = new std::string(Str2.str());
-                    fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
+                    // fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
 
                   }
 
@@ -254,7 +254,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
                             Var->getInitializer())) {
 
                       HasStr1 = true;
-                      Str1 = Array->getAsString();
+                      Str1 = Array->getRawDataValues();
                       valueMap[Str1P] = new std::string(Str1.str());
                       // fprintf(stderr, "glo1 %s\n", Str1.str().c_str());
 
@@ -316,7 +316,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
               uint64_t len = ilen->getZExtValue();
               // if len is zero this is a pointless call but allow real
               // implementation to worry about that
-              if (!len) continue;
+              if (len < 2) continue;
 
               if (isMemcmp) {
 
@@ -420,15 +420,29 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
     }
 
+    if (TmpConstStr.length() < 2 ||
+        (TmpConstStr.length() == 2 && !TmpConstStr[1])) {
+
+      continue;
+
+    }
+
     // add null termination character implicit in c strings
-    TmpConstStr.append("\0", 1);
+    if (!isMemcmp && TmpConstStr[TmpConstStr.length() - 1]) {
+
+      TmpConstStr.append("\0", 1);
+
+    }
 
     // in the unusual case the const str has embedded null
     // characters, the string comparison functions should terminate
     // at the first null
-    if (!isMemcmp)
+    if (!isMemcmp) {
+
       TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1);
 
+    }
+
     constStrLen = TmpConstStr.length();
     // prefer use of StringRef (in comparison to std::string a StringRef has
     // built-in runtime bounds checking, which makes debugging easier)
diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc
index 80cd90ba..b02a89fb 100644
--- a/instrumentation/split-compares-pass.so.cc
+++ b/instrumentation/split-compares-pass.so.cc
@@ -149,8 +149,11 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) {
     auto op1 = FcmpInst->getOperand(1);
 
     /* find out what the new predicate is going to be */
-    auto               pred = dyn_cast<CmpInst>(FcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(FcmpInst);
+    if (!cmp_inst) { continue; }
+    auto               pred = cmp_inst->getPredicate();
     CmpInst::Predicate new_pred;
+
     switch (pred) {
 
       case CmpInst::FCMP_UGE:
@@ -276,8 +279,11 @@ bool SplitComparesTransform::simplifyCompares(Module &M) {
     auto op1 = IcmpInst->getOperand(1);
 
     /* find out what the new predicate is going to be */
-    auto               pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+    if (!cmp_inst) { continue; }
+    auto               pred = cmp_inst->getPredicate();
     CmpInst::Predicate new_pred;
+
     switch (pred) {
 
       case CmpInst::ICMP_UGE:
@@ -412,8 +418,11 @@ bool SplitComparesTransform::simplifyIntSignedness(Module &M) {
     IntegerType *IntType = IntegerType::get(C, bitw);
 
     /* get the new predicate */
-    auto               pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+    if (!cmp_inst) { continue; }
+    auto               pred = cmp_inst->getPredicate();
     CmpInst::Predicate new_pred;
+
     if (pred == CmpInst::ICMP_SGT) {
 
       new_pred = CmpInst::ICMP_UGT;
@@ -603,6 +612,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
     if (op_size != op1->getType()->getPrimitiveSizeInBits()) { continue; }
 
     const unsigned int sizeInBits = op0->getType()->getPrimitiveSizeInBits();
+
+    // BUG FIXME TODO: u64 does not work for > 64 bit ... e.g. 80 and 128 bit
+    if (sizeInBits > 64) { continue; }
+
     const unsigned int precision = sizeInBits == 32    ? 24
                                    : sizeInBits == 64  ? 53
                                    : sizeInBits == 128 ? 113
@@ -610,8 +623,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
                                    : sizeInBits == 80  ? 65
                                                        : sizeInBits - 8;
 
-    const unsigned shiftR_exponent = precision - 1;
-    // BUG FIXME TODO: u64 does not work for > 64 bit ... e.g. 80 and 128 bit
+    const unsigned           shiftR_exponent = precision - 1;
     const unsigned long long mask_fraction =
         (1ULL << (shiftR_exponent - 1)) | ((1ULL << (shiftR_exponent - 1)) - 1);
     const unsigned long long mask_exponent =
@@ -1113,7 +1125,9 @@ size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned bitw) {
     auto op0 = IcmpInst->getOperand(0);
     auto op1 = IcmpInst->getOperand(1);
 
-    auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+    if (!cmp_inst) { continue; }
+    auto pred = cmp_inst->getPredicate();
 
     BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));