about summary refs log tree commit diff
diff options
context:
space:
mode:
authorvan Hauser <vh@thc.org>2020-05-06 11:51:28 +0200
committervan Hauser <vh@thc.org>2020-05-06 11:51:28 +0200
commit80ddb484deb82aefc9ba35c766ffca313d74e377 (patch)
treeb0aa9e3ecacc75550d63d1d66d6d098219958e2b
parentb4e3f22259397629f1e2a66dd17c36d19c4ecb0d (diff)
downloadafl++-80ddb484deb82aefc9ba35c766ffca313d74e377.tar.gz
added InsTrimLTO :-)
-rw-r--r--docs/Changelog.md1
-rw-r--r--docs/env_variables.md11
-rw-r--r--llvm_mode/GNUmakefile9
-rw-r--r--llvm_mode/README.lto.md10
-rw-r--r--llvm_mode/afl-clang-fast.c93
-rw-r--r--llvm_mode/afl-llvm-common.cc17
-rw-r--r--llvm_mode/afl-llvm-common.h9
-rw-r--r--llvm_mode/afl-llvm-lto-instrim.so.cc898
-rw-r--r--llvm_mode/afl-llvm-lto-instrumentation.so.cc44
9 files changed, 1010 insertions, 82 deletions
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 54564a5d..a5b761be 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -19,6 +19,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
      - if LLVM 11 is installed the posix shm_open+mmap is used and a fixed
        address for the shared memory map is used as this increases the
        fuzzing speed
+     - InsTrim now has an LTO version! :-) That is the best and fastest mode!
      - fixes to LTO mode if instrumented edges > MAP_SIZE
      - CTX and NGRAM can now be used together
      - CTX and NGRAM are now also supported in CFG/INSTRIM mode
diff --git a/docs/env_variables.md b/docs/env_variables.md
index ed81c8a3..f5d28981 100644
--- a/docs/env_variables.md
+++ b/docs/env_variables.md
@@ -116,6 +116,9 @@ Then there are a few specific features that are only available in llvm_mode:
     afl-clang-lto/afl-clang-lto++ instead of afl-clang-fast, but is only
     built if LLVM 11 or newer is used.
 
+   - AFL_LLVM_INSTRUMENT=CFG will use Control Flow Graph instrumentation.
+     (recommended)
+
    - AFL_LLVM_LTO_AUTODICTIONARY will generate a dictionary in the target
      binary based on string compare and memory compare functions.
      afl-fuzz will automatically get these transmitted when starting to
@@ -139,7 +142,13 @@ Then there are a few specific features that are only available in llvm_mode:
 
 ### INSTRIM
 
-    This feature increases the speed by ~15% without any disadvantages.
+    This feature increases the speed by ~15% without any disadvantages
+    compared to the classic instrumentation.
+
+    Note that there is also an LTO version (if you have llvm 11 or higher) -
+    that is the best instrumentation we have. Use `afl-clang-lto` to activate.
+    The InsTrim LTO version additionally has all the options and features of
+    LTO (see above).
 
     - Setting AFL_LLVM_INSTRIM or AFL_LLVM_INSTRUMENT=CFG activates this mode
 
diff --git a/llvm_mode/GNUmakefile b/llvm_mode/GNUmakefile
index 93886e47..6a58f6a5 100644
--- a/llvm_mode/GNUmakefile
+++ b/llvm_mode/GNUmakefile
@@ -238,7 +238,7 @@ ifeq "$(TEST_MMAP)" "1"
         LDFLAGS += -Wno-deprecated-declarations
 endif
 
-  PROGS      = ../afl-clang-fast ../afl-llvm-pass.so ../afl-llvm-lto-whitelist.so ../afl-llvm-lto-instrumentation.so ../libLLVMInsTrim.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so ../cmplog-routines-pass.so ../cmplog-instructions-pass.so
+  PROGS      = ../afl-clang-fast ../afl-llvm-pass.so ../afl-llvm-lto-whitelist.so ../afl-llvm-lto-instrumentation.so ../afl-llvm-lto-instrim.so ../libLLVMInsTrim.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so ../cmplog-routines-pass.so ../cmplog-instructions-pass.so
 
 # If prerequisites are not given, warn, do not build anything, and exit with code 0
 ifeq "$(LLVMVER)" ""
@@ -330,6 +330,11 @@ ifeq "$(LLVM_LTO)" "1"
 	@$(CLANG_BIN) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -m32 -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto-32.o 2>/dev/null; if [ "$$?" = "0" ]; then : ; fi
 endif
 
+../afl-llvm-lto-instrim.so: afl-llvm-lto-instrim.so.cc afl-llvm-common.o
+ifeq "$(LLVM_LTO)" "1"
+	$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) afl-llvm-common.o
+endif
+
 # laf
 ../split-switches-pass.so:	split-switches-pass.so.cc afl-llvm-common.o | test_deps
 	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) afl-llvm-common.o
@@ -373,7 +378,7 @@ all_done: test_build
 install: all
 	install -d -m 755 $${DESTDIR}$(BIN_PATH) $${DESTDIR}$(HELPER_PATH) $${DESTDIR}$(DOC_PATH) $${DESTDIR}$(MISC_PATH)
 	if [ -f ../afl-clang-fast -a -f ../libLLVMInsTrim.so -a -f ../afl-llvm-rt.o ]; then set -e; install -m 755 ../afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 ../libLLVMInsTrim.so ../afl-llvm-pass.so ../afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f ../afl-clang-lto ]; then set -e; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto++; install -m 755 ../afl-llvm-lto-instrumentation.so ../afl-llvm-rt-lto*.o ../afl-llvm-lto-whitelist.so $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f ../afl-clang-lto ]; then set -e; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto++; install -m 755 ../afl-llvm-lto-instrumentation.so ../afl-llvm-lto-instrim.so ../afl-llvm-rt-lto*.o ../afl-llvm-lto-whitelist.so $${DESTDIR}$(HELPER_PATH); fi
 	if [ -f ../afl-llvm-rt-32.o ]; then set -e; install -m 755 ../afl-llvm-rt-32.o $${DESTDIR}$(HELPER_PATH); fi
 	if [ -f ../afl-llvm-rt-64.o ]; then set -e; install -m 755 ../afl-llvm-rt-64.o $${DESTDIR}$(HELPER_PATH); fi
 	if [ -f ../compare-transform-pass.so ]; then set -e; install -m 755 ../compare-transform-pass.so $${DESTDIR}$(HELPER_PATH); fi
diff --git a/llvm_mode/README.lto.md b/llvm_mode/README.lto.md
index d8e4766d..674531f5 100644
--- a/llvm_mode/README.lto.md
+++ b/llvm_mode/README.lto.md
@@ -6,6 +6,8 @@ This version requires a current llvm 11 compiled from the github master.
 
 1. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better
    coverage than anything else that is out there in the AFL world
+  1a. Set AFL_LLVM_INSTRUMENT=CFG if you want the InsTrimLTO version
+      (recommended)
 
 2. You can use it together with llvm_mode: laf-intel and whitelisting
    features and can be combined with cmplog/Redqueen
@@ -41,7 +43,7 @@ and many dead ends until we got to this:
    -fsanitize=coverage edge coverage mode :)
 
 The result:
- * 10-20% speed gain compared to llvm_mode
+ * 10-25% speed gain compared to llvm_mode
  * guaranteed non-colliding edge coverage :-)
  * The compile time especially for libraries can be longer
 
@@ -80,11 +82,13 @@ Just use afl-clang-lto like you did with afl-clang-fast or afl-gcc.
 
 Also whitelisting (AFL_LLVM_WHITELIST -> [README.whitelist.md](README.whitelist.md)) and
 laf-intel/compcov (AFL_LLVM_LAF_* -> [README.laf-intel.md](README.laf-intel.md)) work.
-Instrim does not - but we can not really use it anyway for our approach.
+InsTrim (control flow graph instrumentation) is supported and recommended!
+  (set `AFL_LLVM_INSTRUMENT=CFG`)
 
 Example:
 ```
-CC=afl-clang-lto CXX=afl-clang-lto++ ./configure
+CC=afl-clang-lto CXX=afl-clang-lto++ RANLIB=llvm-ranlib AR=llvm-ar ./configure
+export AFL_LLVM_INSTRUMENT=CFG
 make
 ```
 
diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c
index 2d1b427c..6a6414ad 100644
--- a/llvm_mode/afl-clang-fast.c
+++ b/llvm_mode/afl-clang-fast.c
@@ -43,7 +43,7 @@ static u8 * obj_path;                  /* Path to runtime libraries         */
 static u8 **cc_params;                 /* Parameters passed to the real CC  */
 static u32  cc_par_cnt = 1;            /* Param count, including argv0      */
 static u8   llvm_fullpath[PATH_MAX];
-static u8   instrument_mode, instrument_opt_mode, ngram_size;
+static u8   instrument_mode, instrument_opt_mode, ngram_size, lto_mode;
 static u8 * lto_flag = AFL_CLANG_FLTO;
 static u8 * march_opt = CFLAGS_OPT;
 static u8   debug;
@@ -170,7 +170,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
   else
     ++name;
 
-  if (instrument_mode == INSTRUMENT_LTO)
+  if (lto_mode)
     if (lto_flag[0] != '-')
       FATAL(
           "Using afl-clang-lto is not possible because Makefile magic did not "
@@ -227,8 +227,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
   if (getenv("LAF_TRANSFORM_COMPARES") ||
       getenv("AFL_LLVM_LAF_TRANSFORM_COMPARES")) {
 
-    if (!be_quiet && getenv("AFL_LLVM_LTO_AUTODICTIONARY") &&
-        instrument_mode != INSTRUMENT_LTO)
+    if (!be_quiet && getenv("AFL_LLVM_LTO_AUTODICTIONARY") && lto_mode)
       WARNF(
           "using AFL_LLVM_LAF_TRANSFORM_COMPARES together with "
           "AFL_LLVM_LTO_AUTODICTIONARY makes no sense. Use only "
@@ -281,7 +280,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   }
 
-  if (instrument_mode == INSTRUMENT_LTO) {
+  if (lto_mode) {
 
     if (getenv("AFL_LLVM_WHITELIST") != NULL) {
 
@@ -295,8 +294,12 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
     cc_params[cc_par_cnt++] = alloc_printf("-fuse-ld=%s", AFL_REAL_LD);
     cc_params[cc_par_cnt++] = "-Wl,--allow-multiple-definition";
-    cc_params[cc_par_cnt++] = alloc_printf(
-        "-Wl,-mllvm=-load=%s/afl-llvm-lto-instrumentation.so", obj_path);
+    if (instrument_mode == INSTRUMENT_CFG)
+      cc_params[cc_par_cnt++] =
+          alloc_printf("-Wl,-mllvm=-load=%s/afl-llvm-lto-instrim.so", obj_path);
+    else
+      cc_params[cc_par_cnt++] = alloc_printf(
+          "-Wl,-mllvm=-load=%s/afl-llvm-lto-instrumentation.so", obj_path);
     cc_params[cc_par_cnt++] = lto_flag;
 
   } else {
@@ -391,7 +394,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   if (getenv("AFL_USE_CFISAN")) {
 
-    if (instrument_mode != INSTRUMENT_LTO) {
+    if (!lto_mode) {
 
       uint32_t i = 0, found = 0;
       while (envp[i] != NULL && !found)
@@ -417,9 +420,8 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   if (getenv("AFL_NO_BUILTIN") || getenv("AFL_LLVM_LAF_TRANSFORM_COMPARES") ||
       getenv("LAF_TRANSFORM_COMPARES") ||
-      (instrument_mode == INSTRUMENT_LTO &&
-       (getenv("AFL_LLVM_LTO_AUTODICTIONARY") ||
-        getenv("AFL_LLVM_AUTODICTIONARY")))) {
+      (lto_mode && (getenv("AFL_LLVM_LTO_AUTODICTIONARY") ||
+                    getenv("AFL_LLVM_AUTODICTIONARY")))) {
 
     cc_params[cc_par_cnt++] = "-fno-builtin-strcmp";
     cc_params[cc_par_cnt++] = "-fno-builtin-strncmp";
@@ -500,7 +502,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
     case 0:
       cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt.o", obj_path);
-      if (instrument_mode == INSTRUMENT_LTO)
+      if (lto_mode)
         cc_params[cc_par_cnt++] =
             alloc_printf("%s/afl-llvm-rt-lto.o", obj_path);
       break;
@@ -509,7 +511,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
       cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-32.o", obj_path);
       if (access(cc_params[cc_par_cnt - 1], R_OK))
         FATAL("-m32 is not supported by your compiler");
-      if (instrument_mode == INSTRUMENT_LTO) {
+      if (lto_mode) {
 
         cc_params[cc_par_cnt++] =
             alloc_printf("%s/afl-llvm-rt-lto-32.o", obj_path);
@@ -524,7 +526,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
       cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-64.o", obj_path);
       if (access(cc_params[cc_par_cnt - 1], R_OK))
         FATAL("-m64 is not supported by your compiler");
-      if (instrument_mode == INSTRUMENT_LTO) {
+      if (lto_mode) {
 
         cc_params[cc_par_cnt++] =
             alloc_printf("%s/afl-llvm-rt-lto-64.o", obj_path);
@@ -548,7 +550,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 int main(int argc, char **argv, char **envp) {
 
   int   i;
-  char *callname = "afl-clang-fast", *ptr;
+  char *callname = "afl-clang-fast", *ptr = NULL;
 
   if (getenv("AFL_DEBUG")) {
 
@@ -630,7 +632,13 @@ int main(int argc, char **argv, char **envp) {
       if (strncasecmp(ptr, "cfg", strlen("cfg")) == 0 ||
           strncasecmp(ptr, "instrim", strlen("instrim")) == 0) {
 
-        if (!instrument_mode || instrument_mode == INSTRUMENT_CFG)
+        if (instrument_mode == INSTRUMENT_LTO) {
+
+          instrument_mode = INSTRUMENT_CFG;
+          lto_mode = 1;
+
+        } else if (!instrument_mode || instrument_mode == INSTRUMENT_CFG)
+
           instrument_mode = INSTRUMENT_CFG;
         else
           FATAL("main instrumentation mode already set with %s",
@@ -640,9 +648,10 @@ int main(int argc, char **argv, char **envp) {
 
       if (strncasecmp(ptr, "lto", strlen("lto")) == 0) {
 
+        lto_mode = 1;
         if (!instrument_mode || instrument_mode == INSTRUMENT_LTO)
           instrument_mode = INSTRUMENT_LTO;
-        else
+        else if (instrument_mode != INSTRUMENT_CFG)
           FATAL("main instrumentation mode already set with %s",
                 instrument_mode_string[instrument_mode]);
 
@@ -684,37 +693,49 @@ int main(int argc, char **argv, char **envp) {
 
   }
 
-  if (!instrument_opt_mode)
-    ptr = instrument_mode_string[instrument_mode];
-  else if (instrument_opt_mode == INSTRUMENT_OPT_CTX)
-    ptr = alloc_printf("%s + CTX", instrument_mode_string[instrument_mode]);
-  else if (instrument_opt_mode == INSTRUMENT_OPT_NGRAM)
-    ptr = alloc_printf("%s + NGRAM-%u", instrument_mode_string[instrument_mode],
-                       ngram_size);
-  else
-    ptr = alloc_printf("%s + CTX + NGRAM-%u",
-                       instrument_mode_string[instrument_mode], ngram_size);
-
   if (strstr(argv[0], "afl-clang-lto") != NULL) {
 
-    if (instrument_mode == 0 || instrument_mode == INSTRUMENT_LTO) {
+    if (instrument_mode == 0 || instrument_mode == INSTRUMENT_LTO ||
+        instrument_mode == INSTRUMENT_CFG) {
 
+      lto_mode = 1;
       callname = "afl-clang-lto";
-      instrument_mode = INSTRUMENT_LTO;
-      ptr = instrument_mode_string[instrument_mode];
+      if (!instrument_mode) {
+
+        instrument_mode = INSTRUMENT_LTO;
+        ptr = instrument_mode_string[instrument_mode];
+
+      }
 
     } else {
 
       if (!be_quiet)
         WARNF("afl-clang-lto called with mode %s, using that mode instead",
-              ptr);
+              instrument_mode_string[instrument_mode]);
 
     }
 
   }
 
+  if (!instrument_opt_mode) {
+
+    if (lto_mode && instrument_mode == INSTRUMENT_CFG)
+      ptr = alloc_printf("InsTrimLTO");
+    else
+      ptr = instrument_mode_string[instrument_mode];
+
+  } else if (instrument_opt_mode == INSTRUMENT_OPT_CTX)
+
+    ptr = alloc_printf("%s + CTX", instrument_mode_string[instrument_mode]);
+  else if (instrument_opt_mode == INSTRUMENT_OPT_NGRAM)
+    ptr = alloc_printf("%s + NGRAM-%u", instrument_mode_string[instrument_mode],
+                       ngram_size);
+  else
+    ptr = alloc_printf("%s + CTX + NGRAM-%u",
+                       instrument_mode_string[instrument_mode], ngram_size);
+
 #ifndef AFL_CLANG_FLTO
-  if (instrument_mode == INSTRUMENT_LTO)
+  if (lto_mode)
     FATAL(
         "instrumentation mode LTO specified but LLVM support not available "
         "(requires LLVM 11 or higher)");
@@ -733,7 +754,7 @@ int main(int argc, char **argv, char **envp) {
 
   if (argc < 2 || strcmp(argv[1], "-h") == 0) {
 
-    if (instrument_mode != INSTRUMENT_LTO)
+    if (!lto_mode)
       printf("afl-clang-fast" VERSION " by <lszekeres@google.com> in %s mode\n",
              ptr);
     else
@@ -831,7 +852,7 @@ int main(int argc, char **argv, char **envp) {
 
              getenv("AFL_DEBUG") != NULL) {
 
-    if (instrument_mode != INSTRUMENT_LTO)
+    if (!lto_mode)
 
       SAYF(cCYA "afl-clang-fast" VERSION cRST
                 " by <lszekeres@google.com> in %s mode\n",
@@ -846,7 +867,7 @@ int main(int argc, char **argv, char **envp) {
   }
 
   u8 *ptr2;
-  if (!be_quiet && instrument_mode != INSTRUMENT_LTO &&
+  if (!be_quiet && !lto_mode &&
       ((ptr2 = getenv("AFL_MAP_SIZE")) || (ptr2 = getenv("AFL_MAPSIZE")))) {
 
     u32 map_size = atoi(ptr2);
diff --git a/llvm_mode/afl-llvm-common.cc b/llvm_mode/afl-llvm-common.cc
index 04dd9475..c62b4c14 100644
--- a/llvm_mode/afl-llvm-common.cc
+++ b/llvm_mode/afl-llvm-common.cc
@@ -201,3 +201,20 @@ bool isInWhitelist(llvm::Function *F) {
 
 }
 
+// Calculate the average number of collisions that would occur if all
+// location IDs were assigned randomly (like normal afl/afl++).
+// This uses the "balls in bins" model: edges are balls, MAP_SIZE is the bins.
+unsigned long long int calculateCollisions(uint32_t edges) {
+  // Returns edges minus the expected number of occupied bins.
+  double                 bins = MAP_SIZE;  // number of bins
+  double                 balls = edges;  // number of balls thrown
+  double                 step1 = 1 - (1 / bins);  // P(one ball misses a bin)
+  double                 step2 = pow(step1, balls);  // P(bin stays empty)
+  double                 step3 = bins * step2;  // expected empty bins
+  double                 step4 = round(step3);  // nearest whole bin count
+  unsigned long long int empty = step4;  // converted to an integer count
+  unsigned long long int collisions = edges - (MAP_SIZE - empty);  // edges - occupied
+  return collisions;
+
+}
+
diff --git a/llvm_mode/afl-llvm-common.h b/llvm_mode/afl-llvm-common.h
index 5b5e08d0..cf14d2e1 100644
--- a/llvm_mode/afl-llvm-common.h
+++ b/llvm_mode/afl-llvm-common.h
@@ -32,10 +32,11 @@ typedef long double max_align_t;
 #include "llvm/Support/CFG.h"
 #endif
 
-char *getBBName(const llvm::BasicBlock *BB);
-bool  isBlacklisted(const llvm::Function *F);
-void  initWhitelist();
-bool  isInWhitelist(llvm::Function *F);
+char *                 getBBName(const llvm::BasicBlock *BB);
+bool                   isBlacklisted(const llvm::Function *F);
+void                   initWhitelist();
+bool                   isInWhitelist(llvm::Function *F);
+unsigned long long int calculateCollisions(uint32_t edges);
 
 #endif
 
diff --git a/llvm_mode/afl-llvm-lto-instrim.so.cc b/llvm_mode/afl-llvm-lto-instrim.so.cc
new file mode 100644
index 00000000..a5bc337f
--- /dev/null
+++ b/llvm_mode/afl-llvm-lto-instrim.so.cc
@@ -0,0 +1,898 @@
+/*
+   american fuzzy lop++ - LLVM-mode instrumentation pass
+   ---------------------------------------------------
+
+   Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   This library is plugged into LLVM when invoking clang through afl-clang-fast.
+
+ */
+
+#define AFL_LLVM_PASS
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include <unordered_set>
+#include <list>
+#include <string>
+#include <fstream>
+#include <set>
+
+#include "llvm/Config/llvm-config.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+#include "MarkNodes.h"
+#include "afl-llvm-common.h"
+
+#include "config.h"
+#include "debug.h"
+
+using namespace llvm;
+
+static cl::opt<bool> MarkSetOpt("markset", cl::desc("MarkSet"),
+                                cl::init(false));
+static cl::opt<bool> LoopHeadOpt("loophead", cl::desc("LoopHead"),
+                                 cl::init(false));
+
+namespace {
+
+struct InsTrimLTO : public ModulePass {
+
+ protected:
+  uint32_t function_minimum_size = 1;
+  char *   skip_nozero = NULL;
+  int      afl_global_id = 1, debug = 0, autodictionary = 0;
+  uint32_t be_quiet = 0, inst_blocks = 0, inst_funcs = 0;
+  uint64_t map_addr = 0x10000;
+
+ public:
+  static char ID;
+
+  InsTrimLTO() : ModulePass(ID) {
+    // Constructor: reads pass settings from environment variables.
+    char *ptr;
+
+    if (getenv("AFL_DEBUG")) debug = 1;  // set whenever the variable exists
+    if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL)  // overrides afl_global_id
+      if ((afl_global_id = atoi(ptr)) < 0 || afl_global_id >= MAP_SIZE)  // range check
+        FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is not between 0 and %d\n",
+              ptr, MAP_SIZE - 1);
+    // Stored as the raw getenv() result, so it is NULL when the variable is unset.
+    skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");
+
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // Declares the analyses this pass requires, on top of the base-class set.
+    ModulePass::getAnalysisUsage(AU);  // keep whatever the base class registers
+    AU.addRequired<DominatorTreeWrapperPass>();  // dominator tree wrapper pass
+    AU.addRequired<LoopInfoWrapperPass>();  // loop info wrapper pass
+
+  }
+
+  StringRef getPassName() const override {
+    // Returns the fixed display name of this pass.
+    return "InstTrim LTO Instrumentation";
+
+  }
+
+  bool runOnModule(Module &M) override {
+
+    char  be_quiet = 0;
+    char *ptr;
+
+    if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {
+
+      SAYF(cCYA "LLVMInsTrimLTO" VERSION cRST
+                " by csienslab and Marc \"vanHauser\" Heuse\n");
+
+    } else
+
+      be_quiet = 1;
+
+    /* Process environment variables */
+
+    if (getenv("AFL_LLVM_AUTODICTIONARY") ||
+        getenv("AFL_LLVM_LTO_AUTODICTIONARY"))
+      autodictionary = 1;
+
+    if (getenv("AFL_LLVM_MAP_DYNAMIC")) map_addr = 0;
+
+    if ((ptr = getenv("AFL_LLVM_MAP_ADDR"))) {
+
+      uint64_t val;
+      if (!*ptr || !strcmp(ptr, "0") || !strcmp(ptr, "0x0")) {
+
+        map_addr = 0;
+
+      } else if (map_addr == 0) {
+
+        FATAL(
+            "AFL_LLVM_MAP_ADDR and AFL_LLVM_MAP_DYNAMIC cannot be used "
+            "together");
+
+      } else if (strncmp(ptr, "0x", 2) != 0) {
+
+        map_addr = 0x10000;  // the default
+
+      } else {
+
+        val = strtoull(ptr, NULL, 16);
+        if (val < 0x100 || val > 0xffffffff00000000) {
+
+          FATAL(
+              "AFL_LLVM_MAP_ADDR must be a value between 0x100 and "
+              "0xffffffff00000000");
+
+        }
+
+        map_addr = val;
+
+      }
+
+    }
+
+    if (debug) { fprintf(stderr, "map address is %lu\n", map_addr); }
+
+    if (getenv("AFL_LLVM_INSTRIM_LOOPHEAD") != NULL ||
+        getenv("LOOPHEAD") != NULL) {
+
+      LoopHeadOpt = true;
+
+    }
+
+    if (getenv("AFL_LLVM_INSTRIM_SKIPSINGLEBLOCK") != NULL)
+      function_minimum_size = 2;
+
+    // this is our default
+    MarkSetOpt = true;
+
+    /* Initialize LLVM instrumentation */
+
+    LLVMContext &                    C = M.getContext();
+    std::vector<std::string>         dictionary;
+    std::vector<CallInst *>          calls;
+    DenseMap<Value *, std::string *> valueMap;
+
+    IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
+    IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
+    IntegerType *Int64Ty = IntegerType::getInt64Ty(C);
+
+    ConstantInt *Zero = ConstantInt::get(Int8Ty, 0);
+    ConstantInt *One = ConstantInt::get(Int8Ty, 1);
+
+    /* Get/set globals for the SHM region. */
+
+    GlobalVariable *AFLMapPtr = NULL;
+    Value *         MapPtrFixed = NULL;
+
+    if (!map_addr) {
+
+      AFLMapPtr =
+          new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
+                             GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
+
+    } else {
+
+      ConstantInt *MapAddr = ConstantInt::get(Int64Ty, map_addr);
+      MapPtrFixed =
+          ConstantExpr::getIntToPtr(MapAddr, PointerType::getUnqual(Int8Ty));
+
+    }
+
+    if (autodictionary) {
+
+      /*  Some implementation notes.
+       *
+       *  We try to handle 3 cases:
+       *  - memcmp("foo", arg, 3) <- literal string
+       *  - static char globalvar[] = "foo";
+       *    memcmp(globalvar, arg, 3) <- global variable
+       *  - char localvar[] = "foo";
+       *    memcmp(localvar, arg, 3) <- local variable
+       *
+       *  The local variable case is the hardest. We can only detect that
+       *  case if there is no reassignment or change in the variable.
+       *  And it might not work across llvm versions.
+       *  What we do is hooking the initializer function for local variables
+       *  (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned
+       *  variable. And if that variable is then used in a compare function
+       *  we use that noted string.
+       *  This seems not to work for tokens that have a size <= 4 :-(
+       *
+       *  - if the compared length is smaller than the string length we
+       *    save the full string. This is likely better for fuzzing but
+       *    might be wrong in a few cases depending on optimizers
+       *
+       *  - not using StringRef because there is a bug in the llvm 11
+       *    checkout I am using which sometimes points to wrong strings
+       *
+       *  Over and out. Took me a full day. damn. mh/vh
+       */
+
+      for (Function &F : M) {
+
+        for (auto &BB : F) {
+
+          for (auto &IN : BB) {
+
+            CallInst *callInst = nullptr;
+
+            if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+              bool    isStrcmp = true;
+              bool    isMemcmp = true;
+              bool    isStrncmp = true;
+              bool    isStrcasecmp = true;
+              bool    isStrncasecmp = true;
+              bool    isIntMemcpy = true;
+              bool    addedNull = false;
+              uint8_t optLen = 0;
+
+              Function *Callee = callInst->getCalledFunction();
+              if (!Callee) continue;
+              if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
+              std::string FuncName = Callee->getName().str();
+              isStrcmp &= !FuncName.compare("strcmp");
+              isMemcmp &= !FuncName.compare("memcmp");
+              isStrncmp &= !FuncName.compare("strncmp");
+              isStrcasecmp &= !FuncName.compare("strcasecmp");
+              isStrncasecmp &= !FuncName.compare("strncasecmp");
+              isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64");
+
+              if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+                  !isStrncasecmp && !isIntMemcpy)
+                continue;
+
+              /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp
+               * function prototype */
+              FunctionType *FT = Callee->getFunctionType();
+
+              isStrcmp &= FT->getNumParams() == 2 &&
+                          FT->getReturnType()->isIntegerTy(32) &&
+                          FT->getParamType(0) == FT->getParamType(1) &&
+                          FT->getParamType(0) ==
+                              IntegerType::getInt8PtrTy(M.getContext());
+              isStrcasecmp &= FT->getNumParams() == 2 &&
+                              FT->getReturnType()->isIntegerTy(32) &&
+                              FT->getParamType(0) == FT->getParamType(1) &&
+                              FT->getParamType(0) ==
+                                  IntegerType::getInt8PtrTy(M.getContext());
+              isMemcmp &= FT->getNumParams() == 3 &&
+                          FT->getReturnType()->isIntegerTy(32) &&
+                          FT->getParamType(0)->isPointerTy() &&
+                          FT->getParamType(1)->isPointerTy() &&
+                          FT->getParamType(2)->isIntegerTy();
+              isStrncmp &= FT->getNumParams() == 3 &&
+                           FT->getReturnType()->isIntegerTy(32) &&
+                           FT->getParamType(0) == FT->getParamType(1) &&
+                           FT->getParamType(0) ==
+                               IntegerType::getInt8PtrTy(M.getContext()) &&
+                           FT->getParamType(2)->isIntegerTy();
+              isStrncasecmp &= FT->getNumParams() == 3 &&
+                               FT->getReturnType()->isIntegerTy(32) &&
+                               FT->getParamType(0) == FT->getParamType(1) &&
+                               FT->getParamType(0) ==
+                                   IntegerType::getInt8PtrTy(M.getContext()) &&
+                               FT->getParamType(2)->isIntegerTy();
+
+              if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+                  !isStrncasecmp && !isIntMemcpy)
+                continue;
+
+              /* is a str{n,}{case,}cmp/memcmp, check if we have
+               * str{case,}cmp(x, "const") or str{case,}cmp("const", x)
+               * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x,
+               * ..) memcmp(x, "const", ..) or memcmp("const", x, ..) */
+              Value *Str1P = callInst->getArgOperand(0),
+                    *Str2P = callInst->getArgOperand(1);
+              std::string Str1, Str2;
+              StringRef   TmpStr;
+              bool        HasStr1 = getConstantStringInfo(Str1P, TmpStr);
+              if (TmpStr.empty())
+                HasStr1 = false;
+              else
+                Str1 = TmpStr.str();
+              bool HasStr2 = getConstantStringInfo(Str2P, TmpStr);
+              if (TmpStr.empty())
+                HasStr2 = false;
+              else
+                Str2 = TmpStr.str();
+
+              if (debug)
+                fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n",
+                        FuncName.c_str(), Str1P, Str1P->getName().str().c_str(),
+                        Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P,
+                        Str2P->getName().str().c_str(), Str2.c_str(),
+                        HasStr2 == true ? "true" : "false");
+
+              // we handle the 2nd parameter first because of llvm memcpy
+              if (!HasStr2) {
+
+                auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
+                if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+                  if (auto *Var =
+                          dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+                    if (auto *Array = dyn_cast<ConstantDataArray>(
+                            Var->getInitializer())) {
+
+                      HasStr2 = true;
+                      Str2 = Array->getAsString().str();
+
+                    }
+
+                  }
+
+                }
+
+              }
+
+              // for the internal memcpy routine we only care for the second
+              // parameter and are not reporting anything.
+              if (isIntMemcpy == true) {
+
+                if (HasStr2 == true) {
+
+                  Value *      op2 = callInst->getArgOperand(2);
+                  ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+                  if (ilen) {
+
+                    uint64_t literalLength = Str2.size();
+                    uint64_t optLength = ilen->getZExtValue();
+                    if (literalLength + 1 == optLength) {
+
+                      Str2.append("\0", 1);  // add null byte
+                      addedNull = true;
+
+                    }
+
+                  }
+
+                  valueMap[Str1P] = new std::string(Str2);
+
+                  if (debug)
+                    fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P);
+                  continue;
+
+                }
+
+                continue;
+
+              }
+
+              // Neither a literal nor a global variable?
+              // maybe it is a local variable that we saved
+              if (!HasStr2) {
+
+                std::string *strng = valueMap[Str2P];
+                if (strng && !strng->empty()) {
+
+                  Str2 = *strng;
+                  HasStr2 = true;
+                  if (debug)
+                    fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(),
+                            Str2P);
+
+                }
+
+              }
+
+              if (!HasStr1) {
+
+                auto Ptr = dyn_cast<ConstantExpr>(Str1P);
+
+                if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+                  if (auto *Var =
+                          dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+                    if (auto *Array = dyn_cast<ConstantDataArray>(
+                            Var->getInitializer())) {
+
+                      HasStr1 = true;
+                      Str1 = Array->getAsString().str();
+
+                    }
+
+                  }
+
+                }
+
+              }
+
+              // Neither a literal nor a global variable?
+              // maybe it is a local variable that we saved
+              if (!HasStr1) {
+
+                std::string *strng = valueMap[Str1P];
+                if (strng && !strng->empty()) {
+
+                  Str1 = *strng;
+                  HasStr1 = true;
+                  if (debug)
+                    fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(),
+                            Str1P);
+
+                }
+
+              }
+
+              /* only handle calls where exactly one string is constant/known */
+              if (!(HasStr1 ^ HasStr2)) continue;
+
+              std::string thestring;
+
+              if (HasStr1)
+                thestring = Str1;
+              else
+                thestring = Str2;
+
+              optLen = thestring.length();
+
+              if (isMemcmp || isStrncmp || isStrncasecmp) {
+
+                Value *      op2 = callInst->getArgOperand(2);
+                ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+                if (ilen) {
+
+                  uint64_t literalLength = optLen;
+                  optLen = ilen->getZExtValue();
+                  if (literalLength + 1 == optLen) {  // add null byte
+                    thestring.append("\0", 1);
+                    addedNull = true;
+
+                  }
+
+                }
+
+              }
+
+              // add null byte if this is a string compare function and a null
+              // was not already added
+              if (addedNull == false && !isMemcmp) {
+
+                thestring.append("\0", 1);  // add null byte
+                optLen++;
+
+              }
+
+              if (!be_quiet) {
+
+                std::string outstring;
+                fprintf(stderr, "%s: length %u/%u \"", FuncName.c_str(), optLen,
+                        (unsigned int)thestring.length());
+                for (uint8_t i = 0; i < thestring.length(); i++) {
+
+                  uint8_t c = thestring[i];
+                  if (c <= 32 || c >= 127)
+                    fprintf(stderr, "\\x%02x", c);
+                  else
+                    fprintf(stderr, "%c", c);
+
+                }
+
+                fprintf(stderr, "\"\n");
+
+              }
+
+              // we take the longer string, even if the compare was to a
+              // shorter part. Note that depending on the optimizer of the
+              // compiler this can be wrong, but it is more likely that this
+              // is helping the fuzzer
+              if (optLen != thestring.length()) optLen = thestring.length();
+              if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA;
+              if (optLen < MIN_AUTO_EXTRA)  // too short? skip
+                continue;
+
+              dictionary.push_back(thestring.substr(0, optLen));
+
+            }
+
+          }
+
+        }
+
+      }
+
+    }
+
+    /* InsTrim instrumentation starts here */
+
+    u64 total_rs = 0;
+    u64 total_hs = 0;
+
+    for (Function &F : M) {
+
+      if (debug) {
+
+        uint32_t bb_cnt = 0;
+
+        for (auto &BB : F)
+          if (BB.size() > 0) ++bb_cnt;
+        SAYF(cMGN "[D] " cRST "Function %s size %zu %u\n",
+             F.getName().str().c_str(), F.size(), bb_cnt);
+
+      }
+
+      // skip the function if it is below our minimum size (1 or 2)
+      if (F.size() < function_minimum_size) continue;
+      if (isBlacklisted(&F)) continue;
+
+      std::unordered_set<BasicBlock *> MS;
+      if (!MarkSetOpt) {
+
+        for (auto &BB : F) {
+
+          MS.insert(&BB);
+
+        }
+
+        total_rs += F.size();
+
+      } else {
+
+        auto Result = markNodes(&F);
+        auto RS = Result.first;
+        auto HS = Result.second;
+
+        MS.insert(RS.begin(), RS.end());
+        if (!LoopHeadOpt) {
+
+          MS.insert(HS.begin(), HS.end());
+          total_rs += MS.size();
+
+        } else {
+
+          DenseSet<std::pair<BasicBlock *, BasicBlock *>> EdgeSet;
+          DominatorTreeWrapperPass *                      DTWP =
+              &getAnalysis<DominatorTreeWrapperPass>(F);
+          auto DT = &DTWP->getDomTree();
+
+          total_rs += RS.size();
+          total_hs += HS.size();
+
+          for (BasicBlock *BB : HS) {
+
+            bool Inserted = false;
+            for (auto BI = pred_begin(BB), BE = pred_end(BB); BI != BE; ++BI) {
+
+              auto Edge = BasicBlockEdge(*BI, BB);
+              if (Edge.isSingleEdge() && DT->dominates(Edge, BB)) {
+
+                EdgeSet.insert({*BI, BB});
+                Inserted = true;
+                break;
+
+              }
+
+            }
+
+            if (!Inserted) {
+
+              MS.insert(BB);
+              total_rs += 1;
+              total_hs -= 1;
+
+            }
+
+          }
+
+          for (auto I = EdgeSet.begin(), E = EdgeSet.end(); I != E; ++I) {
+
+            auto PredBB = I->first;
+            auto SuccBB = I->second;
+            auto NewBB = SplitBlockPredecessors(SuccBB, {PredBB}, ".split", DT,
+                                                nullptr, nullptr, false);
+            MS.insert(NewBB);
+
+          }
+
+        }
+
+      }
+
+      for (BasicBlock &BB : F) {
+
+        auto        PI = pred_begin(&BB);
+        auto        PE = pred_end(&BB);
+        IRBuilder<> IRB(&*BB.getFirstInsertionPt());
+        Value *     L = NULL;
+
+        if (MarkSetOpt && MS.find(&BB) == MS.end()) { continue; }
+
+        if (PI == PE) {
+
+          L = ConstantInt::get(Int32Ty, afl_global_id++);
+
+        } else {
+
+          auto *PN = PHINode::Create(Int32Ty, 0, "", &*BB.begin());
+          DenseMap<BasicBlock *, unsigned> PredMap;
+          for (auto PI = pred_begin(&BB), PE = pred_end(&BB); PI != PE; ++PI) {
+
+            BasicBlock *PBB = *PI;
+            auto        It = PredMap.insert({PBB, afl_global_id++});
+            unsigned    Label = It.first->second;
+            PN->addIncoming(ConstantInt::get(Int32Ty, Label), PBB);
+
+          }
+
+          L = PN;
+
+        }
+
+        /* Load SHM pointer */
+        Value *MapPtrIdx;
+
+        if (map_addr) {
+
+          MapPtrIdx = IRB.CreateGEP(MapPtrFixed, L);
+
+        } else {
+
+          LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
+          MapPtr->setMetadata(M.getMDKindID("nosanitize"),
+                              MDNode::get(C, None));
+          MapPtrIdx = IRB.CreateGEP(MapPtr, L);
+
+        }
+
+        /* Update bitmap */
+        LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
+        Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+
+        Value *Incr = IRB.CreateAdd(Counter, One);
+
+        if (skip_nozero) {
+
+          auto cf = IRB.CreateICmpEQ(Incr, Zero);
+          auto carry = IRB.CreateZExt(cf, Int8Ty);
+          Incr = IRB.CreateAdd(Incr, carry);
+
+        }
+
+        IRB.CreateStore(Incr, MapPtrIdx)
+            ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+
+        // done :)
+
+        inst_blocks++;
+
+      }
+
+    }
+
+    // warn if the number of assigned location IDs exceeds the map size
+    // (checked once, after all functions have been instrumented)
+    if (!be_quiet && afl_global_id > MAP_SIZE &&
+        afl_global_id > FS_OPT_MAX_MAPSIZE) {
+
+      uint32_t pow2map = 1, map = afl_global_id;
+      while ((map = map >> 1))
+        pow2map++;
+      WARNF(
+          "We have %u blocks to instrument but the map size is only %u. Either "
+          "edit config.h and set MAP_SIZE_POW2 from %u to %u, then recompile "
+          "afl-fuzz and llvm_mode and then make this target - or set "
+          "AFL_MAP_SIZE with at least size %u when running afl-fuzz with this "
+          "target.",
+          afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map, afl_global_id);
+
+    }
+
+    if (!getenv("AFL_LLVM_LTO_DONTWRITEID") || dictionary.size() || map_addr) {
+
+      // yes we could create our own function, insert it into ctors ...
+      // but this would be a pain in the butt ... so we use afl-llvm-rt-lto.o
+
+      Function *f = M.getFunction("__afl_auto_init_globals");
+
+      if (!f) {
+
+        fprintf(stderr,
+                "Error: init function could not be found (this should not "
+                "happen)\n");
+        exit(-1);
+
+      }
+
+      BasicBlock *bb = &f->getEntryBlock();
+      if (!bb) {
+
+        fprintf(stderr,
+                "Error: init function does not have an EntryBlock (this should "
+                "not happen)\n");
+        exit(-1);
+
+      }
+
+      BasicBlock::iterator IP = bb->getFirstInsertionPt();
+      IRBuilder<>          IRB(&(*IP));
+
+      if (map_addr) {
+
+        GlobalVariable *AFLMapAddrFixed =
+            new GlobalVariable(M, Int64Ty, true, GlobalValue::ExternalLinkage,
+                               0, "__afl_map_addr");
+        ConstantInt *MapAddr = ConstantInt::get(Int64Ty, map_addr);
+        StoreInst *  StoreMapAddr = IRB.CreateStore(MapAddr, AFLMapAddrFixed);
+        StoreMapAddr->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
+
+      }
+
+      if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) {
+
+        uint32_t write_loc = afl_global_id;
+
+        if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3);
+
+        GlobalVariable *AFLFinalLoc =
+            new GlobalVariable(M, Int32Ty, true, GlobalValue::ExternalLinkage,
+                               0, "__afl_final_loc");
+        ConstantInt *const_loc = ConstantInt::get(Int32Ty, write_loc);
+        StoreInst *  StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc);
+        StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"),
+                                   MDNode::get(C, None));
+
+      }
+
+      if (dictionary.size()) {
+
+        size_t memlen = 0, count = 0, offset = 0;
+        char * ptr;
+
+        for (auto token : dictionary) {
+
+          memlen += token.length();
+          count++;
+
+        }
+
+        if (!be_quiet)
+          printf("AUTODICTIONARY: %lu string%s found\n", count,
+                 count == 1 ? "" : "s");
+
+        if (count) {
+
+          if ((ptr = (char *)malloc(memlen + count)) == NULL) {
+
+            fprintf(stderr, "Error: malloc for %lu bytes failed!\n",
+                    memlen + count);
+            exit(-1);
+
+          }
+
+          count = 0;
+
+          for (auto token : dictionary) {
+
+            if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) {
+
+              ptr[offset++] = (uint8_t)token.length();
+              memcpy(ptr + offset, token.c_str(), token.length());
+              offset += token.length();
+              count++;
+
+            }
+
+          }
+
+          GlobalVariable *AFLDictionaryLen = new GlobalVariable(
+              M, Int32Ty, false, GlobalValue::ExternalLinkage, 0,
+              "__afl_dictionary_len");
+          ConstantInt *const_len = ConstantInt::get(Int32Ty, offset);
+          StoreInst *  StoreDictLen =
+              IRB.CreateStore(const_len, AFLDictionaryLen);
+          StoreDictLen->setMetadata(M.getMDKindID("nosanitize"),
+                                    MDNode::get(C, None));
+
+          ArrayType *ArrayTy = ArrayType::get(IntegerType::get(C, 8), offset);
+          GlobalVariable *AFLInternalDictionary = new GlobalVariable(
+              M, ArrayTy, true, GlobalValue::ExternalLinkage,
+              ConstantDataArray::get(
+                  C, *(new ArrayRef<char>((char *)ptr, offset))),
+              "__afl_internal_dictionary");
+          AFLInternalDictionary->setInitializer(ConstantDataArray::get(
+              C, *(new ArrayRef<char>((char *)ptr, offset))));
+          AFLInternalDictionary->setConstant(true);
+
+          GlobalVariable *AFLDictionary = new GlobalVariable(
+              M, PointerType::get(Int8Ty, 0), false,
+              GlobalValue::ExternalLinkage, 0, "__afl_dictionary");
+
+          Value *AFLDictOff = IRB.CreateGEP(AFLInternalDictionary, Zero);
+          Value *AFLDictPtr =
+              IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Ty, 0));
+          StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary);
+          StoreDict->setMetadata(M.getMDKindID("nosanitize"),
+                                 MDNode::get(C, None));
+
+        }
+
+      }
+
+    }
+
+    /* Say something nice. */
+
+    if (!be_quiet) {
+
+      if (!inst_blocks)
+        WARNF("No instrumentation targets found.");
+      else {
+
+        char modeline[100];
+        snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
+                 getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
+                 getenv("AFL_USE_ASAN") ? ", ASAN" : "",
+                 getenv("AFL_USE_MSAN") ? ", MSAN" : "",
+                 getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
+                 getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
+        OKF("Instrumented %u locations (%llu, %llu) with no collisions (on "
+            "average %llu "
+            "collisions would be in afl-gcc/afl-clang-fast) (%s mode).",
+            inst_blocks, total_rs, total_hs, calculateCollisions(inst_blocks),
+            modeline);
+
+      }
+
+    }
+
+    return true;
+
+  }
+
+};  // end of struct InsTrim
+
+}  // end of anonymous namespace
+
+char InsTrimLTO::ID = 0;  // out-of-line definition of the pass's static ID
+
+static void registerInsTrimLTO(const PassManagerBuilder &,
+                               legacy::PassManagerBase &PM) {
+  // registration callback (see RegisterInsTrimLTO); the builder is unused
+  PM.add(new InsTrimLTO());  // add a newly allocated InsTrimLTO pass to PM
+
+}
+
+static RegisterPass<InsTrimLTO> X("afl-lto-instrim",
+                                  "afl++ InsTrim LTO instrumentation pass",
+                                  false, false);
+// also hook registerInsTrimLTO into the full-LTO-last extension point
+static RegisterStandardPasses RegisterInsTrimLTO(
+    PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerInsTrimLTO);
+
diff --git a/llvm_mode/afl-llvm-lto-instrumentation.so.cc b/llvm_mode/afl-llvm-lto-instrumentation.so.cc
index 838e45af..79081d37 100644
--- a/llvm_mode/afl-llvm-lto-instrumentation.so.cc
+++ b/llvm_mode/afl-llvm-lto-instrumentation.so.cc
@@ -1,14 +1,9 @@
 /*
-   american fuzzy lop++ - LLVM-mode instrumentation pass
-   ---------------------------------------------------
+   american fuzzy lop++ - LLVM LTO instrumentation pass
+   ----------------------------------------------------
 
-   Written by Laszlo Szekeres <lszekeres@google.com> and
-              Michal Zalewski
+   Written by Marc Heuse <mh@mh-sec.de>
 
-   LLVM integration design comes from Laszlo Szekeres. C bits copied-and-pasted
-   from afl-as.c are Michal's fault.
-
-   Copyright 2015, 2016 Google Inc. All rights reserved.
    Copyright 2019-2020 AFLplusplus Project. All rights reserved.
 
    Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,9 +12,7 @@
 
      http://www.apache.org/licenses/LICENSE-2.0
 
-   This library is plugged into LLVM when invoking clang through afl-clang-fast.
-   It tells the compiler to add code roughly equivalent to the bits discussed
-   in ../afl-as.h.
+   This library is plugged into LLVM when invoking clang through afl-clang-lto.
 
  */
 
@@ -32,11 +25,12 @@
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>
+#include <sys/time.h>
 
 #include <list>
 #include <string>
 #include <fstream>
-#include <sys/time.h>
+#include <set>
 
 #include "llvm/Config/llvm-config.h"
 #include "llvm/ADT/Statistic.h"
@@ -56,7 +50,6 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Pass.h"
 
-#include <set>
 #include "afl-llvm-common.h"
 
 using namespace llvm;
@@ -90,23 +83,6 @@ class AFLLTOPass : public ModulePass {
 
   }
 
-  // Calculate the number of average collisions that would occur if all
-  // location IDs would be assigned randomly (like normal afl/afl++).
-  // This uses the "balls in bins" algorithm.
-  unsigned long long int calculateCollisions(uint32_t edges) {
-
-    double                 bins = MAP_SIZE;
-    double                 balls = edges;
-    double                 step1 = 1 - (1 / bins);
-    double                 step2 = pow(step1, balls);
-    double                 step3 = bins * step2;
-    double                 step4 = round(step3);
-    unsigned long long int empty = step4;
-    unsigned long long int collisions = edges - (MAP_SIZE - empty);
-    return collisions;
-
-  }
-
   bool runOnModule(Module &M) override;
 
  protected:
@@ -131,8 +107,6 @@ bool AFLLTOPass::runOnModule(Module &M) {
   IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
   IntegerType *Int64Ty = IntegerType::getInt64Ty(C);
 
-  if (getenv("AFL_DEBUG")) debug = 1;
-
   /* Show a banner */
 
   if ((isatty(2) && !getenv("AFL_QUIET")) || debug) {
@@ -185,12 +159,10 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
   if (debug) { fprintf(stderr, "map address is %lu\n", map_addr); }
 
-  /* Get globals for the SHM region and the previous location. Note that
-     __afl_prev_loc is thread-local. */
+  /* Get/set the globals for the SHM region. */
 
   GlobalVariable *AFLMapPtr = NULL;
-  ;
-  Value *MapPtrFixed = NULL;
+  Value *         MapPtrFixed = NULL;
 
   if (!map_addr) {