28 files changed, 1883 insertions, 2126 deletions
diff --git a/instrumentation/README.cmplog.md b/instrumentation/README.cmplog.md
index a796c7a7..146b4620 100644
--- a/instrumentation/README.cmplog.md
+++ b/instrumentation/README.cmplog.md
@@ -1,11 +1,12 @@
 # CmpLog instrumentation
 
-The CmpLog instrumentation enables logging of comparison operands in a
-shared memory.
+The CmpLog instrumentation enables logging of comparison operands in a shared
+memory.
 
-These values can be used by various mutators built on top of it.
-At the moment we support the RedQueen mutator (input-2-state instructions only), 
-for details see [the RedQueen paper](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf).
+These values can be used by various mutators built on top of it. At the moment,
+we support the RedQueen mutator (input-2-state instructions only), for details
+see
+[the RedQueen paper](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf).
 
 ## Build
 
@@ -14,7 +15,8 @@ program.
 
 The first version is built using the regular AFL++ instrumentation.
 
-The second one, the CmpLog binary, is built with setting AFL_LLVM_CMPLOG during the compilation.
+The second one, the CmpLog binary, is built with setting AFL_LLVM_CMPLOG during
+the compilation.
 
 For example:
 
@@ -32,8 +34,8 @@ unset AFL_LLVM_CMPLOG
 
 ## Use
 
-AFL++ has the new `-c` option that needs to be used to specify the CmpLog binary (the second
-build).
+AFL++ has the new `-c` option that needs to be used to specify the CmpLog binary
+(the second build).
 
 For example:
 
@@ -41,4 +43,4 @@ For example:
 afl-fuzz -i input -o output -c ./program.cmplog -m none -- ./program.afl @@
 ```
 
-Be sure to use `-m none` because CmpLog can map a lot of pages.
+Be sure to use `-m none` because CmpLog can map a lot of pages.
\ No newline at end of file
diff --git a/instrumentation/README.ctx.md b/instrumentation/README.ctx.md
deleted file mode 100644
index 335e9921..00000000
--- a/instrumentation/README.ctx.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# AFL Context Sensitive Branch Coverage
-
-## What is this?
-
-This is an LLVM-based implementation of the context sensitive branch coverage.
-
-Basically every function gets its own ID and, every time when an edge is logged,
-all the IDs in the callstack are hashed and combined with the edge transition
-hash to augment the classic edge coverage with the information about the
-calling context.
-
-So if both function A and function B call a function C, the coverage
-collected in C will be different.
-
-In math the coverage is collected as follows:
-`map[current_location_ID ^ previous_location_ID >> 1 ^ hash_callstack_IDs] += 1`
-
-The callstack hash is produced XOR-ing the function IDs to avoid explosion with
-recursive functions.
-
-## Usage
-
-Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable.
-
-It is highly recommended to increase the MAP_SIZE_POW2 definition in
-config.h to at least 18 and maybe up to 20 for this as otherwise too
-many map collisions occur.
-
-## Caller Branch Coverage
-
-If the context sensitive coverage introduces too may collisions and becoming
-detrimental, the user can choose to augment edge coverage with just the
-called function ID, instead of the entire callstack hash.
-
-In math the coverage is collected as follows:
-`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1`
-
-Set the `AFL_LLVM_INSTRUMENT=CALLER` or `AFL_LLVM_CALLER=1` environment variable.
diff --git a/instrumentation/README.gcc_plugin.md b/instrumentation/README.gcc_plugin.md
index 230ceb73..ef38662b 100644
--- a/instrumentation/README.gcc_plugin.md
+++ b/instrumentation/README.gcc_plugin.md
@@ -1,64 +1,68 @@
 # GCC-based instrumentation for afl-fuzz
 
-See [../README.md](../README.md) for the general instruction manual.
-See [README.llvm.md](README.llvm.md) for the LLVM-based instrumentation.
+For the general instruction manual, see [../README.md](../README.md). For the
+LLVM-based instrumentation, see [README.llvm.md](README.llvm.md).
 
 This document describes how to build and use `afl-gcc-fast` and `afl-g++-fast`,
 which instrument the target with the help of gcc plugins.
 
-TLDR:
-  * check the version of your gcc compiler: `gcc --version`
-  * `apt-get install gcc-VERSION-plugin-dev` or similar to install headers for gcc plugins
-  * `gcc` and `g++` must match the gcc-VERSION you installed headers for. You can set `AFL_CC`/`AFL_CXX`
-    to point to these!
-  * `make`
-  * just use `afl-gcc-fast`/`afl-g++-fast` normally like you would do with `afl-clang-fast`
+TL;DR:
+* Check the version of your gcc compiler: `gcc --version`
+* `apt-get install gcc-VERSION-plugin-dev` or similar to install headers for gcc
+  plugins.
+* `gcc` and `g++` must match the gcc-VERSION you installed headers for. You can
+  set `AFL_CC`/`AFL_CXX` to point to these!
+* `make`
+* Just use `afl-gcc-fast`/`afl-g++-fast` normally like you would do with
+  `afl-clang-fast`.
 
 ## 1) Introduction
 
-The code in this directory allows to instrument programs for AFL using
-true compiler-level instrumentation, instead of the more crude
-assembly-level rewriting approach taken by afl-gcc and afl-clang. This has
-several interesting properties:
+The code in this directory allows to instrument programs for AFL++ using true
+compiler-level instrumentation, instead of the more crude assembly-level
+rewriting approach taken by afl-gcc and afl-clang. This has several interesting
+properties:
 
-  - The compiler can make many optimizations that are hard to pull off when
-    manually inserting assembly. As a result, some slow, CPU-bound programs will
-    run up to around faster.
+- The compiler can make many optimizations that are hard to pull off when
+  manually inserting assembly. As a result, some slow, CPU-bound programs will
+  run up to around faster.
 
-    The gains are less pronounced for fast binaries, where the speed is limited
-    chiefly by the cost of creating new processes. In such cases, the gain will
-    probably stay within 10%.
+  The gains are less pronounced for fast binaries, where the speed is limited
+  chiefly by the cost of creating new processes. In such cases, the gain will
+  probably stay within 10%.
 
-  - The instrumentation is CPU-independent. At least in principle, you should
-    be able to rely on it to fuzz programs on non-x86 architectures (after
-    building `afl-fuzz` with `AFL_NOX86=1`).
+- The instrumentation is CPU-independent. At least in principle, you should be
+  able to rely on it to fuzz programs on non-x86 architectures (after building
+  `afl-fuzz` with `AFL_NOX86=1`).
 
-  - Because the feature relies on the internals of GCC, it is gcc-specific
-    and will *not* work with LLVM (see [README.llvm.md](README.llvm.md) for an alternative).
+- Because the feature relies on the internals of GCC, it is gcc-specific and
+  will *not* work with LLVM (see [README.llvm.md](README.llvm.md) for an
+  alternative).
 
 Once this implementation is shown to be sufficiently robust and portable, it
-will probably replace afl-gcc. For now, it can be built separately and
-co-exists with the original code.
+will probably replace afl-gcc. For now, it can be built separately and co-exists
+with the original code.
 
 The idea and much of the implementation comes from Laszlo Szekeres.
 
 ## 2) How to use
 
-In order to leverage this mechanism, you need to have modern enough GCC
-(>= version 4.5.0) and the plugin development headers installed on your system. That
+In order to leverage this mechanism, you need to have modern enough GCC (>=
+version 4.5.0) and the plugin development headers installed on your system. That
 should be all you need. On Debian machines, these headers can be acquired by
 installing the `gcc-VERSION-plugin-dev` packages.
 
 To build the instrumentation itself, type `make`. This will generate binaries
-called `afl-gcc-fast` and `afl-g++-fast` in the parent directory. 
+called `afl-gcc-fast` and `afl-g++-fast` in the parent directory.
 
-The gcc and g++ compiler links have to point to gcc-VERSION - or set these
-by pointing the environment variables `AFL_CC`/`AFL_CXX` to them.
-If the `CC`/`CXX` environment variables have been set, those compilers will be 
-preferred over those from the `AFL_CC`/`AFL_CXX` settings.
+The gcc and g++ compiler links have to point to gcc-VERSION - or set these by
+pointing the environment variables `AFL_CC`/`AFL_CXX` to them. If the `CC`/`CXX`
+environment variables have been set, those compilers will be preferred over
+those from the `AFL_CC`/`AFL_CXX` settings.
 
 Once this is done, you can instrument third-party code in a way similar to the
-standard operating mode of AFL, e.g.:
+standard operating mode of AFL++, e.g.:
+
 ```
   CC=/path/to/afl/afl-gcc-fast
   CXX=/path/to/afl/afl-g++-fast
@@ -66,15 +70,15 @@ standard operating mode of AFL, e.g.:
   ./configure [...options...]
   make
 ```
+
 Note: We also used `CXX` to set the C++ compiler to `afl-g++-fast` for C++ code.
 
 The tool honors roughly the same environmental variables as `afl-gcc` (see
-[env_variables.md](../docs/env_variables.md). This includes `AFL_INST_RATIO`,
-`AFL_USE_ASAN`, `AFL_HARDEN`, and `AFL_DONT_OPTIMIZE`.
+[docs/env_variables.md](../docs/env_variables.md). This includes
+`AFL_INST_RATIO`, `AFL_USE_ASAN`, `AFL_HARDEN`, and `AFL_DONT_OPTIMIZE`.
 
-Note: if you want the GCC plugin to be installed on your system for all
-users, you need to build it before issuing 'make install' in the parent
-directory.
+Note: if you want the GCC plugin to be installed on your system for all users,
+you need to build it before issuing 'make install' in the parent directory.
 
 ## 3) Gotchas, feedback, bugs
 
@@ -83,93 +87,15 @@ reports to afl@aflplus.plus.
 
 ## 4) Bonus feature #1: deferred initialization
 
-AFL tries to optimize performance by executing the targeted binary just once,
-stopping it just before main(), and then cloning this "main" process to get
-a steady supply of targets to fuzz.
-
-Although this approach eliminates much of the OS-, linker- and libc-level
-costs of executing the program, it does not always help with binaries that
-perform other time-consuming initialization steps - say, parsing a large config
-file before getting to the fuzzed data.
-
-In such cases, it's beneficial to initialize the forkserver a bit later, once
-most of the initialization work is already done, but before the binary attempts
-to read the fuzzed input and parse it; in some cases, this can offer a 10x+
-performance gain. You can implement delayed initialization in GCC mode in a
-fairly simple way.
-
-First, locate a suitable location in the code where the delayed cloning can
-take place. This needs to be done with *extreme* care to avoid breaking the
-binary. In particular, the program will probably malfunction if you select
-a location after:
-
-  - The creation of any vital threads or child processes - since the forkserver
-    can't clone them easily.
-
-  - The initialization of timers via setitimer() or equivalent calls.
-
-  - The creation of temporary files, network sockets, offset-sensitive file
-    descriptors, and similar shared-state resources - but only provided that
-    their state meaningfully influences the behavior of the program later on.
-
-  - Any access to the fuzzed input, including reading the metadata about its
-    size.
-
-With the location selected, add this code in the appropriate spot:
-
-```
-#ifdef __AFL_HAVE_MANUAL_CONTROL
-  __AFL_INIT();
-#endif
-```
-
-You don't need the #ifdef guards, but they will make the program still work as
-usual when compiled with a compiler other than afl-gcc-fast/afl-clang-fast.
-
-Finally, recompile the program with afl-gcc-fast (afl-gcc or afl-clang will
-*not* generate a deferred-initialization binary) - and you should be all set!
+See
+[README.persistent_mode.md#3) Deferred initialization](README.persistent_mode.md#3-deferred-initialization).
 
 ## 5) Bonus feature #2: persistent mode
 
-Some libraries provide APIs that are stateless, or whose state can be reset in
-between processing different input files. When such a reset is performed, a
-single long-lived process can be reused to try out multiple test cases,
-eliminating the need for repeated `fork()` calls and the associated OS overhead.
-
-The basic structure of the program that does this would be:
-
-```
-  while (__AFL_LOOP(1000)) {
-
-    /* Read input data. */
-    /* Call library code to be fuzzed. */
-    /* Reset state. */
-
-  }
-
-  /* Exit normally */
-```
-
-The numerical value specified within the loop controls the maximum number
-of iterations before AFL will restart the process from scratch. This minimizes
-the impact of memory leaks and similar glitches; 1000 is a good starting point.
-
-A more detailed template is shown in ../utils/persistent_mode/.
-Similarly to the previous mode, the feature works only with afl-gcc-fast or
-afl-clang-fast; #ifdef guards can be used to suppress it when using other
-compilers.
-
-Note that as with the previous mode, the feature is easy to misuse; if you
-do not reset the critical state fully, you may end up with false positives or
-waste a whole lot of CPU power doing nothing useful at all. Be particularly
-wary of memory leaks and the state of file descriptors.
-
-When running in this mode, the execution paths will inherently vary a bit
-depending on whether the input loop is being entered for the first time or
-executed again. To avoid spurious warnings, the feature implies
-`AFL_NO_VAR_CHECK` and hides the "variable path" warnings in the UI.
+See
+[README.persistent_mode.md#4) Persistent mode](README.persistent_mode.md#4-persistent-mode).
 
 ## 6) Bonus feature #3: selective instrumentation
 
-It can be more effective to fuzzing to only instrument parts of the code.
-For details see [README.instrument_list.md](README.instrument_list.md).
+It can be more effective to fuzzing to only instrument parts of the code. For
+details, see [README.instrument_list.md](README.instrument_list.md).
\ No newline at end of file
diff --git a/instrumentation/README.instrument_list.md b/instrumentation/README.instrument_list.md
index 7db9c055..3ed64807 100644
--- a/instrumentation/README.instrument_list.md
+++ b/instrumentation/README.instrument_list.md
@@ -1,80 +1,84 @@
 # Using AFL++ with partial instrumentation
 
-  This file describes two different mechanisms to selectively instrument
-  only specific parts in the target.
+This file describes two different mechanisms to selectively instrument only
+specific parts in the target.
 
-  Both mechanisms work for LLVM and GCC_PLUGIN, but not for afl-clang/afl-gcc.
+Both mechanisms work for LLVM and GCC_PLUGIN, but not for afl-clang/afl-gcc.
 
 ## 1) Description and purpose
 
 When building and testing complex programs where only a part of the program is
-the fuzzing target, it often helps to only instrument the necessary parts of
-the program, leaving the rest uninstrumented. This helps to focus the fuzzer
-on the important parts of the program, avoiding undesired noise and
-disturbance by uninteresting code being exercised.
+the fuzzing target, it often helps to only instrument the necessary parts of the
+program, leaving the rest uninstrumented. This helps to focus the fuzzer on the
+important parts of the program, avoiding undesired noise and disturbance by
+uninteresting code being exercised.
 
 For this purpose, "partial instrumentation" support is provided by AFL++ that
 allows to specify what should be instrumented and what not.
 
-Both mechanisms can be used together.
+Both mechanisms for partial instrumentation can be used together.
 
 ## 2) Selective instrumentation with __AFL_COVERAGE_... directives
 
-In this mechanism the selective instrumentation is done in the source code.
+In this mechanism, the selective instrumentation is done in the source code.
 
-After the includes a special define has to be made, eg.:
+After the includes, a special define has to be made, e.g.:
 
 ```
 #include <stdio.h>
 #include <stdint.h>
 // ...
- 
+
 __AFL_COVERAGE();  // <- required for this feature to work
 ```
 
-If you want to disable the coverage at startup until you specify coverage
-should be started, then add `__AFL_COVERAGE_START_OFF();` at that position.
+If you want to disable the coverage at startup until you specify coverage should
+be started, then add `__AFL_COVERAGE_START_OFF();` at that position.
 
-From here on out you have the following macros available that you can use
-in any function where you want:
+From here on out, you have the following macros available that you can use in
+any function where you want:
 
-  * `__AFL_COVERAGE_ON();` - enable coverage from this point onwards
-  * `__AFL_COVERAGE_OFF();` - disable coverage from this point onwards
-  * `__AFL_COVERAGE_DISCARD();` - reset all coverage gathered until this point
-  * `__AFL_COVERAGE_SKIP();` - mark this test case as unimportant. Whatever happens, afl-fuzz will ignore it.
+* `__AFL_COVERAGE_ON();` - Enable coverage from this point onwards.
+* `__AFL_COVERAGE_OFF();` - Disable coverage from this point onwards.
+* `__AFL_COVERAGE_DISCARD();` - Reset all coverage gathered until this point.
+* `__AFL_COVERAGE_SKIP();` - Mark this test case as unimportant. Whatever
+  happens, afl-fuzz will ignore it.
 
-A special function is `__afl_coverage_interesting`.
-To use this, you must define `void __afl_coverage_interesting(u8 val, u32 id);`.
-Then you can use this function globally, where the `val` parameter can be set
-by you, the `id` parameter is for afl-fuzz and will be overwritten.
-Note that useful parameters for `val` are: 1, 2, 3, 4, 8, 16, 32, 64, 128.
-A value of e.g. 33 will be seen as 32 for coverage purposes.
+A special function is `__afl_coverage_interesting`. To use this, you must define
+`void __afl_coverage_interesting(u8 val, u32 id);`. Then you can use this
+function globally, where the `val` parameter can be set by you, the `id`
+parameter is for afl-fuzz and will be overwritten. Note that useful parameters
+for `val` are: 1, 2, 3, 4, 8, 16, 32, 64, 128. A value of, e.g., 33 will be seen
+as 32 for coverage purposes.
 
 ## 3) Selective instrumentation with AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST
 
-This feature is equivalent to llvm 12 sancov feature and allows to specify
-on a filename and/or function name level to instrument these or skip them.
+This feature is equivalent to llvm 12 sancov feature and allows to specify on a
+filename and/or function name level to instrument these or skip them.
 
 ### 3a) How to use the partial instrumentation mode
 
 In order to build with partial instrumentation, you need to build with
-afl-clang-fast/afl-clang-fast++ or afl-clang-lto/afl-clang-lto++.
-The only required change is that you need to set either the environment variable
-AFL_LLVM_ALLOWLIST or AFL_LLVM_DENYLIST set with a filename.
+afl-clang-fast/afl-clang-fast++ or afl-clang-lto/afl-clang-lto++. The only
+required change is that you need to set either the environment variable
+`AFL_LLVM_ALLOWLIST` or `AFL_LLVM_DENYLIST` set with a filename.
 
 That file should contain the file names or functions that are to be instrumented
-(AFL_LLVM_ALLOWLIST) or are specifically NOT to be instrumented (AFL_LLVM_DENYLIST).
+(`AFL_LLVM_ALLOWLIST`) or are specifically NOT to be instrumented
+(`AFL_LLVM_DENYLIST`).
+
+GCC_PLUGIN: you can use either `AFL_LLVM_ALLOWLIST` or `AFL_GCC_ALLOWLIST` (or
+the same for `_DENYLIST`), both work.
 
-GCC_PLUGIN: you can use either AFL_LLVM_ALLOWLIST or AFL_GCC_ALLOWLIST (or the
-same for _DENYLIST), both work.
+For matching to succeed, the function/file name that is being compiled must end
+in the function/file name entry contained in this instrument file list. That is
+to avoid breaking the match when absolute paths are used during compilation.
 
-For matching to succeed, the function/file name that is being compiled must end in the
-function/file name entry contained in this instrument file list. That is to avoid
-breaking the match when absolute paths are used during compilation.
+**NOTE:** In builds with optimization enabled, functions might be inlined and
+would not match!
 
-**NOTE:** In builds with optimization enabled, functions might be inlined and would not match!
+For example, if your source tree looks like this:
 
-For example if your source tree looks like this:
 ```
 project/
 project/feature_a/a1.cpp
@@ -83,36 +87,45 @@ project/feature_b/b1.cpp
 project/feature_b/b2.cpp
 ```
 
-and you only want to test feature_a, then create an "instrument file list" file containing:
+And you only want to test feature_a, then create an "instrument file list" file
+containing:
+
 ```
 feature_a/a1.cpp
 feature_a/a2.cpp
 ```
 
-However if the "instrument file list" file contains only this, it works as well:
+However, if the "instrument file list" file contains only this, it works as
+well:
+
 ```
 a1.cpp
 a2.cpp
 ```
-but it might lead to files being unwantedly instrumented if the same filename
+
+But it might lead to files being unwantedly instrumented if the same filename
 exists somewhere else in the project directories.
 
-You can also specify function names. Note that for C++ the function names
-must be mangled to match! `nm` can print these names.
+You can also specify function names. Note that for C++ the function names must
+be mangled to match! `nm` can print these names.
+
+AFL++ is able to identify whether an entry is a filename or a function. However,
+if you want to be sure (and compliant to the sancov allow/blocklist format), you
+can specify source file entries like this:
 
-AFL++ is able to identify whether an entry is a filename or a function.
-However if you want to be sure (and compliant to the sancov allow/blocklist
-format), you can specify source file entries like this:
 ```
 src: *malloc.c
 ```
-and function entries like this:
+
+And function entries like this:
+
 ```
 fun: MallocFoo
 ```
+
 Note that whitespace is ignored and comments (`# foo`) are supported.
 
 ### 3b) UNIX-style pattern matching
 
 You can add UNIX-style pattern matching in the "instrument file list" entries.
-See `man fnmatch` for the syntax. We do not set any of the `fnmatch` flags.
+See `man fnmatch` for the syntax. Do not set any of the `fnmatch` flags.
\ No newline at end of file
diff --git a/instrumentation/README.laf-intel.md b/instrumentation/README.laf-intel.md
index 229807e8..06e653ea 100644
--- a/instrumentation/README.laf-intel.md
+++ b/instrumentation/README.laf-intel.md
@@ -2,20 +2,17 @@
 
 ## Introduction
 
-This originally is the work of an individual nicknamed laf-intel.
-His blog [Circumventing Fuzzing Roadblocks with Compiler Transformations]
-(https://lafintel.wordpress.com/) and gitlab repo [laf-llvm-pass]
-(https://gitlab.com/laf-intel/laf-llvm-pass/)
-describe some code transformations that
-help AFL++ to enter conditional blocks, where conditions consist of
-comparisons of large values.
+This originally is the work of an individual nicknamed laf-intel. His blog
+[Circumventing Fuzzing Roadblocks with Compiler Transformations](https://lafintel.wordpress.com/)
+and GitLab repo [laf-llvm-pass](https://gitlab.com/laf-intel/laf-llvm-pass/)
+describe some code transformations that help AFL++ to enter conditional blocks,
+where conditions consist of comparisons of large values.
 
 ## Usage
 
-By default these passes will not run when you compile programs using 
-afl-clang-fast. Hence, you can use AFL as usual.
-To enable the passes you must set environment variables before you
-compile the target project.
+By default, these passes will not run when you compile programs using
+afl-clang-fast. Hence, you can use AFL++ as usual. To enable the passes, you
+must set environment variables before you compile the target project.
 
 The following options exist:
 
@@ -25,32 +22,30 @@ Enables the split-switches pass.
 
 `export AFL_LLVM_LAF_TRANSFORM_COMPARES=1`
 
-Enables the transform-compares pass (strcmp, memcmp, strncmp,
-strcasecmp, strncasecmp).
+Enables the transform-compares pass (strcmp, memcmp, strncmp, strcasecmp,
+strncasecmp).
 
 `export AFL_LLVM_LAF_SPLIT_COMPARES=1`
 
-Enables the split-compares pass.
-By default it will 
+Enables the split-compares pass. By default, it will
 1. simplify operators >= (and <=) into chains of > (<) and == comparisons
-2. change signed integer comparisons to a chain of sign-only comparison
-and unsigned integer comparisons
-3. split all unsigned integer comparisons with bit widths of
-64, 32 or 16 bits to chains of 8 bits comparisons.
-
-You can change the behaviour of the last step by setting
-`export AFL_LLVM_LAF_SPLIT_COMPARES_BITW=<bit_width>`, where 
-bit_width may be 64, 32 or 16. For example, a bit_width of 16
-would split larger comparisons down to 16 bit comparisons.
-
-A new experimental feature is splitting floating point comparisons into a
-series of sign, exponent and mantissa comparisons followed by splitting each
-of them into 8 bit comparisons when necessary.
-It is activated with the `AFL_LLVM_LAF_SPLIT_FLOATS` setting.
-Please note that full IEEE 754 functionality is not preserved, that is
-values of nan and infinity will probably behave differently.
-
-Note that setting this automatically activates `AFL_LLVM_LAF_SPLIT_COMPARES`
-
-You can also set `AFL_LLVM_LAF_ALL` and have all of the above enabled :-)
-
+2. change signed integer comparisons to a chain of sign-only comparison and
+   unsigned integer comparisons
+3. split all unsigned integer comparisons with bit widths of 64, 32, or 16 bits
+   to chains of 8 bits comparisons.
+
+You can change the behavior of the last step by setting `export
+AFL_LLVM_LAF_SPLIT_COMPARES_BITW=<bit_width>`, where bit_width may be 64, 32, or
+16. For example, a bit_width of 16 would split larger comparisons down to 16 bit
+comparisons.
+
+A new experimental feature is splitting floating point comparisons into a series
+of sign, exponent and mantissa comparisons followed by splitting each of them
+into 8 bit comparisons when necessary. It is activated with the
+`AFL_LLVM_LAF_SPLIT_FLOATS` setting. Note that full IEEE 754 functionality is
+not preserved, that is values of nan and infinity will probably behave
+differently.
+
+Note that setting this automatically activates `AFL_LLVM_LAF_SPLIT_COMPARES`.
+
+You can also set `AFL_LLVM_LAF_ALL` and have all of the above enabled. :-)
\ No newline at end of file
diff --git a/instrumentation/README.llvm.md b/instrumentation/README.llvm.md
index 6e210a7c..d220e52c 100644
--- a/instrumentation/README.llvm.md
+++ b/instrumentation/README.llvm.md
@@ -1,72 +1,79 @@
 # Fast LLVM-based instrumentation for afl-fuzz
 
-  (See [../README.md](../README.md) for the general instruction manual.)
+For the general instruction manual, see [../README.md](../README.md).
 
-  (See [README.gcc_plugin.md](../README.gcc_plugin.md) for the GCC-based instrumentation.)
+For the GCC-based instrumentation, see
+[README.gcc_plugin.md](README.gcc_plugin.md).
 
 ## 1) Introduction
 
 ! llvm_mode works with llvm versions 3.8 up to 13 !
 
-The code in this directory allows you to instrument programs for AFL using
-true compiler-level instrumentation, instead of the more crude
-assembly-level rewriting approach taken by afl-gcc and afl-clang. This has
-several interesting properties:
+The code in this directory allows you to instrument programs for AFL++ using
+true compiler-level instrumentation, instead of the more crude assembly-level
+rewriting approach taken by afl-gcc and afl-clang. This has several interesting
+properties:
 
-  - The compiler can make many optimizations that are hard to pull off when
-    manually inserting assembly. As a result, some slow, CPU-bound programs will
-    run up to around 2x faster.
+- The compiler can make many optimizations that are hard to pull off when
+  manually inserting assembly. As a result, some slow, CPU-bound programs will
+  run up to around 2x faster.
 
-    The gains are less pronounced for fast binaries, where the speed is limited
-    chiefly by the cost of creating new processes. In such cases, the gain will
-    probably stay within 10%.
+  The gains are less pronounced for fast binaries, where the speed is limited
+  chiefly by the cost of creating new processes. In such cases, the gain will
+  probably stay within 10%.
 
-  - The instrumentation is CPU-independent. At least in principle, you should
-    be able to rely on it to fuzz programs on non-x86 architectures (after
-    building afl-fuzz with AFL_NO_X86=1).
+- The instrumentation is CPU-independent. At least in principle, you should be
+  able to rely on it to fuzz programs on non-x86 architectures (after building
+  afl-fuzz with AFL_NO_X86=1).
 
-  - The instrumentation can cope a bit better with multi-threaded targets.
+- The instrumentation can cope a bit better with multi-threaded targets.
 
-  - Because the feature relies on the internals of LLVM, it is clang-specific
-    and will *not* work with GCC (see ../gcc_plugin/ for an alternative once
-    it is available).
+- Because the feature relies on the internals of LLVM, it is clang-specific and
+  will *not* work with GCC (see ../gcc_plugin/ for an alternative once it is
+  available).
 
 Once this implementation is shown to be sufficiently robust and portable, it
 will probably replace afl-clang. For now, it can be built separately and
 co-exists with the original code.
 
-The idea and much of the intial implementation came from Laszlo Szekeres.
+The idea and much of the initial implementation came from Laszlo Szekeres.
 
 ## 2a) How to use this - short
 
-Set the `LLVM_CONFIG` variable to the clang version you want to use, e.g.
+Set the `LLVM_CONFIG` variable to the clang version you want to use, e.g.:
+
 ```
 LLVM_CONFIG=llvm-config-9 make
 ```
+
 In case you have your own compiled llvm version specify the full path:
+
 ```
 LLVM_CONFIG=~/llvm-project/build/bin/llvm-config make
 ```
+
 If you try to use a new llvm version on an old Linux this can fail because of
 old c++ libraries. In this case usually switching to gcc/g++ to compile
 llvm_mode will work:
+
 ```
 LLVM_CONFIG=llvm-config-7 REAL_CC=gcc REAL_CXX=g++ make
 ```
-It is highly recommended to use the newest clang version you can put your
-hands on :)
+
+It is highly recommended to use the newest clang version you can put your hands
+on :)
 
 Then look at [README.persistent_mode.md](README.persistent_mode.md).
 
 ## 2b) How to use this - long
 
 In order to leverage this mechanism, you need to have clang installed on your
-system. You should also make sure that the llvm-config tool is in your path
-(or pointed to via LLVM_CONFIG in the environment).
+system. You should also make sure that the llvm-config tool is in your path (or
+pointed to via LLVM_CONFIG in the environment).
 
-Note that if you have several LLVM versions installed, pointing LLVM_CONFIG
-to the version you want to use will switch compiling to this specific
-version - if you installation is set up correctly :-)
+Note that if you have several LLVM versions installed, pointing LLVM_CONFIG to
+the version you want to use will switch compiling to this specific version - if
+you installation is set up correctly :-)
 
 Unfortunately, some systems that do have clang come without llvm-config or the
 LLVM development headers; one example of this is FreeBSD. FreeBSD users will
@@ -75,15 +82,15 @@ load modules (you'll see "Service unavailable" when loading afl-llvm-pass.so).
 
 To solve all your problems, you can grab pre-built binaries for your OS from:
 
-  http://llvm.org/releases/download.html
+[https://llvm.org/releases/download.html](https://llvm.org/releases/download.html)
 
 ...and then put the bin/ directory from the tarball at the beginning of your
 $PATH when compiling the feature and building packages later on. You don't need
 to be root for that.
 
-To build the instrumentation itself, type 'make'. This will generate binaries
-called afl-clang-fast and afl-clang-fast++ in the parent directory. Once this
-is done, you can instrument third-party code in a way similar to the standard
+To build the instrumentation itself, type `make`. This will generate binaries
+called afl-clang-fast and afl-clang-fast++ in the parent directory. Once this is
+done, you can instrument third-party code in a way similar to the standard
 operating mode of AFL, e.g.:
 
 ```
@@ -93,81 +100,179 @@ operating mode of AFL, e.g.:
 
 Be sure to also include CXX set to afl-clang-fast++ for C++ code.
 
-Note that afl-clang-fast/afl-clang-fast++ are just pointers to afl-cc.
-You can also use afl-cc/afl-c++ and instead direct it to use LLVM
-instrumentation by either setting `AFL_CC_COMPILER=LLVM` or pass the parameter
-`--afl-llvm` via CFLAGS/CXXFLAGS/CPPFLAGS.
+Note that afl-clang-fast/afl-clang-fast++ are just pointers to afl-cc. You can
+also use afl-cc/afl-c++ and instead direct it to use LLVM instrumentation by
+either setting `AFL_CC_COMPILER=LLVM` or pass the parameter `--afl-llvm` via
+CFLAGS/CXXFLAGS/CPPFLAGS.
 
 The tool honors roughly the same environmental variables as afl-gcc (see
-[docs/env_variables.md](../docs/env_variables.md)). This includes AFL_USE_ASAN,
-AFL_HARDEN, and AFL_DONT_OPTIMIZE. However AFL_INST_RATIO is not honored
-as it does not serve a good purpose with the more effective PCGUARD analysis.
+[docs/env_variables.md](../docs/env_variables.md)). This includes
+`AFL_USE_ASAN`, `AFL_HARDEN`, and `AFL_DONT_OPTIMIZE`. However, `AFL_INST_RATIO`
+is not honored as it does not serve a good purpose with the more effective
+PCGUARD analysis.
 
 ## 3) Options
 
-Several options are present to make llvm_mode faster or help it rearrange
-the code to make afl-fuzz path discovery easier.
+Several options are present to make llvm_mode faster or help it rearrange the
+code to make afl-fuzz path discovery easier.
 
-If you need just to instrument specific parts of the code, you can the instrument file list
-which C/C++ files to actually instrument. See [README.instrument_list.md](README.instrument_list.md)
+If you need just to instrument specific parts of the code, you can the
+instrument file list which C/C++ files to actually instrument. See
+[README.instrument_list.md](README.instrument_list.md)
 
-For splitting memcmp, strncmp, etc. please see [README.laf-intel.md](README.laf-intel.md)
+For splitting memcmp, strncmp, etc., see
+[README.laf-intel.md](README.laf-intel.md).
 
 Then there are different ways of instrumenting the target:
 
-1. An better instrumentation strategy uses LTO and link time
-instrumentation. Note that not all targets can compile in this mode, however
-if it works it is the best option you can use.
-Simply use afl-clang-lto/afl-clang-lto++ to use this option.
-See [README.lto.md](README.lto.md)
+1. A better instrumentation strategy uses LTO and link time instrumentation.
+   Note that not all targets can compile in this mode, however, if it works it
+   is the best option you can use. To go with this option, use
+   afl-clang-lto/afl-clang-lto++. See [README.lto.md](README.lto.md).
 
-2. Alternativly you can choose a completely different coverage method:
+2. Alternatively you can choose a completely different coverage method:
 
-2a. N-GRAM coverage - which combines the previous visited edges with the
-current one. This explodes the map but on the other hand has proven to be
-effective for fuzzing.
-See [README.ngram.md](README.ngram.md)
+2a. N-GRAM coverage - which combines the previous visited edges with the current
+    one. This explodes the map but on the other hand has proven to be effective
+    for fuzzing. See
+    [7) AFL++ N-Gram Branch Coverage](#7-afl-n-gram-branch-coverage).
 
 2b. Context sensitive coverage - which combines the visited edges with an
-individual caller ID (the function that called the current one)
-[README.ctx.md](README.ctx.md)
+    individual caller ID (the function that called the current one). See
+    [6) AFL++ Context Sensitive Branch Coverage](#6-afl-context-sensitive-branch-coverage).
 
-Then - additionally to one of the instrumentation options above - there is
-a very effective new instrumentation option called CmpLog as an alternative to
-laf-intel that allow AFL++ to apply mutations similar to Redqueen.
-See [README.cmplog.md](README.cmplog.md)
+Then - additionally to one of the instrumentation options above - there is a
+very effective new instrumentation option called CmpLog as an alternative to
+laf-intel that allow AFL++ to apply mutations similar to Redqueen. See
+[README.cmplog.md](README.cmplog.md).
 
-Finally if your llvm version is 8 or lower, you can activate a mode that
-prevents that a counter overflow result in a 0 value. This is good for
-path discovery, but the llvm implementation for x86 for this functionality
-is not optimal and was only fixed in llvm 9.
-You can set this with AFL_LLVM_NOT_ZERO=1
-See [README.neverzero.md](README.neverzero.md)
+Finally, if your llvm version is 8 or lower, you can activate a mode that
+prevents that a counter overflow result in a 0 value. This is good for path
+discovery, but the llvm implementation for x86 for this functionality is not
+optimal and was only fixed in llvm 9. You can set this with AFL_LLVM_NOT_ZERO=1.
 
-Support for thread safe counters has been added for all modes.
-Activate it with `AFL_LLVM_THREADSAFE_INST=1`. The tradeoff is better precision
-in multi threaded apps for a slightly higher instrumentation overhead.
-This also disables the nozero counter default for performance reasons.
+Support for thread safe counters has been added for all modes. Activate it with
+`AFL_LLVM_THREADSAFE_INST=1`. The tradeoff is better precision in multi threaded
+apps for a slightly higher instrumentation overhead. This also disables the
+nozero counter default for performance reasons.
 
-## 4) Snapshot feature
+## 4) deferred initialization, persistent mode, shared memory fuzzing
 
-To speed up fuzzing you can use a linux loadable kernel module which enables
-a snapshot feature.
-See [README.snapshot.md](README.snapshot.md)
+This is the most powerful and effective fuzzing you can do. For a full
+explanation, see [README.persistent_mode.md](README.persistent_mode.md).
 
-## 5) Gotchas, feedback, bugs
+## 5) Bonus feature: 'dict2file' pass
 
-This is an early-stage mechanism, so field reports are welcome. You can send bug
-reports to <afl-users@googlegroups.com>.
+Just specify `AFL_LLVM_DICT2FILE=/absolute/path/file.txt` and during compilation
+all constant string compare parameters will be written to this file to be used
+with afl-fuzz' `-x` option.
 
-## 6) deferred initialization, persistent mode, shared memory fuzzing
+## 6) AFL++ Context Sensitive Branch Coverage
 
-This is the most powerful and effective fuzzing you can do.
-Please see [README.persistent_mode.md](README.persistent_mode.md) for a
-full explanation.
+### What is this?
 
-## 7) Bonus feature: 'dict2file' pass
+This is an LLVM-based implementation of the context sensitive branch coverage.
 
-Just specify `AFL_LLVM_DICT2FILE=/absolute/path/file.txt` and during compilation
-all constant string compare parameters will be written to this file to be
-used with afl-fuzz' `-x` option.
+Basically every function gets its own ID and, every time when an edge is logged,
+all the IDs in the callstack are hashed and combined with the edge transition
+hash to augment the classic edge coverage with the information about the calling
+context.
+
+So if both function A and function B call a function C, the coverage collected
+in C will be different.
+
+In math the coverage is collected as follows: `map[current_location_ID ^
+previous_location_ID >> 1 ^ hash_callstack_IDs] += 1`
+
+The callstack hash is produced XOR-ing the function IDs to avoid explosion with
+recursive functions.
+
+### Usage
+
+Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable.
+
+It is highly recommended to increase the MAP_SIZE_POW2 definition in config.h to
+at least 18 and maybe up to 20 for this as otherwise too many map collisions
+occur.
+
+### Caller Branch Coverage
+
+If the context sensitive coverage introduces too may collisions and becoming
+detrimental, the user can choose to augment edge coverage with just the called
+function ID, instead of the entire callstack hash.
+
+In math the coverage is collected as follows: `map[current_location_ID ^
+previous_location_ID >> 1 ^ previous_callee_ID] += 1`
+
+Set the `AFL_LLVM_INSTRUMENT=CALLER` or `AFL_LLVM_CALLER=1` environment
+variable.
+
+## 7) AFL++ N-Gram Branch Coverage
+
+### Source
+
+This is an LLVM-based implementation of the n-gram branch coverage proposed in
+the paper
+["Be Sensitive and Collaborative: Analyzing Impact of Coverage Metrics in Greybox Fuzzing"](https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf)
+by Jinghan Wang, et. al.
+
+Note that the original implementation (available
+[here](https://github.com/bitsecurerlab/afl-sensitive)) is built on top of AFL's
+QEMU mode. This is essentially a port that uses LLVM vectorized instructions
+(available from llvm versions 4.0.1 and higher) to achieve the same results when
+compiling source code.
+
+In math the branch coverage is performed as follows: `map[current_location ^
+prev_location[0] >> 1 ^ prev_location[1] >> 1 ^ ... up to n-1`] += 1`
+
+### Usage
+
+The size of `n` (i.e., the number of branches to remember) is an option that is
+specified either in the `AFL_LLVM_INSTRUMENT=NGRAM-{value}` or the
+`AFL_LLVM_NGRAM_SIZE` environment variable. Good values are 2, 4, or 8, valid
+are 2-16.
+
+It is highly recommended to increase the MAP_SIZE_POW2 definition in config.h to
+at least 18 and maybe up to 20 for this as otherwise too many map collisions
+occur.
+
+## 8) NeverZero counters
+
+In larger, complex, or reiterative programs, the byte sized counters that
+collect the edge coverage can easily fill up and wrap around. This is not that
+much of an issue - unless, by chance, it wraps just to a value of zero when the
+program execution ends. In this case, afl-fuzz is not able to see that the edge
+has been accessed and will ignore it.
+
+NeverZero prevents this behavior. If a counter wraps, it jumps over the value 0
+directly to a 1. This improves path discovery (by a very small amount) at a very
+low cost (one instruction per edge).
+
+(The alternative of saturated counters has been tested also and proved to be
+inferior in terms of path discovery.)
+
+This is implemented in afl-gcc and afl-gcc-fast, however, for llvm_mode this is
+optional if multithread safe counters are selected or the llvm version is below
+9 - as there are severe performance costs in these cases.
+
+If you want to enable this for llvm versions below 9 or thread safe counters,
+then set
+
+```
+export AFL_LLVM_NOT_ZERO=1
+```
+
+In case you are on llvm 9 or greater and you do not want this behavior, then you
+can set:
+
+```
+AFL_LLVM_SKIP_NEVERZERO=1
+```
+
+If the target does not have extensive loops or functions that are called a lot,
+then this can give a small performance boost.
+
+Please note that the default counter implementations are not thread safe!
+
+Support for thread safe counters in mode LLVM CLASSIC can be activated with
+setting `AFL_LLVM_THREADSAFE_INST=1`.
\ No newline at end of file
diff --git a/instrumentation/README.lto.md b/instrumentation/README.lto.md
index 6174cdc0..a20175b1 100644
--- a/instrumentation/README.lto.md
+++ b/instrumentation/README.lto.md
@@ -1,55 +1,56 @@
 # afl-clang-lto - collision free instrumentation at link time
 
-## TLDR;
+## TL;DR:
 
-This version requires a current llvm 11+ compiled from the github master.
+This version requires a current llvm 11+ compiled from the GitHub master.
 
 1. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better
-   coverage than anything else that is out there in the AFL world
+   coverage than anything else that is out there in the AFL world.
 
-2. You can use it together with llvm_mode: laf-intel and the instrument file listing
-   features and can be combined with cmplog/Redqueen
+2. You can use it together with llvm_mode: laf-intel and the instrument file
+   listing features and can be combined with cmplog/Redqueen.
 
-3. It only works with llvm 11+
+3. It only works with llvm 11+.
 
-4. AUTODICTIONARY feature! see below
+4. AUTODICTIONARY feature (see below)!
 
-5. If any problems arise be sure to set `AR=llvm-ar RANLIB=llvm-ranlib`.
-   Some targets might need `LD=afl-clang-lto` and others `LD=afl-ld-lto`.
+5. If any problems arise, be sure to set `AR=llvm-ar RANLIB=llvm-ranlib`. Some
+   targets might need `LD=afl-clang-lto` and others `LD=afl-ld-lto`.
 
 ## Introduction and problem description
 
-A big issue with how AFL/AFL++ works is that the basic block IDs that are
-set during compilation are random - and hence naturally the larger the number
-of instrumented locations, the higher the number of edge collisions are in the
-map. This can result in not discovering new paths and therefore degrade the
+A big issue with how AFL++ works is that the basic block IDs that are set during
+compilation are random - and hence naturally the larger the number of
+instrumented locations, the higher the number of edge collisions are in the map.
+This can result in not discovering new paths and therefore degrade the
 efficiency of the fuzzing process.
 
-*This issue is underestimated in the fuzzing community!*
-With a 2^16 = 64kb standard map at already 256 instrumented blocks there is
-on average one collision. On average a target has 10.000 to 50.000
-instrumented blocks hence the real collisions are between 750-18.000!
+*This issue is underestimated in the fuzzing community!* With a 2^16 = 64kb
+standard map at already 256 instrumented blocks, there is on average one
+collision. On average, a target has 10.000 to 50.000 instrumented blocks, hence
+the real collisions are between 750-18.000!
 
-To reach a solution that prevents any collisions took several approaches
-and many dead ends until we got to this:
+To reach a solution that prevents any collisions took several approaches and
+many dead ends until we got to this:
 
- * We instrument at link time when we have all files pre-compiled
- * To instrument at link time we compile in LTO (link time optimization) mode
- * Our compiler (afl-clang-lto/afl-clang-lto++) takes care of setting the
-   correct LTO options and runs our own afl-ld linker instead of the system
-   linker
- * The LLVM linker collects all LTO files to link and instruments them so that
-   we have non-colliding edge overage
- * We use a new (for afl) edge coverage - which is the same as in llvm
-   -fsanitize=coverage edge coverage mode :)
+* We instrument at link time when we have all files pre-compiled.
+* To instrument at link time, we compile in LTO (link time optimization) mode.
+* Our compiler (afl-clang-lto/afl-clang-lto++) takes care of setting the correct
+  LTO options and runs our own afl-ld linker instead of the system linker.
+* The LLVM linker collects all LTO files to link and instruments them so that we
+  have non-colliding edge overage.
+* We use a new (for afl) edge coverage - which is the same as in llvm
+  -fsanitize=coverage edge coverage mode. :)
 
 The result:
- * 10-25% speed gain compared to llvm_mode
- * guaranteed non-colliding edge coverage :-)
- * The compile time especially for binaries to an instrumented library can be
-   much longer
+
+* 10-25% speed gain compared to llvm_mode
+* guaranteed non-colliding edge coverage :-)
+* The compile time, especially for binaries to an instrumented library, can be
+  much longer.
 
 Example build output from a libtiff build:
+
 ```
 libtool: link: afl-clang-lto -g -O2 -Wall -W -o thumbnail thumbnail.o  ../libtiff/.libs/libtiff.a ../port/.libs/libport.a -llzma -ljbig -ljpeg -lz -lm
 afl-clang-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de> in mode LTO
@@ -62,21 +63,24 @@ AUTODICTIONARY: 11 strings found
 
 ### Installing llvm version 11 or 12
 
-llvm 11 or even 12 should be available in all current Linux repositories.
-If you use an outdated Linux distribution read the next section.
+llvm 11 or even 12 should be available in all current Linux repositories. If you
+use an outdated Linux distribution, read the next section.
 
 ### Installing llvm from the llvm repository (version 12+)
 
 Installing the llvm snapshot builds is easy and mostly painless:
 
-In the follow line change `NAME` for your Debian or Ubuntu release name
-(e.g. buster, focal, eon, etc.):
+In the following line, change `NAME` for your Debian or Ubuntu release name
+(e.g., buster, focal, eon, etc.):
+
 ```
 echo deb http://apt.llvm.org/NAME/ llvm-toolchain-NAME NAME >> /etc/apt/sources.list
 ```
-then add the pgp key of llvm and install the packages:
+
+Then add the pgp key of llvm and install the packages:
+
 ```
-wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - 
+wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
 apt-get update && apt-get upgrade -y
 apt-get install -y clang-12 clang-tools-12 libc++1-12 libc++-12-dev \
     libc++abi1-12 libc++abi-12-dev libclang1-12 libclang-12-dev \
@@ -87,7 +91,8 @@ apt-get install -y clang-12 clang-tools-12 libc++1-12 libc++-12-dev \
 
 ### Building llvm yourself (version 12+)
 
-Building llvm from github takes quite some long time and is not painless:
+Building llvm from GitHub takes quite some time and is not painless:
+
 ```sh
 sudo apt install binutils-dev  # this is *essential*!
 git clone --depth=1 https://github.com/llvm/llvm-project
@@ -126,10 +131,12 @@ sudo make install
 
 Just use afl-clang-lto like you did with afl-clang-fast or afl-gcc.
 
-Also the instrument file listing (AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST -> [README.instrument_list.md](README.instrument_list.md)) and
-laf-intel/compcov (AFL_LLVM_LAF_* -> [README.laf-intel.md](README.laf-intel.md)) work.
+Also, the instrument file listing (AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST ->
+[README.instrument_list.md](README.instrument_list.md)) and laf-intel/compcov
+(AFL_LLVM_LAF_* -> [README.laf-intel.md](README.laf-intel.md)) work.
 
 Example:
+
 ```
 CC=afl-clang-lto CXX=afl-clang-lto++ RANLIB=llvm-ranlib AR=llvm-ar ./configure
 make
@@ -143,51 +150,48 @@ NOTE: some targets also need to set the linker, try both `afl-clang-lto` and
 Note: this is highly discouraged! Try to compile to static libraries with
 afl-clang-lto instead of shared libraries!
 
-To make instrumented shared libraries work with afl-clang-lto you have to do
+To make instrumented shared libraries work with afl-clang-lto, you have to do
 quite some extra steps.
 
-Every shared library you want to instrument has to be individually compiled.
-The environment variable `AFL_LLVM_LTO_DONTWRITEID=1` has to be set during
-compilation.
-Additionally the environment variable `AFL_LLVM_LTO_STARTID` has to be set to
-the added edge count values of all previous compiled instrumented shared
-libraries for that target.
-E.g. for the first shared library this would be `AFL_LLVM_LTO_STARTID=0` and
-afl-clang-lto will then report how many edges have been instrumented (let's say
-it reported 1000 instrumented edges).
-The second shared library then has to be set to that value
+Every shared library you want to instrument has to be individually compiled. The
+environment variable `AFL_LLVM_LTO_DONTWRITEID=1` has to be set during
+compilation. Additionally, the environment variable `AFL_LLVM_LTO_STARTID` has
+to be set to the added edge count values of all previous compiled instrumented
+shared libraries for that target. E.g., for the first shared library this would
+be `AFL_LLVM_LTO_STARTID=0` and afl-clang-lto will then report how many edges
+have been instrumented (let's say it reported 1000 instrumented edges). The
+second shared library then has to be set to that value
 (`AFL_LLVM_LTO_STARTID=1000` in our example), for the third to all previous
 counts added, etc.
 
-The final program compilation step then may *not* have `AFL_LLVM_LTO_DONTWRITEID`
-set, and `AFL_LLVM_LTO_STARTID` must be set to all edge counts added of all shared
-libraries it will be linked to.
+The final program compilation step then may *not* have
+`AFL_LLVM_LTO_DONTWRITEID` set, and `AFL_LLVM_LTO_STARTID` must be set to all
+edge counts added of all shared libraries it will be linked to.
 
-This is quite some hands-on work, so better stay away from instrumenting
-shared libraries :-)
+This is quite some hands-on work, so better stay away from instrumenting shared
+libraries. :-)
 
 ## AUTODICTIONARY feature
 
 While compiling, a dictionary based on string comparisons is automatically
-generated and put into the target binary. This dictionary is transfered to afl-fuzz
-on start. This improves coverage statistically by 5-10% :)
+generated and put into the target binary. This dictionary is transferred to
+afl-fuzz on start. This improves coverage statistically by 5-10%. :)
 
-Note that if for any reason you do not want to use the autodictionary feature
+Note that if for any reason you do not want to use the autodictionary feature,
 then just set the environment variable `AFL_NO_AUTODICT` when starting afl-fuzz.
 
 ## Fixed memory map
 
 To speed up fuzzing a little bit more, it is possible to set a fixed shared
-memory map.
-Recommended is the value 0x10000.
+memory map. Recommended is the value 0x10000.
 
-In most cases this will work without any problems. However if a target uses
-early constructors, ifuncs or a deferred forkserver this can crash the target.
+In most cases, this will work without any problems. However, if a target uses
+early constructors, ifuncs, or a deferred forkserver, this can crash the target.
 
-Also on unusual operating systems/processors/kernels or weird libraries the
+Also, on unusual operating systems/processors/kernels or weird libraries the
 recommended 0x10000 address might not work, so then change the fixed address.
 
-To enable this feature set AFL_LLVM_MAP_ADDR with the address.
+To enable this feature, set `AFL_LLVM_MAP_ADDR` with the address.
 
 ## Document edge IDs
 
@@ -198,7 +202,7 @@ bytes or which functions were touched by an input.
 ## Solving difficult targets
 
 Some targets are difficult because the configure script does unusual stuff that
-is unexpected for afl. See the next chapter `Potential issues` for how to solve
+is unexpected for afl. See the next section `Potential issues` for how to solve
 these.
 
 ### Example: ffmpeg
@@ -206,143 +210,155 @@ these.
 An example of a hard to solve target is ffmpeg. Here is how to successfully
 instrument it:
 
-1. Get and extract the current ffmpeg and change to its directory
+1. Get and extract the current ffmpeg and change to its directory.
 
 2. Running configure with --cc=clang fails and various other items will fail
    when compiling, so we have to trick configure:
 
-```
-./configure --enable-lto --disable-shared --disable-inline-asm
-```
-
-3. Now the configuration is done - and we edit the settings in `./ffbuild/config.mak`
-   (-: the original line, +: what to change it into):
-```
--CC=gcc
-+CC=afl-clang-lto
--CXX=g++
-+CXX=afl-clang-lto++
--AS=gcc
-+AS=llvm-as
--LD=gcc
-+LD=afl-clang-lto++
--DEPCC=gcc
-+DEPCC=afl-clang-lto
--DEPAS=gcc
-+DEPAS=afl-clang-lto++
--AR=ar
-+AR=llvm-ar
--AR_CMD=ar
-+AR_CMD=llvm-ar
--NM_CMD=nm -g
-+NM_CMD=llvm-nm -g
--RANLIB=ranlib -D
-+RANLIB=llvm-ranlib -D
-```
-
-4. Then type make, wait for a long time and you are done :)
+    ```
+    ./configure --enable-lto --disable-shared --disable-inline-asm
+    ```
+
+3. Now the configuration is done - and we edit the settings in
+   `./ffbuild/config.mak` (-: the original line, +: what to change it into):
+
+    ```
+    -CC=gcc
+    +CC=afl-clang-lto
+    -CXX=g++
+    +CXX=afl-clang-lto++
+    -AS=gcc
+    +AS=llvm-as
+    -LD=gcc
+    +LD=afl-clang-lto++
+    -DEPCC=gcc
+    +DEPCC=afl-clang-lto
+    -DEPAS=gcc
+    +DEPAS=afl-clang-lto++
+    -AR=ar
+    +AR=llvm-ar
+    -AR_CMD=ar
+    +AR_CMD=llvm-ar
+    -NM_CMD=nm -g
+    +NM_CMD=llvm-nm -g
+    -RANLIB=ranlib -D
+    +RANLIB=llvm-ranlib -D
+    ```
+
+4. Then type make, wait for a long time, and you are done. :)
 
 ### Example: WebKit jsc
 
 Building jsc is difficult as the build script has bugs.
 
-1. checkout Webkit: 
-```
-svn checkout https://svn.webkit.org/repository/webkit/trunk WebKit
-cd WebKit
-```
+1. Checkout Webkit:
+
+    ```
+    svn checkout https://svn.webkit.org/repository/webkit/trunk WebKit
+    cd WebKit
+    ```
 
 2. Fix the build environment:
-```
-mkdir -p WebKitBuild/Release
-cd WebKitBuild/Release
-ln -s ../../../../../usr/bin/llvm-ar-12 llvm-ar-12
-ln -s ../../../../../usr/bin/llvm-ranlib-12 llvm-ranlib-12
-cd ../..
-```
 
-3. Build :)
+    ```
+    mkdir -p WebKitBuild/Release
+    cd WebKitBuild/Release
+    ln -s ../../../../../usr/bin/llvm-ar-12 llvm-ar-12
+    ln -s ../../../../../usr/bin/llvm-ranlib-12 llvm-ranlib-12
+    cd ../..
+    ```
 
-```
-Tools/Scripts/build-jsc --jsc-only --cli --cmakeargs="-DCMAKE_AR='llvm-ar-12' -DCMAKE_RANLIB='llvm-ranlib-12' -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_CC_FLAGS='-O3 -lrt' -DCMAKE_CXX_FLAGS='-O3 -lrt' -DIMPORTED_LOCATION='/lib/x86_64-linux-gnu/' -DCMAKE_CC=afl-clang-lto -DCMAKE_CXX=afl-clang-lto++ -DENABLE_STATIC_JSC=ON"
-```
+3. Build. :)
+
+    ```
+    Tools/Scripts/build-jsc --jsc-only --cli --cmakeargs="-DCMAKE_AR='llvm-ar-12' -DCMAKE_RANLIB='llvm-ranlib-12' -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_CC_FLAGS='-O3 -lrt' -DCMAKE_CXX_FLAGS='-O3 -lrt' -DIMPORTED_LOCATION='/lib/x86_64-linux-gnu/' -DCMAKE_CC=afl-clang-lto -DCMAKE_CXX=afl-clang-lto++ -DENABLE_STATIC_JSC=ON"
+    ```
 
 ## Potential issues
 
-### compiling libraries fails
+### Compiling libraries fails
 
 If you see this message:
+
 ```
 /bin/ld: libfoo.a: error adding symbols: archive has no index; run ranlib to add one
 ```
-This is because usually gnu gcc ranlib is being called which cannot deal with clang LTO files.
-The solution is simple: when you ./configure you also have to set RANLIB=llvm-ranlib and AR=llvm-ar
+
+This is because usually gnu gcc ranlib is being called which cannot deal with
+clang LTO files. The solution is simple: when you `./configure`, you also have
+to set `RANLIB=llvm-ranlib` and `AR=llvm-ar`.
 
 Solution:
+
 ```
 AR=llvm-ar RANLIB=llvm-ranlib CC=afl-clang-lto CXX=afl-clang-lto++ ./configure --disable-shared
 ```
-and on some targets you have to set AR=/RANLIB= even for make as the configure script does not save it.
-Other targets ignore environment variables and need the parameters set via
-`./configure --cc=... --cxx= --ranlib= ...` etc. (I am looking at you ffmpeg!).
 
+And on some targets you have to set `AR=/RANLIB=` even for `make` as the
+configure script does not save it. Other targets ignore environment variables
+and need the parameters set via `./configure --cc=... --cxx= --ranlib= ...` etc.
+(I am looking at you ffmpeg!)
+
+If you see this message:
 
-If you see this message
 ```
 assembler command failed ...
 ```
-then try setting `llvm-as` for configure:
+
+Then try setting `llvm-as` for configure:
+
 ```
 AS=llvm-as  ...
 ```
 
-### compiling programs still fail
+### Compiling programs still fail
 
 afl-clang-lto is still work in progress.
 
 Known issues:
-  * Anything that llvm 11+ cannot compile, afl-clang-lto cannot compile either - obviously
-  * Anything that does not compile with LTO, afl-clang-lto cannot compile either - obviously
+* Anything that llvm 11+ cannot compile, afl-clang-lto cannot compile either -
+  obviously.
+* Anything that does not compile with LTO, afl-clang-lto cannot compile either -
+  obviously.
 
-Hence if building a target with afl-clang-lto fails try to build it with llvm12
-and LTO enabled (`CC=clang-12` `CXX=clang++-12` `CFLAGS=-flto=full` and
-`CXXFLAGS=-flto=full`).
+Hence, if building a target with afl-clang-lto fails, try to build it with
+llvm12 and LTO enabled (`CC=clang-12`, `CXX=clang++-12`, `CFLAGS=-flto=full`,
+and `CXXFLAGS=-flto=full`).
 
-If this succeeeds then there is an issue with afl-clang-lto. Please report at
-[https://github.com/AFLplusplus/AFLplusplus/issues/226](https://github.com/AFLplusplus/AFLplusplus/issues/226)
+If this succeeds, then there is an issue with afl-clang-lto. Please report at
+[https://github.com/AFLplusplus/AFLplusplus/issues/226](https://github.com/AFLplusplus/AFLplusplus/issues/226).
 
-Even some targets where clang-12 fails can be build if the fail is just in
+Even some targets where clang-12 fails can be built if the fail is just in
 `./configure`, see `Solving difficult targets` above.
 
 ## History
 
-This was originally envisioned by hexcoder- in Summer 2019, however we saw no
-way to create a pass that is run at link time - although there is a option
-for this in the PassManager: EP_FullLinkTimeOptimizationLast
-("Fun" info - nobody knows what this is doing. And the developer who
-implemented this didn't respond to emails.)
-
-In December then came the idea to implement this as a pass that is run via
-the llvm "opt" program, which is performed via an own linker that afterwards
-calls the real linker.
-This was first implemented in January and work ... kinda.
-The LTO time instrumentation worked, however "how" the basic blocks were
-instrumented was a problem, as reducing duplicates turned out to be very,
-very difficult with a program that has so many paths and therefore so many
-dependencies. A lot of strategies were implemented - and failed.
-And then sat solvers were tried, but with over 10.000 variables that turned
-out to be a dead-end too.
+This was originally envisioned by hexcoder- in Summer 2019. However, we saw no
+way to create a pass that is run at link time - although there is a option for
+this in the PassManager: EP_FullLinkTimeOptimizationLast. ("Fun" info - nobody
+knows what this is doing. And the developer who implemented this didn't respond
+to emails.)
+
+In December then came the idea to implement this as a pass that is run via the
+llvm "opt" program, which is performed via an own linker that afterwards calls
+the real linker. This was first implemented in January and work ... kinda. The
+LTO time instrumentation worked, however, "how" the basic blocks were
+instrumented was a problem, as reducing duplicates turned out to be very, very
+difficult with a program that has so many paths and therefore so many
+dependencies. A lot of strategies were implemented - and failed. And then sat
+solvers were tried, but with over 10.000 variables that turned out to be a
+dead-end too.
 
 The final idea to solve this came from domenukk who proposed to insert a block
-into an edge and then just use incremental counters ... and this worked!
-After some trials and errors to implement this vanhauser-thc found out that
-there is actually an llvm function for this: SplitEdge() :-)
+into an edge and then just use incremental counters ... and this worked! After
+some trials and errors to implement this vanhauser-thc found out that there is
+actually an llvm function for this: SplitEdge() :-)
 
-Still more problems came up though as this only works without bugs from
-llvm 9 onwards, and with high optimization the link optimization ruins
-the instrumented control flow graph.
+Still more problems came up though as this only works without bugs from llvm 9
+onwards, and with high optimization the link optimization ruins the instrumented
+control flow graph.
 
-This is all now fixed with llvm 11+. The llvm's own linker is now able to
-load passes and this bypasses all problems we had.
+This is all now fixed with llvm 11+. The llvm's own linker is now able to load
+passes and this bypasses all problems we had.
 
-Happy end :)
+Happy end :)
\ No newline at end of file
diff --git a/instrumentation/README.neverzero.md b/instrumentation/README.neverzero.md
deleted file mode 100644
index 9bcae324..00000000
--- a/instrumentation/README.neverzero.md
+++ /dev/null
@@ -1,41 +0,0 @@
-# NeverZero counters for LLVM instrumentation
-
-## Usage
-
-In larger, complex or reiterative programs the byte sized counters that collect
-the edge coverage can easily fill up and wrap around.
-This is not that much of an issue - unless by chance it wraps just to a value
-of zero when the program execution ends.
-In this case afl-fuzz is not able to see that the edge has been accessed and
-will ignore it.
-
-NeverZero prevents this behaviour. If a counter wraps, it jumps over the value
-0 directly to a 1. This improves path discovery (by a very little amount)
-at a very little cost (one instruction per edge).
-
-(The alternative of saturated counters has been tested also and proved to be
-inferior in terms of path discovery.)
-
-This is implemented in afl-gcc and afl-gcc-fast, however for llvm_mode this is
-optional if multithread safe counters are selected or the llvm version is below
-9 - as there are severe performance costs in these cases.
-
-If you want to enable this for llvm versions below 9 or thread safe counters
-then set
-
-```
-export AFL_LLVM_NOT_ZERO=1
-```
-
-In case you are on llvm 9 or greater and you do not want this behaviour then
-you can set:
-```
-AFL_LLVM_SKIP_NEVERZERO=1
-```
-If the target does not have extensive loops or functions that are called
-a lot then this can give a small performance boost.
-
-Please note that the default counter implementations are not thread safe!
-
-Support for thread safe counters in mode LLVM CLASSIC can be activated with setting
-`AFL_LLVM_THREADSAFE_INST=1`.
\ No newline at end of file
diff --git a/instrumentation/README.ngram.md b/instrumentation/README.ngram.md
deleted file mode 100644
index da61ef32..00000000
--- a/instrumentation/README.ngram.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# AFL N-Gram Branch Coverage
-
-## Source
-
-This is an LLVM-based implementation of the n-gram branch coverage proposed in
-the paper ["Be Sensitive and Collaborative: Analzying Impact of Coverage Metrics
-in Greybox Fuzzing"](https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf),
-by Jinghan Wang, et. al.
-
-Note that the original implementation (available
-[here](https://github.com/bitsecurerlab/afl-sensitive))
-is built on top of AFL's QEMU mode.
-This is essentially a port that uses LLVM vectorized instructions (available from
-llvm versions 4.0.1 and higher) to achieve the same results when compiling source code.
-
-In math the branch coverage is performed as follows:
-`map[current_location ^ prev_location[0] >> 1 ^ prev_location[1] >> 1 ^ ... up to n-1`] += 1`
-
-## Usage
-
-The size of `n` (i.e., the number of branches to remember) is an option
-that is specified either in the `AFL_LLVM_INSTRUMENT=NGRAM-{value}` or the
-`AFL_LLVM_NGRAM_SIZE` environment variable.
-Good values are 2, 4 or 8, valid are 2-16.
-
-It is highly recommended to increase the MAP_SIZE_POW2 definition in
-config.h to at least 18 and maybe up to 20 for this as otherwise too
-many map collisions occur.
diff --git a/instrumentation/README.out_of_line.md b/instrumentation/README.out_of_line.md
deleted file mode 100644
index 346fe98d..00000000
--- a/instrumentation/README.out_of_line.md
+++ /dev/null
@@ -1,19 +0,0 @@
-## Using AFL++ without inlined instrumentation
-
-  This file describes how you can disable inlining of instrumentation.
-
-
-By default, the GCC plugin will duplicate the effects of calling
-`__afl_trace` (see `afl-gcc-rt.o.c`) in instrumented code, instead of
-issuing function calls.
-
-The calls are presumed to be slower, more so because the rt file
-itself is not optimized by the compiler.
-
-Setting `AFL_GCC_OUT_OF_LINE=1` in the environment while compiling code
-with the plugin will disable this inlining, issuing calls to the
-unoptimized runtime instead.
-
-You probably don't want to do this, but it might be useful in certain
-AFL debugging scenarios, and it might work as a fallback in case
-something goes wrong with the inlined instrumentation.
diff --git a/instrumentation/README.persistent_mode.md b/instrumentation/README.persistent_mode.md
index c6ba2103..14e59f4a 100644
--- a/instrumentation/README.persistent_mode.md
+++ b/instrumentation/README.persistent_mode.md
@@ -3,23 +3,23 @@
 ## 1) Introduction
 
 In persistent mode, AFL++ fuzzes a target multiple times in a single forked
-process, instead of forking a new process for each fuzz execution.
-This is the most effective way to fuzz, as the speed can easily be x10 or x20
-times faster without any disadvanges.
-*All professional fuzzing uses this mode.*
+process, instead of forking a new process for each fuzz execution. This is the
+most effective way to fuzz, as the speed can easily be x10 or x20 times faster
+without any disadvantages. *All professional fuzzing uses this mode.*
 
 Persistent mode requires that the target can be called in one or more functions,
 and that it's state can be completely reset so that multiple calls can be
 performed without resource leaks, and that earlier runs will have no impact on
-future runs (an indicator for this is the `stability` value in the `afl-fuzz`
-UI, if this decreases to lower values in persistent mode compared to
-non-persistent mode, that the fuzz target keeps state).
+future runs. An indicator for this is the `stability` value in the `afl-fuzz`
+UI. If this decreases to lower values in persistent mode compared to
+non-persistent mode, then the fuzz target keeps state.
 
 Examples can be found in [utils/persistent_mode](../utils/persistent_mode).
 
-## 2) TLDR;
+## 2) TL;DR:
 
 Example `fuzz_target.c`:
+
 ```c
 #include "what_you_need_for_your_target.h"
 
@@ -27,7 +27,7 @@ __AFL_FUZZ_INIT();
 
 main() {
 
-  // anything else here, eg. command line arguments, initialization, etc.
+  // anything else here, e.g. command line arguments, initialization, etc.
 
 #ifdef __AFL_HAVE_MANUAL_CONTROL
   __AFL_INIT();
@@ -54,14 +54,16 @@ main() {
 
 }
 ```
+
 And then compile:
+
 ```
 afl-clang-fast -o fuzz_target fuzz_target.c -lwhat_you_need_for_your_target
 ```
-And that is it!
-The speed increase is usually x10 to x20.
 
-If you want to be able to compile the target without afl-clang-fast/lto then
+And that is it! The speed increase is usually x10 to x20.
+
+If you want to be able to compile the target without afl-clang-fast/lto, then
 add this just after the includes:
 
 ```c
@@ -72,20 +74,20 @@ add this just after the includes:
   #define __AFL_FUZZ_TESTCASE_BUF fuzz_buf
   #define __AFL_FUZZ_INIT() void sync(void);
   #define __AFL_LOOP(x) ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ? 1 : 0)
-  #define __AFL_INIT() sync() 
+  #define __AFL_INIT() sync()
 #endif
 ```
 
 ## 3) Deferred initialization
 
-AFL tries to optimize performance by executing the targeted binary just once,
-stopping it just before `main()`, and then cloning this "main" process to get
-a steady supply of targets to fuzz.
+AFL++ tries to optimize performance by executing the targeted binary just once,
+stopping it just before `main()`, and then cloning this "main" process to get a
+steady supply of targets to fuzz.
 
-Although this approach eliminates much of the OS-, linker- and libc-level
-costs of executing the program, it does not always help with binaries that
-perform other time-consuming initialization steps - say, parsing a large config
-file before getting to the fuzzed data.
+Although this approach eliminates much of the OS-, linker- and libc-level costs
+of executing the program, it does not always help with binaries that perform
+other time-consuming initialization steps - say, parsing a large config file
+before getting to the fuzzed data.
 
 In such cases, it's beneficial to initialize the forkserver a bit later, once
 most of the initialization work is already done, but before the binary attempts
@@ -93,22 +95,21 @@ to read the fuzzed input and parse it; in some cases, this can offer a 10x+
 performance gain. You can implement delayed initialization in LLVM mode in a
 fairly simple way.
 
-First, find a suitable location in the code where the delayed cloning can 
-take place. This needs to be done with *extreme* care to avoid breaking the
-binary. In particular, the program will probably malfunction if you select
-a location after:
+First, find a suitable location in the code where the delayed cloning can take
+place. This needs to be done with *extreme* care to avoid breaking the binary.
+In particular, the program will probably malfunction if you select a location
+after:
 
-  - The creation of any vital threads or child processes - since the forkserver
-    can't clone them easily.
+- The creation of any vital threads or child processes - since the forkserver
+  can't clone them easily.
 
-  - The initialization of timers via `setitimer()` or equivalent calls.
+- The initialization of timers via `setitimer()` or equivalent calls.
 
-  - The creation of temporary files, network sockets, offset-sensitive file
-    descriptors, and similar shared-state resources - but only provided that
-    their state meaningfully influences the behavior of the program later on.
+- The creation of temporary files, network sockets, offset-sensitive file
+  descriptors, and similar shared-state resources - but only provided that their
+  state meaningfully influences the behavior of the program later on.
 
-  - Any access to the fuzzed input, including reading the metadata about its
-    size.
+- Any access to the fuzzed input, including reading the metadata about its size.
 
 With the location selected, add this code in the appropriate spot:
 
@@ -126,13 +127,12 @@ Finally, recompile the program with afl-clang-fast/afl-clang-lto/afl-gcc-fast
 (afl-gcc or afl-clang will *not* generate a deferred-initialization binary) -
 and you should be all set!
 
-
 ## 4) Persistent mode
 
 Some libraries provide APIs that are stateless, or whose state can be reset in
 between processing different input files. When such a reset is performed, a
 single long-lived process can be reused to try out multiple test cases,
-eliminating the need for repeated fork() calls and the associated OS overhead.
+eliminating the need for repeated `fork()` calls and the associated OS overhead.
 
 The basic structure of the program that does this would be:
 
@@ -145,34 +145,34 @@ The basic structure of the program that does this would be:
 
   }
 
-  /* Exit normally */
+  /* Exit normally. */
 ```
 
-The numerical value specified within the loop controls the maximum number
-of iterations before AFL will restart the process from scratch. This minimizes
+The numerical value specified within the loop controls the maximum number of
+iterations before AFL++ will restart the process from scratch. This minimizes
 the impact of memory leaks and similar glitches; 1000 is a good starting point,
-and going much higher increases the likelihood of hiccups without giving you
-any real performance benefits.
+and going much higher increases the likelihood of hiccups without giving you any
+real performance benefits.
 
-A more detailed template is shown in `../utils/persistent_mode/.`
-Similarly to the previous mode, the feature works only with afl-clang-fast; 
-`#ifdef` guards can be used to suppress it when using other compilers.
+A more detailed template is shown in
+[utils/persistent_mode](../utils/persistent_mode). Similarly to the deferred
+initialization, the feature works only with afl-clang-fast; `#ifdef` guards can
+be used to suppress it when using other compilers.
 
-Note that as with the previous mode, the feature is easy to misuse; if you
-do not fully reset the critical state, you may end up with false positives or
-waste a whole lot of CPU power doing nothing useful at all. Be particularly
+Note that as with the deferred initialization, the feature is easy to misuse; if
+you do not fully reset the critical state, you may end up with false positives
+or waste a whole lot of CPU power doing nothing useful at all. Be particularly
 wary of memory leaks and of the state of file descriptors.
 
-PS. Because there are task switches still involved, the mode isn't as fast as
-"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot
-faster than the normal `fork()` model, and compared to in-process fuzzing,
-should be a lot more robust.
+When running in this mode, the execution paths will inherently vary a bit
+depending on whether the input loop is being entered for the first time or
+executed again.
 
 ## 5) Shared memory fuzzing
 
-You can speed up the fuzzing process even more by receiving the fuzzing data
-via shared memory instead of stdin or files.
-This is a further speed multiplier of about 2x.
+You can speed up the fuzzing process even more by receiving the fuzzing data via
+shared memory instead of stdin or files. This is a further speed multiplier of
+about 2x.
 
 Setting this up is very easy:
 
@@ -181,14 +181,18 @@ After the includes set the following macro:
 ```c
 __AFL_FUZZ_INIT();
 ```
-Directly at the start of main - or if you are using the deferred forkserver
-with `__AFL_INIT()` then *after* `__AFL_INIT()` :
+
+Directly at the start of main - or if you are using the deferred forkserver with
+`__AFL_INIT()`, then *after* `__AFL_INIT()`:
+
 ```c
   unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
 ```
 
 Then as first line after the `__AFL_LOOP` while loop:
+
 ```c
   int len = __AFL_FUZZ_TESTCASE_LEN;
 ```
-and that is all!
+
+And that is all!
\ No newline at end of file
diff --git a/instrumentation/README.snapshot.md b/instrumentation/README.snapshot.md
deleted file mode 100644
index c794c2fd..00000000
--- a/instrumentation/README.snapshot.md
+++ /dev/null
@@ -1,18 +0,0 @@
-# AFL++ snapshot feature
-
-**NOTE:** the snapshot lkm is currently not supported and needs a maintainer :-)
-
-Snapshotting is a feature that makes a snapshot from a process and then
-restores its state, which is faster then forking it again.
-
-All targets compiled with llvm_mode are automatically enabled for the
-snapshot feature.
-
-To use the snapshot feature for fuzzing compile and load this kernel
-module: [https://github.com/AFLplusplus/AFL-Snapshot-LKM](https://github.com/AFLplusplus/AFL-Snapshot-LKM)
-
-Note that is has little value for persistent (__AFL_LOOP) fuzzing.
-
-## Notes
-
-Snapshot does not work with multithreaded targets yet. Still in WIP, it is now usable only for single threaded applications.
diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc
index e06f8b93..8d7f0c80 100644
--- a/instrumentation/SanitizerCoverageLTO.so.cc
+++ b/instrumentation/SanitizerCoverageLTO.so.cc
@@ -235,6 +235,8 @@ class ModuleSanitizerCoverage {
   uint32_t                         autodictionary = 1;
   uint32_t                         inst = 0;
   uint32_t                         afl_global_id = 0;
+  uint32_t                         unhandled = 0;
+  uint32_t                         select_cnt = 0;
   uint64_t                         map_addr = 0;
   const char *                     skip_nozero = NULL;
   const char *                     use_threadsafe_counters = nullptr;
@@ -250,7 +252,7 @@ class ModuleSanitizerCoverage {
   Module *                         Mo = NULL;
   GlobalVariable *                 AFLMapPtr = NULL;
   Value *                          MapPtrFixed = NULL;
-  FILE *                           documentFile = NULL;
+  std::ofstream                    dFile;
   size_t                           found = 0;
   // afl++ END
 
@@ -446,8 +448,8 @@ bool ModuleSanitizerCoverage::instrumentModule(
 
   if ((ptr = getenv("AFL_LLVM_DOCUMENT_IDS")) != NULL) {
 
-    if ((documentFile = fopen(ptr, "a")) == NULL)
-      WARNF("Cannot access document file %s", ptr);
+    dFile.open(ptr, std::ofstream::out | std::ofstream::app);
+    if (dFile.is_open()) WARNF("Cannot access document file %s", ptr);
 
   }
 
@@ -619,7 +621,6 @@ bool ModuleSanitizerCoverage::instrumentModule(
             bool   isStrncasecmp = true;
             bool   isIntMemcpy = true;
             bool   isStdString = true;
-            bool   addedNull = false;
             size_t optLen = 0;
 
             Function *Callee = callInst->getCalledFunction();
@@ -799,7 +800,6 @@ bool ModuleSanitizerCoverage::instrumentModule(
                   if (literalLength + 1 == optLength) {
 
                     Str2.append("\0", 1);  // add null byte
-                    // addedNull = true;
 
                   }
 
@@ -907,8 +907,8 @@ bool ModuleSanitizerCoverage::instrumentModule(
 
                 if (optLen < 2) { continue; }
                 if (literalLength + 1 == optLen) {  // add null byte
+
                   thestring.append("\0", 1);
-                  addedNull = true;
 
                 }
 
@@ -920,14 +920,18 @@ bool ModuleSanitizerCoverage::instrumentModule(
             // was not already added
             if (!isMemcmp) {
 
-              if (addedNull == false && thestring[optLen - 1] != '\0') {
+              /*
+                            if (addedNull == false && thestring[optLen - 1] !=
+                 '\0') {
 
-                thestring.append("\0", 1);  // add null byte
-                optLen++;
+                              thestring.append("\0", 1);  // add null byte
+                              optLen++;
 
-              }
+                            }
 
-              if (!isStdString) {
+              */
+              if (!isStdString &&
+                  thestring.find('\0', 0) != std::string::npos) {
 
                 // ensure we do not have garbage
                 size_t offset = thestring.find('\0', 0);
@@ -1003,12 +1007,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
     instrumentFunction(F, DTCallback, PDTCallback);
 
   // afl++ START
-  if (documentFile) {
-
-    fclose(documentFile);
-    documentFile = NULL;
-
-  }
+  if (dFile.is_open()) dFile.close();
 
   if (!getenv("AFL_LLVM_LTO_DONTWRITEID") || dictionary.size() || map_addr) {
 
@@ -1045,8 +1044,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
           M, Int64Tyi, true, GlobalValue::ExternalLinkage, 0, "__afl_map_addr");
       ConstantInt *MapAddr = ConstantInt::get(Int64Tyi, map_addr);
       StoreInst *  StoreMapAddr = IRB.CreateStore(MapAddr, AFLMapAddrFixed);
-      StoreMapAddr->setMetadata(M.getMDKindID("nosanitize"),
-                                MDNode::get(Ctx, None));
+      ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreMapAddr);
 
     }
 
@@ -1054,22 +1052,20 @@ bool ModuleSanitizerCoverage::instrumentModule(
 
       uint32_t write_loc = afl_global_id;
 
-      if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3);
+      write_loc = (((afl_global_id + 8) >> 3) << 3);
 
       GlobalVariable *AFLFinalLoc =
           new GlobalVariable(M, Int32Tyi, true, GlobalValue::ExternalLinkage, 0,
                              "__afl_final_loc");
       ConstantInt *const_loc = ConstantInt::get(Int32Tyi, write_loc);
       StoreInst *  StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc);
-      StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"),
-                                 MDNode::get(Ctx, None));
+      ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreFinalLoc);
 
     }
 
     if (dictionary.size()) {
 
       size_t memlen = 0, count = 0, offset = 0;
-      char * ptr;
 
       // sort and unique the dictionary
       std::sort(dictionary.begin(), dictionary.end());
@@ -1089,13 +1085,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
 
       if (count) {
 
-        if ((ptr = (char *)malloc(memlen + count)) == NULL) {
-
-          fprintf(stderr, "Error: malloc for %lu bytes failed!\n",
-                  memlen + count);
-          exit(-1);
-
-        }
+        auto ptrhld = std::unique_ptr<char[]>(new char[memlen + count]);
 
         count = 0;
 
@@ -1103,8 +1093,8 @@ bool ModuleSanitizerCoverage::instrumentModule(
 
           if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) {
 
-            ptr[offset++] = (uint8_t)token.length();
-            memcpy(ptr + offset, token.c_str(), token.length());
+            ptrhld.get()[offset++] = (uint8_t)token.length();
+            memcpy(ptrhld.get() + offset, token.c_str(), token.length());
             offset += token.length();
             count++;
 
@@ -1117,17 +1107,16 @@ bool ModuleSanitizerCoverage::instrumentModule(
                                0, "__afl_dictionary_len");
         ConstantInt *const_len = ConstantInt::get(Int32Tyi, offset);
         StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen);
-        StoreDictLen->setMetadata(M.getMDKindID("nosanitize"),
-                                  MDNode::get(Ctx, None));
+        ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreDictLen);
 
         ArrayType *ArrayTy = ArrayType::get(IntegerType::get(Ctx, 8), offset);
         GlobalVariable *AFLInternalDictionary = new GlobalVariable(
             M, ArrayTy, true, GlobalValue::ExternalLinkage,
             ConstantDataArray::get(Ctx,
-                                   *(new ArrayRef<char>((char *)ptr, offset))),
+                                   *(new ArrayRef<char>(ptrhld.get(), offset))),
             "__afl_internal_dictionary");
         AFLInternalDictionary->setInitializer(ConstantDataArray::get(
-            Ctx, *(new ArrayRef<char>((char *)ptr, offset))));
+            Ctx, *(new ArrayRef<char>(ptrhld.get(), offset))));
         AFLInternalDictionary->setConstant(true);
 
         GlobalVariable *AFLDictionary = new GlobalVariable(
@@ -1138,8 +1127,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
         Value *AFLDictPtr =
             IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Tyi, 0));
         StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary);
-        StoreDict->setMetadata(M.getMDKindID("nosanitize"),
-                               MDNode::get(Ctx, None));
+        ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreDict);
 
       }
 
@@ -1156,15 +1144,16 @@ bool ModuleSanitizerCoverage::instrumentModule(
     else {
 
       char modeline[100];
-      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
+      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s%s",
                getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
                getenv("AFL_USE_ASAN") ? ", ASAN" : "",
                getenv("AFL_USE_MSAN") ? ", MSAN" : "",
+               getenv("AFL_USE_TSAN") ? ", TSAN" : "",
                getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
                getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
-      OKF("Instrumented %u locations with no collisions (on average %llu "
-          "collisions would be in afl-gcc/vanilla AFL) (%s mode).",
-          inst, calculateCollisions(inst), modeline);
+      OKF("Instrumented %u locations (%u selects) without collisions (%llu "
+          "collisions have been avoided) (%s mode).",
+          inst, select_cnt, calculateCollisions(inst), modeline);
 
     }
 
@@ -1286,6 +1275,7 @@ void ModuleSanitizerCoverage::instrumentFunction(
   const DominatorTree *    DT = DTCallback(F);
   const PostDominatorTree *PDT = PDTCallback(F);
   bool                     IsLeafFunc = true;
+  uint32_t                 skip_next = 0, local_selects = 0;
 
   for (auto &BB : F) {
 
@@ -1299,10 +1289,164 @@ void ModuleSanitizerCoverage::instrumentFunction(
         if (!Callee) continue;
         if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
         StringRef FuncName = Callee->getName();
+        if (!FuncName.compare(StringRef("dlopen")) ||
+            !FuncName.compare(StringRef("_dlopen"))) {
+
+          fprintf(stderr,
+                  "WARNING: dlopen() detected. To have coverage for a library "
+                  "that your target dlopen()'s this must either happen before "
+                  "__AFL_INIT() or you must use AFL_PRELOAD to preload all "
+                  "dlopen()'ed libraries!\n");
+          continue;
+
+        }
+
         if (FuncName.compare(StringRef("__afl_coverage_interesting"))) continue;
 
         Value *val = ConstantInt::get(Int32Ty, ++afl_global_id);
         callInst->setOperand(1, val);
+        ++inst;
+
+      }
+
+      SelectInst *selectInst = nullptr;
+
+      /*
+            std::string errMsg;
+            raw_string_ostream os(errMsg);
+            IN.print(os);
+            fprintf(stderr, "X(%u): %s\n", skip_next, os.str().c_str());
+      */
+      if (!skip_next && (selectInst = dyn_cast<SelectInst>(&IN))) {
+
+        uint32_t    vector_cnt = 0;
+        Value *     condition = selectInst->getCondition();
+        Value *     result;
+        auto        t = condition->getType();
+        IRBuilder<> IRB(selectInst->getNextNode());
+
+        ++select_cnt;
+
+        if (t->getTypeID() == llvm::Type::IntegerTyID) {
+
+          Value *val1 = ConstantInt::get(Int32Ty, ++afl_global_id);
+          Value *val2 = ConstantInt::get(Int32Ty, ++afl_global_id);
+          result = IRB.CreateSelect(condition, val1, val2);
+          skip_next = 1;
+          inst += 2;
+
+        } else
+
+#if LLVM_VERSION_MAJOR >= 14
+            if (t->getTypeID() == llvm::Type::FixedVectorTyID) {
+
+          FixedVectorType *tt = dyn_cast<FixedVectorType>(t);
+          if (tt) {
+
+            uint32_t elements = tt->getElementCount().getFixedValue();
+            vector_cnt = elements;
+            inst += vector_cnt * 2;
+            if (elements) {
+
+              FixedVectorType *GuardPtr1 =
+                  FixedVectorType::get(Int32Ty, elements);
+              FixedVectorType *GuardPtr2 =
+                  FixedVectorType::get(Int32Ty, elements);
+              Value *x, *y;
+
+              Value *val1 = ConstantInt::get(Int32Ty, ++afl_global_id);
+              Value *val2 = ConstantInt::get(Int32Ty, ++afl_global_id);
+              x = IRB.CreateInsertElement(GuardPtr1, val1, (uint64_t)0);
+              y = IRB.CreateInsertElement(GuardPtr2, val2, (uint64_t)0);
+
+              for (uint64_t i = 1; i < elements; i++) {
+
+                val1 = ConstantInt::get(Int32Ty, ++afl_global_id);
+                val2 = ConstantInt::get(Int32Ty, ++afl_global_id);
+                x = IRB.CreateInsertElement(GuardPtr1, val1, i);
+                y = IRB.CreateInsertElement(GuardPtr2, val2, i);
+
+              }
+
+              result = IRB.CreateSelect(condition, x, y);
+              skip_next = 1;
+
+            }
+
+          }
+
+        } else
+
+#endif
+        {
+
+          unhandled++;
+          continue;
+
+        }
+
+        local_selects++;
+        uint32_t vector_cur = 0;
+        /* Load SHM pointer */
+        LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
+        ModuleSanitizerCoverage::SetNoSanitizeMetadata(MapPtr);
+
+        while (1) {
+
+          /* Get CurLoc */
+          Value *MapPtrIdx = nullptr;
+
+          /* Load counter for CurLoc */
+          if (!vector_cnt) {
+
+            MapPtrIdx = IRB.CreateGEP(MapPtr, result);
+
+          } else {
+
+            auto element = IRB.CreateExtractElement(result, vector_cur++);
+            MapPtrIdx = IRB.CreateGEP(MapPtr, element);
+
+          }
+
+          if (use_threadsafe_counters) {
+
+            IRB.CreateAtomicRMW(llvm::AtomicRMWInst::BinOp::Add, MapPtrIdx, One,
+#if LLVM_VERSION_MAJOR >= 13
+                                llvm::MaybeAlign(1),
+#endif
+                                llvm::AtomicOrdering::Monotonic);
+
+          } else {
+
+            LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
+            ModuleSanitizerCoverage::SetNoSanitizeMetadata(Counter);
+
+            /* Update bitmap */
+
+            Value *Incr = IRB.CreateAdd(Counter, One);
+
+            if (skip_nozero == NULL) {
+
+              auto cf = IRB.CreateICmpEQ(Incr, Zero);
+              auto carry = IRB.CreateZExt(cf, Int8Ty);
+              Incr = IRB.CreateAdd(Incr, carry);
+
+            }
+
+            auto nosan = IRB.CreateStore(Incr, MapPtrIdx);
+            ModuleSanitizerCoverage::SetNoSanitizeMetadata(nosan);
+
+          }
+
+          if (!vector_cnt || vector_cnt == vector_cur) { break; }
+
+        }
+
+        skip_next = 1;
+
+      } else {
+
+        skip_next = 0;
 
       }
 
@@ -1336,7 +1480,7 @@ GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
       *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
       Constant::getNullValue(ArrayTy), "__sancov_gen_");
 
-#if LLVM_VERSION_MAJOR > 12
+#if LLVM_VERSION_MAJOR >= 13
   if (TargetTriple.supportsCOMDAT() &&
       (TargetTriple.isOSBinFormatELF() || !F.isInterposable()))
     if (auto Comdat = getOrCreateFunctionComdat(F, TargetTriple))
@@ -1496,10 +1640,10 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
   if (Options.TracePC) {
 
     IRB.CreateCall(SanCovTracePC)
-#if LLVM_VERSION_MAJOR < 12
-        ->cannotMerge();  // gets the PC using GET_CALLER_PC.
-#else
+#if LLVM_VERSION_MAJOR >= 12
         ->setCannotMerge();  // gets the PC using GET_CALLER_PC.
+#else
+        ->cannotMerge();  // gets the PC using GET_CALLER_PC.
 #endif
 
   }
@@ -1509,12 +1653,12 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
     // afl++ START
     ++afl_global_id;
 
-    if (documentFile) {
+    if (dFile.is_open()) {
 
       unsigned long long int moduleID =
           (((unsigned long long int)(rand() & 0xffffffff)) << 32) | getpid();
-      fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%u\n", moduleID,
-              F.getName().str().c_str(), afl_global_id);
+      dFile << "ModuleID=" << moduleID << " Function=" << F.getName().str()
+            << " edgeID=" << afl_global_id << "\n";
 
     }
 
@@ -1533,8 +1677,7 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
     } else {
 
       LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
-      MapPtr->setMetadata(Mo->getMDKindID("nosanitize"),
-                          MDNode::get(*Ct, None));
+      ModuleSanitizerCoverage::SetNoSanitizeMetadata(MapPtr);
       MapPtrIdx = IRB.CreateGEP(MapPtr, CurLoc);
 
     }
@@ -1551,8 +1694,7 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
     } else {
 
       LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
-      Counter->setMetadata(Mo->getMDKindID("nosanitize"),
-                           MDNode::get(*Ct, None));
+      ModuleSanitizerCoverage::SetNoSanitizeMetadata(Counter);
 
       Value *Incr = IRB.CreateAdd(Counter, One);
 
@@ -1564,8 +1706,8 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
 
       }
 
-      IRB.CreateStore(Incr, MapPtrIdx)
-          ->setMetadata(Mo->getMDKindID("nosanitize"), MDNode::get(*Ct, None));
+      auto nosan = IRB.CreateStore(Incr, MapPtrIdx);
+      ModuleSanitizerCoverage::SetNoSanitizeMetadata(nosan);
 
     }
 
diff --git a/instrumentation/SanitizerCoveragePCGUARD.so.cc b/instrumentation/SanitizerCoveragePCGUARD.so.cc
index 48ad2d02..d5746cc7 100644
--- a/instrumentation/SanitizerCoveragePCGUARD.so.cc
+++ b/instrumentation/SanitizerCoveragePCGUARD.so.cc
@@ -36,7 +36,8 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/SpecialCaseList.h"
-#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
+#if LLVM_VERSION_MAJOR >= 11 || \
+    (LLVM_VERSION_MAJOR == 10 && LLVM_VERSION_MINOR >= 1)
   #include "llvm/Support/VirtualFileSystem.h"
 #endif
 #include "llvm/Support/raw_ostream.h"
@@ -127,7 +128,7 @@ class ModuleSanitizerCoverage {
  public:
   ModuleSanitizerCoverage(
       const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()
-#if LLVM_MAJOR > 10
+#if (LLVM_VERSION_MAJOR >= 11)
           ,
       const SpecialCaseList *Allowlist = nullptr,
       const SpecialCaseList *Blocklist = nullptr
@@ -203,7 +204,7 @@ class ModuleSanitizerCoverage {
 
   SanitizerCoverageOptions Options;
 
-  uint32_t        instr = 0;
+  uint32_t        instr = 0, selects = 0, unhandled = 0;
   GlobalVariable *AFLMapPtr = NULL;
   ConstantInt *   One = NULL;
   ConstantInt *   Zero = NULL;
@@ -215,7 +216,7 @@ class ModuleSanitizerCoverageLegacyPass : public ModulePass {
  public:
   ModuleSanitizerCoverageLegacyPass(
       const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()
-#if LLVM_VERSION_MAJOR > 10
+#if LLVM_VERSION_MAJOR >= 11
           ,
       const std::vector<std::string> &AllowlistFiles =
           std::vector<std::string>(),
@@ -233,7 +234,7 @@ class ModuleSanitizerCoverageLegacyPass : public ModulePass {
   bool runOnModule(Module &M) override {
 
     ModuleSanitizerCoverage ModuleSancov(Options
-#if LLVM_MAJOR > 10
+#if (LLVM_VERSION_MAJOR >= 11)
                                          ,
                                          Allowlist.get(), Blocklist.get()
 #endif
@@ -283,7 +284,7 @@ PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &               M,
                                                    ModuleAnalysisManager &MAM) {
 
   ModuleSanitizerCoverage ModuleSancov(Options
-#if LLVM_MAJOR > 10
+#if (LLVM_VERSION_MAJOR >= 11)
                                        ,
                                        Allowlist.get(), Blocklist.get()
 #endif
@@ -547,14 +548,16 @@ bool ModuleSanitizerCoverage::instrumentModule(
     else {
 
       char modeline[100];
-      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
+      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s%s",
                getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
                getenv("AFL_USE_ASAN") ? ", ASAN" : "",
                getenv("AFL_USE_MSAN") ? ", MSAN" : "",
+               getenv("AFL_USE_TSAN") ? ", TSAN" : "",
                getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
                getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
-      OKF("Instrumented %u locations with no collisions (%s mode).", instr,
-          modeline);
+      OKF("Instrumented %u locations with no collisions (%s mode) of which are "
+          "%u handled and %u unhandled selects.",
+          instr, modeline, selects, unhandled);
 
     }
 
@@ -747,7 +750,7 @@ GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
       *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
       Constant::getNullValue(ArrayTy), "__sancov_gen_");
 
-#if LLVM_VERSION_MAJOR > 12
+#if LLVM_VERSION_MAJOR >= 13
   if (TargetTriple.supportsCOMDAT() &&
       (TargetTriple.isOSBinFormatELF() || !F.isInterposable()))
     if (auto Comdat = getOrCreateFunctionComdat(F, TargetTriple))
@@ -760,7 +763,8 @@ GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
 #endif
 
   Array->setSection(getSectionName(Section));
-#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
+#if (LLVM_VERSION_MAJOR >= 11) || \
+    (LLVM_VERSION_MAJOR == 10 && LLVM_VERSION_MINOR >= 1)
   Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize()));
 #else
   Array->setAlignment(Align(4));  // cheating
@@ -833,9 +837,8 @@ bool ModuleSanitizerCoverage::InjectCoverage(Function &             F,
                                              ArrayRef<BasicBlock *> AllBlocks,
                                              bool IsLeafFunc) {
 
-  if (AllBlocks.empty()) return false;
+  uint32_t cnt_cov = 0, cnt_sel = 0, cnt_sel_inc = 0;
 
-  uint32_t special = 0;
   for (auto &BB : F) {
 
     for (auto &IN : BB) {
@@ -848,11 +851,51 @@ bool ModuleSanitizerCoverage::InjectCoverage(Function &             F,
         if (!Callee) continue;
         if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
         StringRef FuncName = Callee->getName();
+        if (!FuncName.compare(StringRef("dlopen")) ||
+            !FuncName.compare(StringRef("_dlopen"))) {
+
+          fprintf(stderr,
+                  "WARNING: dlopen() detected. To have coverage for a library "
+                  "that your target dlopen()'s this must either happen before "
+                  "__AFL_INIT() or you must use AFL_PRELOAD to preload all "
+                  "dlopen()'ed libraries!\n");
+          continue;
+
+        }
+
         if (FuncName.compare(StringRef("__afl_coverage_interesting"))) continue;
 
-        uint32_t id = 1 + instr + (uint32_t)AllBlocks.size() + special++;
-        Value *  val = ConstantInt::get(Int32Ty, id);
-        callInst->setOperand(1, val);
+        cnt_cov++;
+
+      }
+
+      SelectInst *selectInst = nullptr;
+
+      if ((selectInst = dyn_cast<SelectInst>(&IN))) {
+
+        Value *c = selectInst->getCondition();
+        auto   t = c->getType();
+        if (t->getTypeID() == llvm::Type::IntegerTyID) {
+
+          cnt_sel++;
+          cnt_sel_inc += 2;
+
+        }
+
+#if (LLVM_VERSION_MAJOR >= 12)
+        else if (t->getTypeID() == llvm::Type::FixedVectorTyID) {
+
+          FixedVectorType *tt = dyn_cast<FixedVectorType>(t);
+          if (tt) {
+
+            cnt_sel++;
+            cnt_sel_inc += tt->getElementCount().getKnownMinValue();
+
+          }
+
+        }
+
+#endif
 
       }
 
@@ -860,11 +903,256 @@ bool ModuleSanitizerCoverage::InjectCoverage(Function &             F,
 
   }
 
-  CreateFunctionLocalArrays(F, AllBlocks, special);
-  for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
-    InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc);
+  /* Create PCGUARD array */
+  CreateFunctionLocalArrays(F, AllBlocks, cnt_cov + cnt_sel_inc);
+  selects += cnt_sel;
+
+  uint32_t special = 0, local_selects = 0, skip_next = 0;
+
+  for (auto &BB : F) {
+
+    for (auto &IN : BB) {
+
+      CallInst *callInst = nullptr;
+
+      /*
+                                std::string errMsg;
+                                raw_string_ostream os(errMsg);
+                            IN.print(os);
+                            fprintf(stderr, "X: %s\n", os.str().c_str());
+      */
+      if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+        Function *Callee = callInst->getCalledFunction();
+        if (!Callee) continue;
+        if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
+        StringRef FuncName = Callee->getName();
+        if (FuncName.compare(StringRef("__afl_coverage_interesting"))) continue;
+
+        IRBuilder<> IRB(callInst);
+
+        Value *GuardPtr = IRB.CreateIntToPtr(
+            IRB.CreateAdd(
+                IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+                ConstantInt::get(IntptrTy, (++special + AllBlocks.size()) * 4)),
+            Int32PtrTy);
+
+        LoadInst *Idx = IRB.CreateLoad(GuardPtr);
+        ModuleSanitizerCoverage::SetNoSanitizeMetadata(Idx);
+
+        callInst->setOperand(1, Idx);
+
+      }
+
+      SelectInst *selectInst = nullptr;
+
+      if (!skip_next && (selectInst = dyn_cast<SelectInst>(&IN))) {
+
+        uint32_t    vector_cnt = 0;
+        Value *     condition = selectInst->getCondition();
+        Value *     result;
+        auto        t = condition->getType();
+        IRBuilder<> IRB(selectInst->getNextNode());
+
+        if (t->getTypeID() == llvm::Type::IntegerTyID) {
+
+          auto GuardPtr1 = IRB.CreateIntToPtr(
+              IRB.CreateAdd(
+                  IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+                  ConstantInt::get(
+                      IntptrTy,
+                      (cnt_cov + ++local_selects + AllBlocks.size()) * 4)),
+              Int32PtrTy);
+
+          auto GuardPtr2 = IRB.CreateIntToPtr(
+              IRB.CreateAdd(
+                  IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+                  ConstantInt::get(
+                      IntptrTy,
+                      (cnt_cov + ++local_selects + AllBlocks.size()) * 4)),
+              Int32PtrTy);
+
+          result = IRB.CreateSelect(condition, GuardPtr1, GuardPtr2);
+
+        } else
+
+#if LLVM_VERSION_MAJOR >= 14
+            if (t->getTypeID() == llvm::Type::FixedVectorTyID) {
+
+          FixedVectorType *tt = dyn_cast<FixedVectorType>(t);
+          if (tt) {
+
+            uint32_t elements = tt->getElementCount().getFixedValue();
+            vector_cnt = elements;
+            if (elements) {
+
+              FixedVectorType *GuardPtr1 =
+                  FixedVectorType::get(Int32PtrTy, elements);
+              FixedVectorType *GuardPtr2 =
+                  FixedVectorType::get(Int32PtrTy, elements);
+              Value *x, *y;
+
+              Value *val1 = IRB.CreateIntToPtr(
+                  IRB.CreateAdd(
+                      IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+                      ConstantInt::get(
+                          IntptrTy,
+                          (cnt_cov + ++local_selects + AllBlocks.size()) * 4)),
+                  Int32PtrTy);
+              x = IRB.CreateInsertElement(GuardPtr1, val1, (uint64_t)0);
+
+              Value *val2 = IRB.CreateIntToPtr(
+                  IRB.CreateAdd(
+                      IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+                      ConstantInt::get(
+                          IntptrTy,
+                          (cnt_cov + ++local_selects + AllBlocks.size()) * 4)),
+                  Int32PtrTy);
+              y = IRB.CreateInsertElement(GuardPtr2, val2, (uint64_t)0);
+
+              for (uint64_t i = 1; i < elements; i++) {
+
+                val1 = IRB.CreateIntToPtr(
+                    IRB.CreateAdd(
+                        IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+                        ConstantInt::get(IntptrTy, (cnt_cov + ++local_selects +
+                                                    AllBlocks.size()) *
+                                                       4)),
+                    Int32PtrTy);
+                x = IRB.CreateInsertElement(x, val1, i);
+
+                val2 = IRB.CreateIntToPtr(
+                    IRB.CreateAdd(
+                        IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+                        ConstantInt::get(IntptrTy, (cnt_cov + ++local_selects +
+                                                    AllBlocks.size()) *
+                                                       4)),
+                    Int32PtrTy);
+                y = IRB.CreateInsertElement(y, val2, i);
+
+              }
+
+              /*
+                          std::string errMsg;
+                          raw_string_ostream os(errMsg);
+                      x->print(os);
+                      fprintf(stderr, "X: %s\n", os.str().c_str());
+              */
+              result = IRB.CreateSelect(condition, x, y);
+
+            }
+
+          }
+
+        } else
+
+#endif
+        {
+
+          unhandled++;
+          continue;
 
-  instr += special;
+        }
+
+        local_selects++;
+        uint32_t vector_cur = 0;
+
+        /* Load SHM pointer */
+
+        LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
+        ModuleSanitizerCoverage::SetNoSanitizeMetadata(MapPtr);
+
+        /*
+            std::string errMsg;
+            raw_string_ostream os(errMsg);
+        result->print(os);
+        fprintf(stderr, "X: %s\n", os.str().c_str());
+        */
+
+        while (1) {
+
+          /* Get CurLoc */
+          LoadInst *CurLoc = nullptr;
+          Value *   MapPtrIdx = nullptr;
+
+          /* Load counter for CurLoc */
+          if (!vector_cnt) {
+
+            CurLoc = IRB.CreateLoad(result);
+            ModuleSanitizerCoverage::SetNoSanitizeMetadata(CurLoc);
+            MapPtrIdx = IRB.CreateGEP(MapPtr, CurLoc);
+
+          } else {
+
+            auto element = IRB.CreateExtractElement(result, vector_cur++);
+            auto elementptr = IRB.CreateIntToPtr(element, Int32PtrTy);
+            auto elementld = IRB.CreateLoad(elementptr);
+            ModuleSanitizerCoverage::SetNoSanitizeMetadata(elementld);
+            MapPtrIdx = IRB.CreateGEP(MapPtr, elementld);
+
+          }
+
+          if (use_threadsafe_counters) {
+
+            IRB.CreateAtomicRMW(llvm::AtomicRMWInst::BinOp::Add, MapPtrIdx, One,
+#if LLVM_VERSION_MAJOR >= 13
+                                llvm::MaybeAlign(1),
+#endif
+                                llvm::AtomicOrdering::Monotonic);
+
+          } else {
+
+            LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
+            ModuleSanitizerCoverage::SetNoSanitizeMetadata(Counter);
+
+            /* Update bitmap */
+
+            Value *Incr = IRB.CreateAdd(Counter, One);
+
+            if (skip_nozero == NULL) {
+
+              auto cf = IRB.CreateICmpEQ(Incr, Zero);
+              auto carry = IRB.CreateZExt(cf, Int8Ty);
+              Incr = IRB.CreateAdd(Incr, carry);
+
+            }
+
+            StoreInst *StoreCtx = IRB.CreateStore(Incr, MapPtrIdx);
+            ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreCtx);
+
+          }
+
+          if (!vector_cnt) {
+
+            vector_cnt = 2;
+            break;
+
+          } else if (vector_cnt == vector_cur) {
+
+            break;
+
+          }
+
+        }
+
+        skip_next = 1;
+        instr += vector_cnt;
+
+      } else {
+
+        skip_next = 0;
+
+      }
+
+    }
+
+  }
+
+  if (AllBlocks.empty() && !special && !local_selects) return false;
+
+  if (!AllBlocks.empty())
+    for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
+      InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc);
 
   return true;
 
@@ -881,8 +1169,6 @@ void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls(
     Function &F, ArrayRef<Instruction *> IndirCalls) {
 
   if (IndirCalls.empty()) return;
-  assert(Options.TracePC || Options.TracePCGuard ||
-         Options.Inline8bitCounters /*|| Options.InlineBoolFlag*/);
   for (auto I : IndirCalls) {
 
     IRBuilder<> IRB(I);
@@ -1062,10 +1348,12 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
         Int32PtrTy);
 
     LoadInst *CurLoc = IRB.CreateLoad(GuardPtr);
+    ModuleSanitizerCoverage::SetNoSanitizeMetadata(CurLoc);
 
     /* Load SHM pointer */
 
     LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
+    ModuleSanitizerCoverage::SetNoSanitizeMetadata(MapPtr);
 
     /* Load counter for CurLoc */
 
@@ -1082,6 +1370,8 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
     } else {
 
       LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
+      ModuleSanitizerCoverage::SetNoSanitizeMetadata(Counter);
+
       /* Update bitmap */
 
       Value *Incr = IRB.CreateAdd(Counter, One);
@@ -1094,7 +1384,8 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
 
       }
 
-      IRB.CreateStore(Incr, MapPtrIdx);
+      StoreInst *StoreCtx = IRB.CreateStore(Incr, MapPtrIdx);
+      ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreCtx);
 
     }
 
@@ -1207,7 +1498,7 @@ INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov",
 
 ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass(
     const SanitizerCoverageOptions &Options
-#if LLVM_MAJOR > 10
+#if (LLVM_VERSION_MAJOR >= 11)
     ,
     const std::vector<std::string> &AllowlistFiles,
     const std::vector<std::string> &BlocklistFiles
@@ -1215,7 +1506,7 @@ ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass(
 ) {
 
   return new ModuleSanitizerCoverageLegacyPass(Options
-#if LLVM_MAJOR > 10
+#if (LLVM_VERSION_MAJOR >= 11)
                                                ,
                                                AllowlistFiles, BlocklistFiles
 #endif
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
index 9acab4e7..20f325f3 100644
--- a/instrumentation/afl-compiler-rt.o.c
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -9,7 +9,7 @@
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at:
 
-     http://www.apache.org/licenses/LICENSE-2.0
+     https://www.apache.org/licenses/LICENSE-2.0
 
 
 */
@@ -22,6 +22,10 @@
 #include "cmplog.h"
 #include "llvm-alternative-coverage.h"
 
+#define XXH_INLINE_ALL
+#include "xxhash.h"
+#undef XXH_INLINE_ALL
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <signal.h>
@@ -154,6 +158,8 @@ static void at_exit(int signal) {
 
 }
 
+#define default_hash(a, b) XXH3_64bits(a, b)
+
 /* Uninspired gcc plugin instrumentation */
 
 void __afl_trace(const u32 x) {
@@ -269,6 +275,8 @@ static void __afl_map_shm(void) {
 
   char *id_str = getenv(SHM_ENV_VAR);
 
+  if (__afl_final_loc) { ++__afl_final_loc; }  // as we count starting 0
+
   if (__afl_final_loc) {
 
     __afl_map_size = __afl_final_loc;
@@ -664,12 +672,12 @@ static void __afl_start_snapshots(void) {
 
   u8 child_stopped = 0;
 
-  void (*old_sigchld_handler)(int) = 0;  // = signal(SIGCHLD, SIG_DFL);
+  void (*old_sigchld_handler)(int) = signal(SIGCHLD, SIG_DFL);
 
   /* Phone home and tell the parent that we're OK. If parent isn't there,
      assume we're not running in forkserver mode and just execute program. */
 
-  status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT);
+  status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT | FS_OPT_NEWCMPLOG);
   if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ;
   if (__afl_map_size <= FS_OPT_MAX_MAPSIZE)
     status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE);
@@ -920,7 +928,7 @@ static void __afl_start_forkserver(void) {
 
   u8 child_stopped = 0;
 
-  void (*old_sigchld_handler)(int) = 0;  // = signal(SIGCHLD, SIG_DFL);
+  void (*old_sigchld_handler)(int) = signal(SIGCHLD, SIG_DFL);
 
   if (__afl_map_size <= FS_OPT_MAX_MAPSIZE) {
 
@@ -935,7 +943,12 @@ static void __afl_start_forkserver(void) {
   }
 
   if (__afl_sharedmem_fuzzing != 0) { status_for_fsrv |= FS_OPT_SHDMEM_FUZZ; }
-  if (status_for_fsrv) { status_for_fsrv |= (FS_OPT_ENABLED); }
+  if (status_for_fsrv) {
+
+    status_for_fsrv |= (FS_OPT_ENABLED | FS_OPT_NEWCMPLOG);
+
+  }
+
   memcpy(tmp, &status_for_fsrv, 4);
 
   /* Phone home and tell the parent that we're OK. If parent isn't there,
@@ -1404,6 +1417,18 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   if (start == stop || *start) return;
 
+  // If a dlopen of an instrumented library happens after the forkserver then
+  // we have a problem as we cannot increase the coverage map anymore.
+  if (__afl_already_initialized_forkserver) {
+
+    fprintf(stderr,
+            "[-] FATAL: forkserver is already up, but an instrumented dlopen() "
+            "library loaded afterwards. You must AFL_PRELOAD such libraries to "
+            "be able to fuzz them or LD_PRELOAD to run outside of afl-fuzz.\n");
+    abort();
+
+  }
+
   x = getenv("AFL_INST_RATIO");
   if (x) inst_ratio = (u32)atoi(x);
 
@@ -1416,6 +1441,7 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   /* instrumented code is loaded *after* our forkserver is up. this is a
      problem. We cannot prevent collisions then :( */
+  /*
   if (__afl_already_initialized_forkserver &&
       __afl_final_loc + 1 + stop - start > __afl_map_size) {
 
@@ -1448,6 +1474,8 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   }
 
+  */
+
   /* Make sure that the first element in the range is always set - we use that
      to avoid duplicate calls (which can happen as an artifact of the underlying
      implementation in LLVM). */
@@ -1499,8 +1527,7 @@ void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2, uint8_t attr) {
   if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
 
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
-  k = (k >> 4) ^ (k << 8);
-  k &= CMP_MAP_W - 1;
+  k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
 
   u32 hits;
 
@@ -1530,8 +1557,7 @@ void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2, uint8_t attr) {
   if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
 
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
-  k = (k >> 4) ^ (k << 8);
-  k &= CMP_MAP_W - 1;
+  k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
 
   u32 hits;
 
@@ -1569,8 +1595,7 @@ void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2, uint8_t attr) {
   if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
 
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
-  k = (k >> 4) ^ (k << 8);
-  k &= CMP_MAP_W - 1;
+  k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
 
   u32 hits;
 
@@ -1608,8 +1633,7 @@ void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2, uint8_t attr) {
   if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
 
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
-  k = (k >> 4) ^ (k << 8);
-  k &= CMP_MAP_W - 1;
+  k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
 
   u32 hits;
 
@@ -1652,8 +1676,7 @@ void __cmplog_ins_hookN(uint128_t arg1, uint128_t arg2, uint8_t attr,
   if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
 
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
-  k = (k >> 4) ^ (k << 8);
-  k &= CMP_MAP_W - 1;
+  k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
 
   u32 hits;
 
@@ -1696,8 +1719,7 @@ void __cmplog_ins_hook16(uint128_t arg1, uint128_t arg2, uint8_t attr) {
   if (likely(!__afl_cmp_map)) return;
 
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
-  k = (k >> 4) ^ (k << 8);
-  k &= CMP_MAP_W - 1;
+  k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
 
   u32 hits;
 
@@ -1802,8 +1824,8 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
   for (uint64_t i = 0; i < cases[0]; i++) {
 
     uintptr_t k = (uintptr_t)__builtin_return_address(0) + i;
-    k = (k >> 4) ^ (k << 8);
-    k &= CMP_MAP_W - 1;
+    k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) &
+                    (CMP_MAP_W - 1));
 
     u32 hits;
 
@@ -1880,11 +1902,108 @@ static int area_is_valid(void *ptr, size_t len) {
 
 }
 
+/* hook for string with length functions, eg. strncmp, strncasecmp etc.
+   Note that we ignore the len parameter and take longer strings if present. */
+void __cmplog_rtn_hook_strn(u8 *ptr1, u8 *ptr2, u64 len) {
+
+  // fprintf(stderr, "RTN1 %p %p %u\n", ptr1, ptr2, len);
+  if (likely(!__afl_cmp_map)) return;
+  if (unlikely(!len)) return;
+  int len0 = MIN(len, 31);
+  int len1 = strnlen(ptr1, len0);
+  if (len1 < 31) len1 = area_is_valid(ptr1, len1 + 1);
+  int len2 = strnlen(ptr2, len0);
+  if (len2 < 31) len2 = area_is_valid(ptr1, len2 + 1);
+  int l = MAX(len1, len2);
+  if (l < 2) return;
+
+  uintptr_t k = (uintptr_t)__builtin_return_address(0);
+  k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
+
+  u32 hits;
+
+  if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
+
+    __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+    __afl_cmp_map->headers[k].hits = 1;
+    __afl_cmp_map->headers[k].shape = l - 1;
+    hits = 0;
+
+  } else {
+
+    hits = __afl_cmp_map->headers[k].hits++;
+
+    if (__afl_cmp_map->headers[k].shape < l) {
+
+      __afl_cmp_map->headers[k].shape = l - 1;
+
+    }
+
+  }
+
+  struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k];
+  hits &= CMP_MAP_RTN_H - 1;
+
+  cmpfn[hits].v0_len = 0x80 + l;
+  cmpfn[hits].v1_len = 0x80 + l;
+  __builtin_memcpy(cmpfn[hits].v0, ptr1, len1);
+  __builtin_memcpy(cmpfn[hits].v1, ptr2, len2);
+  // fprintf(stderr, "RTN3\n");
+
+}
+
+/* hook for string functions, eg. strcmp, strcasecmp etc. */
+void __cmplog_rtn_hook_str(u8 *ptr1, u8 *ptr2) {
+
+  // fprintf(stderr, "RTN1 %p %p\n", ptr1, ptr2);
+  if (likely(!__afl_cmp_map)) return;
+  if (unlikely(!ptr1 || !ptr2)) return;
+  int len1 = strnlen(ptr1, 30) + 1;
+  int len2 = strnlen(ptr2, 30) + 1;
+  int l = MAX(len1, len2);
+  if (l < 3) return;
+
+  uintptr_t k = (uintptr_t)__builtin_return_address(0);
+  k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
+
+  u32 hits;
+
+  if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
+
+    __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+    __afl_cmp_map->headers[k].hits = 1;
+    __afl_cmp_map->headers[k].shape = l - 1;
+    hits = 0;
+
+  } else {
+
+    hits = __afl_cmp_map->headers[k].hits++;
+
+    if (__afl_cmp_map->headers[k].shape < l) {
+
+      __afl_cmp_map->headers[k].shape = l - 1;
+
+    }
+
+  }
+
+  struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k];
+  hits &= CMP_MAP_RTN_H - 1;
+
+  cmpfn[hits].v0_len = 0x80 + len1;
+  cmpfn[hits].v1_len = 0x80 + len2;
+  __builtin_memcpy(cmpfn[hits].v0, ptr1, len1);
+  __builtin_memcpy(cmpfn[hits].v1, ptr2, len2);
+  // fprintf(stderr, "RTN3\n");
+
+}
+
+/* hook function for all other func(ptr, ptr, ...) variants */
 void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
 
   /*
     u32 i;
-    if (area_is_valid(ptr1, 32) <= 0 || area_is_valid(ptr2, 32) <= 0) return;
+    if (area_is_valid(ptr1, 31) <= 0 || area_is_valid(ptr2, 31) <= 0) return;
     fprintf(stderr, "rtn arg0=");
     for (i = 0; i < 32; i++)
       fprintf(stderr, "%02x", ptr1[i]);
@@ -1894,18 +2013,17 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
     fprintf(stderr, "\n");
   */
 
-  if (likely(!__afl_cmp_map)) return;
   // fprintf(stderr, "RTN1 %p %p\n", ptr1, ptr2);
+  if (likely(!__afl_cmp_map)) return;
   int l1, l2;
-  if ((l1 = area_is_valid(ptr1, 32)) <= 0 ||
-      (l2 = area_is_valid(ptr2, 32)) <= 0)
+  if ((l1 = area_is_valid(ptr1, 31)) <= 0 ||
+      (l2 = area_is_valid(ptr2, 31)) <= 0)
     return;
-  int len = MIN(l1, l2);
+  int len = MIN(31, MIN(l1, l2));
 
   // fprintf(stderr, "RTN2 %u\n", len);
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
-  k = (k >> 4) ^ (k << 8);
-  k &= CMP_MAP_W - 1;
+  k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
 
   u32 hits;
 
@@ -1928,15 +2046,83 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
 
   }
 
+  struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k];
   hits &= CMP_MAP_RTN_H - 1;
-  __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0,
-                   ptr1, len);
-  __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1,
-                   ptr2, len);
+
+  cmpfn[hits].v0_len = len;
+  cmpfn[hits].v1_len = len;
+  __builtin_memcpy(cmpfn[hits].v0, ptr1, len);
+  __builtin_memcpy(cmpfn[hits].v1, ptr2, len);
   // fprintf(stderr, "RTN3\n");
 
 }
 
+/* hook for func(ptr, ptr, len, ...) looking functions.
+   Note that for the time being we ignore len as this could be wrong
+   information and pass it on to the standard binary rtn hook */
+void __cmplog_rtn_hook_n(u8 *ptr1, u8 *ptr2, u64 len) {
+
+  (void)(len);
+  __cmplog_rtn_hook(ptr1, ptr2);
+
+#if 0
+  /*
+    u32 i;
+    if (area_is_valid(ptr1, 31) <= 0 || area_is_valid(ptr2, 31) <= 0) return;
+    fprintf(stderr, "rtn_n len=%u arg0=", len);
+    for (i = 0; i < len; i++)
+      fprintf(stderr, "%02x", ptr1[i]);
+    fprintf(stderr, " arg1=");
+    for (i = 0; i < len; i++)
+      fprintf(stderr, "%02x", ptr2[i]);
+    fprintf(stderr, "\n");
+  */
+
+  // fprintf(stderr, "RTN1 %p %p %u\n", ptr1, ptr2, len);
+  if (likely(!__afl_cmp_map)) return;
+  if (unlikely(!len)) return;
+  int l = MIN(31, len);
+
+  if ((l = area_is_valid(ptr1, l)) <= 0 || (l = area_is_valid(ptr2, l)) <= 0)
+    return;
+
+  // fprintf(stderr, "RTN2 %u\n", l);
+  uintptr_t k = (uintptr_t)__builtin_return_address(0);
+  k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
+
+  u32 hits;
+
+  if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
+
+    __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+    __afl_cmp_map->headers[k].hits = 1;
+    __afl_cmp_map->headers[k].shape = l - 1;
+    hits = 0;
+
+  } else {
+
+    hits = __afl_cmp_map->headers[k].hits++;
+
+    if (__afl_cmp_map->headers[k].shape < l) {
+
+      __afl_cmp_map->headers[k].shape = l - 1;
+
+    }
+
+  }
+
+  struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k];
+  hits &= CMP_MAP_RTN_H - 1;
+
+  cmpfn[hits].v0_len = l;
+  cmpfn[hits].v1_len = l;
+  __builtin_memcpy(cmpfn[hits].v0, ptr1, l);
+  __builtin_memcpy(cmpfn[hits].v1, ptr2, l);
+  // fprintf(stderr, "RTN3\n");
+#endif
+
+}
+
 // gcc libstdc++
 // _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7compareEPKc
 static u8 *get_gcc_stdstring(u8 *string) {
@@ -2084,5 +2270,11 @@ void __afl_coverage_interesting(u8 val, u32 id) {
 
 }
 
+void __afl_set_persistent_mode(u8 mode) {
+
+  is_persistent = mode;
+
+}
+
 #undef write_error
 
diff --git a/instrumentation/afl-gcc-pass.so.cc b/instrumentation/afl-gcc-pass.so.cc
index 3b7eb878..df2b6f2a 100644
--- a/instrumentation/afl-gcc-pass.so.cc
+++ b/instrumentation/afl-gcc-pass.so.cc
@@ -30,7 +30,7 @@
    GNU General Public License for more details.
 
    You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
  */
 
diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc
index 3239ea91..e5e367a7 100644
--- a/instrumentation/afl-llvm-common.cc
+++ b/instrumentation/afl-llvm-common.cc
@@ -281,7 +281,7 @@ void scanForDangerousFunctions(llvm::Module *M) {
 
   if (!M) return;
 
-#if LLVM_VERSION_MAJOR > 3 || \
+#if LLVM_VERSION_MAJOR >= 4 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
 
   for (GlobalIFunc &IF : M->ifuncs()) {
diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc
index 4622e488..bf07a154 100644
--- a/instrumentation/afl-llvm-dict2file.so.cc
+++ b/instrumentation/afl-llvm-dict2file.so.cc
@@ -10,7 +10,7 @@
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at:
 
-     http://www.apache.org/licenses/LICENSE-2.0
+     https://www.apache.org/licenses/LICENSE-2.0
 
    This library is plugged into LLVM when invoking clang through afl-clang-lto.
 
@@ -66,6 +66,9 @@ namespace {
 
 class AFLdict2filePass : public ModulePass {
 
+  std::ofstream of;
+  void          dict2file(u8 *, u32);
+
  public:
   static char ID;
 
@@ -81,7 +84,7 @@ class AFLdict2filePass : public ModulePass {
 
 }  // namespace
 
-void dict2file(int fd, u8 *mem, u32 len) {
+void AFLdict2filePass::dict2file(u8 *mem, u32 len) {
 
   u32  i, j, binary = 0;
   char line[MAX_AUTO_EXTRA * 8], tmp[8];
@@ -113,9 +116,8 @@ void dict2file(int fd, u8 *mem, u32 len) {
 
   line[j] = 0;
   strcat(line, "\"\n");
-  if (write(fd, line, strlen(line)) <= 0)
-    PFATAL("Could not write to dictionary file");
-  fsync(fd);
+  of << line;
+  of.flush();
 
   if (!be_quiet) fprintf(stderr, "Found dictionary token: %s", line);
 
@@ -125,7 +127,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
 
   DenseMap<Value *, std::string *> valueMap;
   char *                           ptr;
-  int                              fd, found = 0;
+  int                              found = 0;
 
   /* Show a banner */
   setvbuf(stdout, NULL, _IONBF, 0);
@@ -146,8 +148,8 @@ bool AFLdict2filePass::runOnModule(Module &M) {
   if (!ptr || *ptr != '/')
     FATAL("AFL_LLVM_DICT2FILE is not set to an absolute path: %s", ptr);
 
-  if ((fd = open(ptr, O_WRONLY | O_APPEND | O_CREAT | O_DSYNC, 0644)) < 0)
-    PFATAL("Could not open/create %s.", ptr);
+  of.open(ptr, std::ofstream::out | std::ofstream::app);
+  if (!of.is_open()) PFATAL("Could not open/create %s.", ptr);
 
   /* Instrument all the things! */
 
@@ -264,11 +266,11 @@ bool AFLdict2filePass::runOnModule(Module &M) {
 
               }
 
-              dict2file(fd, (u8 *)&val, len);
+              dict2file((u8 *)&val, len);
               found++;
               if (val2) {
 
-                dict2file(fd, (u8 *)&val2, len);
+                dict2file((u8 *)&val2, len);
                 found++;
 
               }
@@ -289,7 +291,6 @@ bool AFLdict2filePass::runOnModule(Module &M) {
           bool   isIntMemcpy = true;
           bool   isStdString = true;
           bool   isStrstr = true;
-          bool   addedNull = false;
           size_t optLen = 0;
 
           Function *Callee = callInst->getCalledFunction();
@@ -588,8 +589,8 @@ bool AFLdict2filePass::runOnModule(Module &M) {
 
               if (optLen < 2) { continue; }
               if (literalLength + 1 == optLen) {  // add null byte
+
                 thestring.append("\0", 1);
-                addedNull = true;
 
               }
 
@@ -601,14 +602,17 @@ bool AFLdict2filePass::runOnModule(Module &M) {
           // was not already added
           if (!isMemcmp) {
 
-            if (addedNull == false && thestring[optLen - 1] != '\0') {
+            /*
+                        if (addedNull == false && thestring[optLen - 1] != '\0')
+               {
 
-              thestring.append("\0", 1);  // add null byte
-              optLen++;
+                          thestring.append("\0", 1);  // add null byte
+                          optLen++;
 
-            }
+                        }
 
-            if (!isStdString) {
+            */
+            if (!isStdString && thestring.find('\0', 0) != std::string::npos) {
 
               // ensure we do not have garbage
               size_t offset = thestring.find('\0', 0);
@@ -630,7 +634,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
 
           ptr = (char *)thestring.c_str();
 
-          dict2file(fd, (u8 *)ptr, optLen);
+          dict2file((u8 *)ptr, optLen);
           found++;
 
         }
@@ -641,7 +645,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
 
   }
 
-  close(fd);
+  of.close();
 
   /* Say something nice. */
 
diff --git a/instrumentation/afl-llvm-lto-instrumentation.so.cc b/instrumentation/afl-llvm-lto-instrumentation.so.cc
deleted file mode 100644
index e300044c..00000000
--- a/instrumentation/afl-llvm-lto-instrumentation.so.cc
+++ /dev/null
@@ -1,1118 +0,0 @@
-/*
-   american fuzzy lop++ - LLVM LTO instrumentation pass
-   ----------------------------------------------------
-
-   Written by Marc Heuse <mh@mh-sec.de>
-
-   Copyright 2019-2020 AFLplusplus Project. All rights reserved.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at:
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   This library is plugged into LLVM when invoking clang through afl-clang-lto.
-
- */
-
-#define AFL_LLVM_PASS
-
-#include "config.h"
-#include "debug.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <sys/time.h>
-
-#include <list>
-#include <string>
-#include <fstream>
-#include <set>
-#include <iostream>
-
-#include "llvm/Config/llvm-config.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Pass.h"
-#include "llvm/IR/Constants.h"
-
-#include "afl-llvm-common.h"
-
-using namespace llvm;
-
-namespace {
-
-class AFLLTOPass : public ModulePass {
-
- public:
-  static char ID;
-
-  AFLLTOPass() : ModulePass(ID) {
-
-    char *ptr;
-
-    if (getenv("AFL_DEBUG")) debug = 1;
-    if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL)
-      if ((afl_global_id = (uint32_t)atoi(ptr)) < 0 ||
-          afl_global_id >= MAP_SIZE)
-        FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is not between 0 and %u\n",
-              ptr, MAP_SIZE - 1);
-
-    skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");
-
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-
-    ModulePass::getAnalysisUsage(AU);
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<LoopInfoWrapperPass>();
-
-  }
-
-  bool runOnModule(Module &M) override;
-
- protected:
-  uint32_t               afl_global_id = 1, autodictionary = 1;
-  uint32_t               function_minimum_size = 1;
-  uint32_t               inst_blocks = 0, inst_funcs = 0, total_instr = 0;
-  unsigned long long int map_addr = 0x10000;
-  const char *           skip_nozero = NULL;
-  const char *           use_threadsafe_counters = nullptr;
-
-};
-
-}  // namespace
-
-bool AFLLTOPass::runOnModule(Module &M) {
-
-  LLVMContext &            C = M.getContext();
-  std::vector<std::string> dictionary;
-  //  std::vector<CallInst *>          calls;
-  DenseMap<Value *, std::string *> valueMap;
-  std::vector<BasicBlock *>        BlockList;
-  char *                           ptr;
-  FILE *                           documentFile = NULL;
-  size_t                           found = 0;
-
-  srand((unsigned int)time(NULL));
-
-  unsigned long long int moduleID =
-      (((unsigned long long int)(rand() & 0xffffffff)) << 32) | getpid();
-
-  IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
-  IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
-  IntegerType *Int64Ty = IntegerType::getInt64Ty(C);
-
-  /* Show a banner */
-  setvbuf(stdout, NULL, _IONBF, 0);
-
-  if ((isatty(2) && !getenv("AFL_QUIET")) || debug) {
-
-    SAYF(cCYA "afl-llvm-lto" VERSION cRST
-              " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n");
-
-  } else
-
-    be_quiet = 1;
-
-  use_threadsafe_counters = getenv("AFL_LLVM_THREADSAFE_INST");
-
-  if ((ptr = getenv("AFL_LLVM_DOCUMENT_IDS")) != NULL) {
-
-    if ((documentFile = fopen(ptr, "a")) == NULL)
-      WARNF("Cannot access document file %s", ptr);
-
-  }
-
-  // we make this the default as the fixed map has problems with
-  // defered forkserver, early constructors, ifuncs and maybe more
-  /*if (getenv("AFL_LLVM_MAP_DYNAMIC"))*/
-  map_addr = 0;
-
-  if ((ptr = getenv("AFL_LLVM_MAP_ADDR"))) {
-
-    uint64_t val;
-    if (!*ptr || !strcmp(ptr, "0") || !strcmp(ptr, "0x0")) {
-
-      map_addr = 0;
-
-    } else if (getenv("AFL_LLVM_MAP_DYNAMIC")) {
-
-      FATAL(
-          "AFL_LLVM_MAP_ADDR and AFL_LLVM_MAP_DYNAMIC cannot be used together");
-
-    } else if (strncmp(ptr, "0x", 2) != 0) {
-
-      map_addr = 0x10000;  // the default
-
-    } else {
-
-      val = strtoull(ptr, NULL, 16);
-      if (val < 0x100 || val > 0xffffffff00000000) {
-
-        FATAL(
-            "AFL_LLVM_MAP_ADDR must be a value between 0x100 and "
-            "0xffffffff00000000");
-
-      }
-
-      map_addr = val;
-
-    }
-
-  }
-
-  if (debug) { fprintf(stderr, "map address is 0x%llx\n", map_addr); }
-
-  /* Get/set the globals for the SHM region. */
-
-  GlobalVariable *AFLMapPtr = NULL;
-  Value *         MapPtrFixed = NULL;
-
-  if (!map_addr) {
-
-    AFLMapPtr =
-        new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
-                           GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
-
-  } else {
-
-    ConstantInt *MapAddr = ConstantInt::get(Int64Ty, map_addr);
-    MapPtrFixed =
-        ConstantExpr::getIntToPtr(MapAddr, PointerType::getUnqual(Int8Ty));
-
-  }
-
-  ConstantInt *Zero = ConstantInt::get(Int8Ty, 0);
-  ConstantInt *One = ConstantInt::get(Int8Ty, 1);
-
-  // This dumps all inialized global strings - might be useful in the future
-  /*
-  for (auto G=M.getGlobalList().begin(); G!=M.getGlobalList().end(); G++) {
-
-    GlobalVariable &GV=*G;
-    if (!GV.getName().str().empty()) {
-
-      fprintf(stderr, "Global Variable: %s", GV.getName().str().c_str());
-      if (GV.hasInitializer())
-        if (auto *Val = dyn_cast<ConstantDataArray>(GV.getInitializer()))
-          fprintf(stderr, " Value: \"%s\"", Val->getAsString().str().c_str());
-      fprintf(stderr, "\n");
-
-    }
-
-  }
-
-  */
-
-  scanForDangerousFunctions(&M);
-
-  /* Instrument all the things! */
-
-  int inst_blocks = 0;
-
-  for (auto &F : M) {
-
-    /*For debugging
-    AttributeSet X = F.getAttributes().getFnAttributes();
-    fprintf(stderr, "DEBUG: Module %s Function %s attributes %u\n",
-      M.getName().str().c_str(), F.getName().str().c_str(),
-      X.getNumAttributes());
-    */
-
-    if (F.size() < function_minimum_size) continue;
-    if (isIgnoreFunction(&F)) continue;
-
-    // the instrument file list check
-    AttributeList Attrs = F.getAttributes();
-    if (Attrs.hasAttribute(-1, StringRef("skipinstrument"))) {
-
-      if (debug)
-        fprintf(stderr,
-                "DEBUG: Function %s is not in a source file that was specified "
-                "in the instrument file list\n",
-                F.getName().str().c_str());
-      continue;
-
-    }
-
-    std::vector<BasicBlock *> InsBlocks;
-
-    if (autodictionary) {
-
-      /*  Some implementation notes.
-       *
-       *  We try to handle 3 cases:
-       *  - memcmp("foo", arg, 3) <- literal string
-       *  - static char globalvar[] = "foo";
-       *    memcmp(globalvar, arg, 3) <- global variable
-       *  - char localvar[] = "foo";
-       *    memcmp(locallvar, arg, 3) <- local variable
-       *
-       *  The local variable case is the hardest. We can only detect that
-       *  case if there is no reassignment or change in the variable.
-       *  And it might not work across llvm version.
-       *  What we do is hooking the initializer function for local variables
-       *  (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned
-       *  variable. And if that variable is then used in a compare function
-       *  we use that noted string.
-       *  This seems not to work for tokens that have a size <= 4 :-(
-       *
-       *  - if the compared length is smaller than the string length we
-       *    save the full string. This is likely better for fuzzing but
-       *    might be wrong in a few cases depending on optimizers
-       *
-       *  - not using StringRef because there is a bug in the llvm 11
-       *    checkout I am using which sometimes points to wrong strings
-       *
-       *  Over and out. Took me a full day. damn. mh/vh
-       */
-
-      for (auto &BB : F) {
-
-        for (auto &IN : BB) {
-
-          CallInst *callInst = nullptr;
-          CmpInst * cmpInst = nullptr;
-
-          if ((cmpInst = dyn_cast<CmpInst>(&IN))) {
-
-            Value *      op = cmpInst->getOperand(1);
-            ConstantInt *ilen = dyn_cast<ConstantInt>(op);
-
-            if (ilen && ilen->uge(0xffffffffffffffff) == false) {
-
-              u64 val2 = 0, val = ilen->getZExtValue();
-              u32 len = 0;
-              if (val > 0x10000 && val < 0xffffffff) len = 4;
-              if (val > 0x100000001 && val < 0xffffffffffffffff) len = 8;
-
-              if (len) {
-
-                auto c = cmpInst->getPredicate();
-
-                switch (c) {
-
-                  case CmpInst::FCMP_OGT:  // fall through
-                  case CmpInst::FCMP_OLE:  // fall through
-                  case CmpInst::ICMP_SLE:  // fall through
-                  case CmpInst::ICMP_SGT:
-
-                    // signed comparison and it is a negative constant
-                    if ((len == 4 && (val & 80000000)) ||
-                        (len == 8 && (val & 8000000000000000))) {
-
-                      if ((val & 0xffff) != 1) val2 = val - 1;
-                      break;
-
-                    }
-
-                    // fall through
-
-                  case CmpInst::FCMP_UGT:  // fall through
-                  case CmpInst::FCMP_ULE:  // fall through
-                  case CmpInst::ICMP_UGT:  // fall through
-                  case CmpInst::ICMP_ULE:
-                    if ((val & 0xffff) != 0xfffe) val2 = val + 1;
-                    break;
-
-                  case CmpInst::FCMP_OLT:  // fall through
-                  case CmpInst::FCMP_OGE:  // fall through
-                  case CmpInst::ICMP_SLT:  // fall through
-                  case CmpInst::ICMP_SGE:
-
-                    // signed comparison and it is a negative constant
-                    if ((len == 4 && (val & 80000000)) ||
-                        (len == 8 && (val & 8000000000000000))) {
-
-                      if ((val & 0xffff) != 1) val2 = val - 1;
-                      break;
-
-                    }
-
-                    // fall through
-
-                  case CmpInst::FCMP_ULT:  // fall through
-                  case CmpInst::FCMP_UGE:  // fall through
-                  case CmpInst::ICMP_ULT:  // fall through
-                  case CmpInst::ICMP_UGE:
-                    if ((val & 0xffff) != 1) val2 = val - 1;
-                    break;
-
-                  default:
-                    val2 = 0;
-
-                }
-
-                dictionary.push_back(std::string((char *)&val, len));
-                found++;
-
-                if (val2) {
-
-                  dictionary.push_back(std::string((char *)&val2, len));
-                  found++;
-
-                }
-
-              }
-
-            }
-
-          }
-
-          if ((callInst = dyn_cast<CallInst>(&IN))) {
-
-            bool   isStrcmp = true;
-            bool   isMemcmp = true;
-            bool   isStrncmp = true;
-            bool   isStrcasecmp = true;
-            bool   isStrncasecmp = true;
-            bool   isIntMemcpy = true;
-            bool   isStdString = true;
-            bool   addedNull = false;
-            size_t optLen = 0;
-
-            Function *Callee = callInst->getCalledFunction();
-            if (!Callee) continue;
-            if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
-            std::string FuncName = Callee->getName().str();
-
-            isStrcmp &= (!FuncName.compare("strcmp") ||
-                         !FuncName.compare("xmlStrcmp") ||
-                         !FuncName.compare("xmlStrEqual") ||
-                         !FuncName.compare("g_strcmp0") ||
-                         !FuncName.compare("curl_strequal") ||
-                         !FuncName.compare("strcsequal"));
-            isMemcmp &=
-                (!FuncName.compare("memcmp") || !FuncName.compare("bcmp") ||
-                 !FuncName.compare("CRYPTO_memcmp") ||
-                 !FuncName.compare("OPENSSL_memcmp") ||
-                 !FuncName.compare("memcmp_const_time") ||
-                 !FuncName.compare("memcmpct"));
-            isStrncmp &= (!FuncName.compare("strncmp") ||
-                          !FuncName.compare("xmlStrncmp") ||
-                          !FuncName.compare("curl_strnequal"));
-            isStrcasecmp &= (!FuncName.compare("strcasecmp") ||
-                             !FuncName.compare("stricmp") ||
-                             !FuncName.compare("ap_cstr_casecmp") ||
-                             !FuncName.compare("OPENSSL_strcasecmp") ||
-                             !FuncName.compare("xmlStrcasecmp") ||
-                             !FuncName.compare("g_strcasecmp") ||
-                             !FuncName.compare("g_ascii_strcasecmp") ||
-                             !FuncName.compare("Curl_strcasecompare") ||
-                             !FuncName.compare("Curl_safe_strcasecompare") ||
-                             !FuncName.compare("cmsstrcasecmp"));
-            isStrncasecmp &= (!FuncName.compare("strncasecmp") ||
-                              !FuncName.compare("strnicmp") ||
-                              !FuncName.compare("ap_cstr_casecmpn") ||
-                              !FuncName.compare("OPENSSL_strncasecmp") ||
-                              !FuncName.compare("xmlStrncasecmp") ||
-                              !FuncName.compare("g_ascii_strncasecmp") ||
-                              !FuncName.compare("Curl_strncasecompare") ||
-                              !FuncName.compare("g_strncasecmp"));
-            isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64");
-            isStdString &=
-                ((FuncName.find("basic_string") != std::string::npos &&
-                  FuncName.find("compare") != std::string::npos) ||
-                 (FuncName.find("basic_string") != std::string::npos &&
-                  FuncName.find("find") != std::string::npos));
-
-            /* we do something different here, putting this BB and the
-               successors in a block map */
-            if (!FuncName.compare("__afl_persistent_loop")) {
-
-              BlockList.push_back(&BB);
-              /*
-                            for (succ_iterator SI = succ_begin(&BB), SE =
-                 succ_end(&BB); SI != SE; ++SI) {
-
-                              BasicBlock *succ = *SI;
-                              BlockList.push_back(succ);
-
-                            }
-
-              */
-
-            }
-
-            if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
-                !isStrncasecmp && !isIntMemcpy && !isStdString)
-              continue;
-
-            /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function
-             * prototype */
-            FunctionType *FT = Callee->getFunctionType();
-
-            isStrcmp &= FT->getNumParams() == 2 &&
-                        FT->getReturnType()->isIntegerTy(32) &&
-                        FT->getParamType(0) == FT->getParamType(1) &&
-                        FT->getParamType(0) ==
-                            IntegerType::getInt8PtrTy(M.getContext());
-            isStrcasecmp &= FT->getNumParams() == 2 &&
-                            FT->getReturnType()->isIntegerTy(32) &&
-                            FT->getParamType(0) == FT->getParamType(1) &&
-                            FT->getParamType(0) ==
-                                IntegerType::getInt8PtrTy(M.getContext());
-            isMemcmp &= FT->getNumParams() == 3 &&
-                        FT->getReturnType()->isIntegerTy(32) &&
-                        FT->getParamType(0)->isPointerTy() &&
-                        FT->getParamType(1)->isPointerTy() &&
-                        FT->getParamType(2)->isIntegerTy();
-            isStrncmp &= FT->getNumParams() == 3 &&
-                         FT->getReturnType()->isIntegerTy(32) &&
-                         FT->getParamType(0) == FT->getParamType(1) &&
-                         FT->getParamType(0) ==
-                             IntegerType::getInt8PtrTy(M.getContext()) &&
-                         FT->getParamType(2)->isIntegerTy();
-            isStrncasecmp &= FT->getNumParams() == 3 &&
-                             FT->getReturnType()->isIntegerTy(32) &&
-                             FT->getParamType(0) == FT->getParamType(1) &&
-                             FT->getParamType(0) ==
-                                 IntegerType::getInt8PtrTy(M.getContext()) &&
-                             FT->getParamType(2)->isIntegerTy();
-            isStdString &= FT->getNumParams() >= 2 &&
-                           FT->getParamType(0)->isPointerTy() &&
-                           FT->getParamType(1)->isPointerTy();
-
-            if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
-                !isStrncasecmp && !isIntMemcpy && !isStdString)
-              continue;
-
-            /* is a str{n,}{case,}cmp/memcmp, check if we have
-             * str{case,}cmp(x, "const") or str{case,}cmp("const", x)
-             * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..)
-             * memcmp(x, "const", ..) or memcmp("const", x, ..) */
-            Value *Str1P = callInst->getArgOperand(0),
-                  *Str2P = callInst->getArgOperand(1);
-            std::string Str1, Str2;
-            StringRef   TmpStr;
-            bool        HasStr1;
-            getConstantStringInfo(Str1P, TmpStr);
-            if (TmpStr.empty()) {
-
-              HasStr1 = false;
-
-            } else {
-
-              HasStr1 = true;
-              Str1 = TmpStr.str();
-
-            }
-
-            bool HasStr2;
-            getConstantStringInfo(Str2P, TmpStr);
-            if (TmpStr.empty()) {
-
-              HasStr2 = false;
-
-            } else {
-
-              HasStr2 = true;
-              Str2 = TmpStr.str();
-
-            }
-
-            if (debug)
-              fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n",
-                      FuncName.c_str(), Str1P, Str1P->getName().str().c_str(),
-                      Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P,
-                      Str2P->getName().str().c_str(), Str2.c_str(),
-                      HasStr2 == true ? "true" : "false");
-
-            // we handle the 2nd parameter first because of llvm memcpy
-            if (!HasStr2) {
-
-              auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
-              if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
-
-                if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
-
-                  if (Var->hasInitializer()) {
-
-                    if (auto *Array = dyn_cast<ConstantDataArray>(
-                            Var->getInitializer())) {
-
-                      HasStr2 = true;
-                      Str2 = Array->getRawDataValues().str();
-
-                    }
-
-                  }
-
-                }
-
-              }
-
-            }
-
-            // for the internal memcpy routine we only care for the second
-            // parameter and are not reporting anything.
-            if (isIntMemcpy == true) {
-
-              if (HasStr2 == true) {
-
-                Value *      op2 = callInst->getArgOperand(2);
-                ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
-                if (ilen) {
-
-                  uint64_t literalLength = Str2.size();
-                  uint64_t optLength = ilen->getZExtValue();
-                  if (optLength > literalLength + 1) {
-
-                    optLength = Str2.length() + 1;
-
-                  }
-
-                  if (literalLength + 1 == optLength) {
-
-                    Str2.append("\0", 1);  // add null byte
-                    // addedNull = true;
-
-                  }
-
-                }
-
-                valueMap[Str1P] = new std::string(Str2);
-
-                if (debug)
-                  fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P);
-                continue;
-
-              }
-
-              continue;
-
-            }
-
-            // Neither a literal nor a global variable?
-            // maybe it is a local variable that we saved
-            if (!HasStr2) {
-
-              std::string *strng = valueMap[Str2P];
-              if (strng && !strng->empty()) {
-
-                Str2 = *strng;
-                HasStr2 = true;
-                if (debug)
-                  fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(),
-                          Str2P);
-
-              }
-
-            }
-
-            if (!HasStr1) {
-
-              auto Ptr = dyn_cast<ConstantExpr>(Str1P);
-
-              if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
-
-                if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
-
-                  if (Var->hasInitializer()) {
-
-                    if (auto *Array = dyn_cast<ConstantDataArray>(
-                            Var->getInitializer())) {
-
-                      HasStr1 = true;
-                      Str1 = Array->getRawDataValues().str();
-
-                    }
-
-                  }
-
-                }
-
-              }
-
-            }
-
-            // Neither a literal nor a global variable?
-            // maybe it is a local variable that we saved
-            if (!HasStr1) {
-
-              std::string *strng = valueMap[Str1P];
-              if (strng && !strng->empty()) {
-
-                Str1 = *strng;
-                HasStr1 = true;
-                if (debug)
-                  fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(),
-                          Str1P);
-
-              }
-
-            }
-
-            /* handle cases of one string is const, one string is variable */
-            if (!(HasStr1 ^ HasStr2)) continue;
-
-            std::string thestring;
-
-            if (HasStr1)
-              thestring = Str1;
-            else
-              thestring = Str2;
-
-            optLen = thestring.length();
-            if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
-
-            if (isMemcmp || isStrncmp || isStrncasecmp) {
-
-              Value *      op2 = callInst->getArgOperand(2);
-              ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
-
-              if (ilen) {
-
-                uint64_t literalLength = optLen;
-                optLen = ilen->getZExtValue();
-                if (optLen > literalLength + 1) { optLen = literalLength + 1; }
-                if (optLen < 2) { continue; }
-                if (literalLength + 1 == optLen) {  // add null byte
-                  thestring.append("\0", 1);
-                  addedNull = true;
-
-                }
-
-              }
-
-            }
-
-            // add null byte if this is a string compare function and a null
-            // was not already added
-            if (!isMemcmp) {
-
-              if (addedNull == false && thestring[optLen - 1] != '\0') {
-
-                thestring.append("\0", 1);  // add null byte
-                optLen++;
-
-              }
-
-              if (!isStdString) {
-
-                // ensure we do not have garbage
-                size_t offset = thestring.find('\0', 0);
-                if (offset + 1 < optLen) optLen = offset + 1;
-                thestring = thestring.substr(0, optLen);
-
-              }
-
-            }
-
-            if (!be_quiet) {
-
-              fprintf(stderr, "%s: length %zu/%zu \"", FuncName.c_str(), optLen,
-                      thestring.length());
-              for (uint8_t i = 0; i < thestring.length(); i++) {
-
-                uint8_t c = thestring[i];
-                if (c <= 32 || c >= 127)
-                  fprintf(stderr, "\\x%02x", c);
-                else
-                  fprintf(stderr, "%c", c);
-
-              }
-
-              fprintf(stderr, "\"\n");
-
-            }
-
-            // we take the longer string, even if the compare was to a
-            // shorter part. Note that depending on the optimizer of the
-            // compiler this can be wrong, but it is more likely that this
-            // is helping the fuzzer
-            if (optLen != thestring.length()) optLen = thestring.length();
-            if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA;
-            if (optLen < MIN_AUTO_EXTRA)  // too short? skip
-              continue;
-
-            dictionary.push_back(thestring.substr(0, optLen));
-
-          }
-
-        }
-
-      }
-
-    }
-
-    for (auto &BB : F) {
-
-      if (F.size() == 1) {
-
-        InsBlocks.push_back(&BB);
-        continue;
-
-      }
-
-      uint32_t succ = 0;
-      for (succ_iterator SI = succ_begin(&BB), SE = succ_end(&BB); SI != SE;
-           ++SI)
-        if ((*SI)->size() > 0) succ++;
-      if (succ < 2)  // no need to instrument
-        continue;
-
-      if (BlockList.size()) {
-
-        int skip = 0;
-        for (uint32_t k = 0; k < BlockList.size(); k++) {
-
-          if (&BB == BlockList[k]) {
-
-            if (debug)
-              fprintf(stderr,
-                      "DEBUG: Function %s skipping BB with/after __afl_loop\n",
-                      F.getName().str().c_str());
-            skip = 1;
-
-          }
-
-        }
-
-        if (skip) continue;
-
-      }
-
-      InsBlocks.push_back(&BB);
-
-    }
-
-    if (InsBlocks.size() > 0) {
-
-      uint32_t i = InsBlocks.size();
-
-      do {
-
-        --i;
-        BasicBlock *              newBB = NULL;
-        BasicBlock *              origBB = &(*InsBlocks[i]);
-        std::vector<BasicBlock *> Successors;
-        Instruction *             TI = origBB->getTerminator();
-        uint32_t                  fs = origBB->getParent()->size();
-        uint32_t                  countto;
-
-        for (succ_iterator SI = succ_begin(origBB), SE = succ_end(origBB);
-             SI != SE; ++SI) {
-
-          BasicBlock *succ = *SI;
-          Successors.push_back(succ);
-
-        }
-
-        if (fs == 1) {
-
-          newBB = origBB;
-          countto = 1;
-
-        } else {
-
-          if (TI == NULL || TI->getNumSuccessors() < 2) continue;
-          countto = Successors.size();
-
-        }
-
-        // if (Successors.size() != TI->getNumSuccessors())
-        //  FATAL("Different successor numbers %lu <-> %u\n", Successors.size(),
-        //        TI->getNumSuccessors());
-
-        for (uint32_t j = 0; j < countto; j++) {
-
-          if (fs != 1) newBB = llvm::SplitEdge(origBB, Successors[j]);
-
-          if (!newBB) {
-
-            if (!be_quiet) WARNF("Split failed!");
-            continue;
-
-          }
-
-          if (documentFile) {
-
-            fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%u\n",
-                    moduleID, F.getName().str().c_str(), afl_global_id);
-
-          }
-
-          BasicBlock::iterator IP = newBB->getFirstInsertionPt();
-          IRBuilder<>          IRB(&(*IP));
-
-          /* Set the ID of the inserted basic block */
-
-          ConstantInt *CurLoc = ConstantInt::get(Int32Ty, afl_global_id++);
-
-          /* Load SHM pointer */
-
-          Value *MapPtrIdx;
-
-          if (map_addr) {
-
-            MapPtrIdx = IRB.CreateGEP(MapPtrFixed, CurLoc);
-
-          } else {
-
-            LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
-            MapPtr->setMetadata(M.getMDKindID("nosanitize"),
-                                MDNode::get(C, None));
-            MapPtrIdx = IRB.CreateGEP(MapPtr, CurLoc);
-
-          }
-
-          /* Update bitmap */
-
-          if (use_threadsafe_counters) {
-
-            IRB.CreateAtomicRMW(llvm::AtomicRMWInst::BinOp::Add, MapPtrIdx, One,
-#if LLVM_VERSION_MAJOR >= 13
-                                llvm::MaybeAlign(1),
-#endif
-                                llvm::AtomicOrdering::Monotonic);
-
-          } else {
-
-            LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
-            Counter->setMetadata(M.getMDKindID("nosanitize"),
-                                 MDNode::get(C, None));
-
-            Value *Incr = IRB.CreateAdd(Counter, One);
-
-            if (skip_nozero == NULL) {
-
-              auto cf = IRB.CreateICmpEQ(Incr, Zero);
-              auto carry = IRB.CreateZExt(cf, Int8Ty);
-              Incr = IRB.CreateAdd(Incr, carry);
-
-            }
-
-            IRB.CreateStore(Incr, MapPtrIdx)
-                ->setMetadata(M.getMDKindID("nosanitize"),
-                              MDNode::get(C, None));
-
-          }
-
-          // done :)
-
-          inst_blocks++;
-
-        }
-
-      } while (i > 0);
-
-    }
-
-  }
-
-  if (documentFile) fclose(documentFile);
-  documentFile = NULL;
-
-  // save highest location ID to global variable
-  // do this after each function to fail faster
-  if (!be_quiet && afl_global_id > MAP_SIZE &&
-      afl_global_id > FS_OPT_MAX_MAPSIZE) {
-
-    uint32_t pow2map = 1, map = afl_global_id;
-    while ((map = map >> 1))
-      pow2map++;
-    WARNF(
-        "We have %u blocks to instrument but the map size is only %u. Either "
-        "edit config.h and set MAP_SIZE_POW2 from %d to %u, then recompile "
-        "afl-fuzz and llvm_mode and then make this target - or set "
-        "AFL_MAP_SIZE with at least size %u when running afl-fuzz with this "
-        "target.",
-        afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map, afl_global_id);
-
-  }
-
-  if (!getenv("AFL_LLVM_LTO_DONTWRITEID") || dictionary.size() || map_addr) {
-
-    // yes we could create our own function, insert it into ctors ...
-    // but this would be a pain in the butt ... so we use afl-llvm-rt-lto.o
-
-    Function *f = M.getFunction("__afl_auto_init_globals");
-
-    if (!f) {
-
-      fprintf(stderr,
-              "Error: init function could not be found (this should not "
-              "happen)\n");
-      exit(-1);
-
-    }
-
-    BasicBlock *bb = &f->getEntryBlock();
-    if (!bb) {
-
-      fprintf(stderr,
-              "Error: init function does not have an EntryBlock (this should "
-              "not happen)\n");
-      exit(-1);
-
-    }
-
-    BasicBlock::iterator IP = bb->getFirstInsertionPt();
-    IRBuilder<>          IRB(&(*IP));
-
-    if (map_addr) {
-
-      GlobalVariable *AFLMapAddrFixed = new GlobalVariable(
-          M, Int64Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_map_addr");
-      ConstantInt *MapAddr = ConstantInt::get(Int64Ty, map_addr);
-      StoreInst *  StoreMapAddr = IRB.CreateStore(MapAddr, AFLMapAddrFixed);
-      StoreMapAddr->setMetadata(M.getMDKindID("nosanitize"),
-                                MDNode::get(C, None));
-
-    }
-
-    if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) {
-
-      uint32_t write_loc = (((afl_global_id + 63) >> 6) << 6);
-
-      GlobalVariable *AFLFinalLoc = new GlobalVariable(
-          M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc");
-      ConstantInt *const_loc = ConstantInt::get(Int32Ty, write_loc);
-      StoreInst *  StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc);
-      StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"),
-                                 MDNode::get(C, None));
-
-    }
-
-    if (dictionary.size()) {
-
-      size_t memlen = 0, count = 0;
-
-      // sort and unique the dictionary
-      std::sort(dictionary.begin(), dictionary.end());
-      auto last = std::unique(dictionary.begin(), dictionary.end());
-      dictionary.erase(last, dictionary.end());
-
-      for (auto token : dictionary) {
-
-        memlen += token.length();
-        count++;
-
-      }
-
-      if (!be_quiet)
-        printf("AUTODICTIONARY: %zu string%s found\n", count,
-               count == 1 ? "" : "s");
-
-      if (count) {
-
-        if ((ptr = (char *)malloc(memlen + count)) == NULL) {
-
-          fprintf(stderr, "Error: malloc for %zu bytes failed!\n",
-                  memlen + count);
-          exit(-1);
-
-        }
-
-        count = 0;
-
-        size_t offset = 0;
-        for (auto token : dictionary) {
-
-          if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) {
-
-            ptr[offset++] = (uint8_t)token.length();
-            memcpy(ptr + offset, token.c_str(), token.length());
-            offset += token.length();
-            count++;
-
-          }
-
-        }
-
-        GlobalVariable *AFLDictionaryLen =
-            new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
-                               0, "__afl_dictionary_len");
-        ConstantInt *const_len = ConstantInt::get(Int32Ty, offset);
-        StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen);
-        StoreDictLen->setMetadata(M.getMDKindID("nosanitize"),
-                                  MDNode::get(C, None));
-
-        ArrayType *ArrayTy = ArrayType::get(IntegerType::get(C, 8), offset);
-        GlobalVariable *AFLInternalDictionary = new GlobalVariable(
-            M, ArrayTy, true, GlobalValue::ExternalLinkage,
-            ConstantDataArray::get(C,
-                                   *(new ArrayRef<char>((char *)ptr, offset))),
-            "__afl_internal_dictionary");
-        AFLInternalDictionary->setInitializer(ConstantDataArray::get(
-            C, *(new ArrayRef<char>((char *)ptr, offset))));
-        AFLInternalDictionary->setConstant(true);
-
-        GlobalVariable *AFLDictionary = new GlobalVariable(
-            M, PointerType::get(Int8Ty, 0), false, GlobalValue::ExternalLinkage,
-            0, "__afl_dictionary");
-
-        Value *AFLDictOff = IRB.CreateGEP(AFLInternalDictionary, Zero);
-        Value *AFLDictPtr =
-            IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Ty, 0));
-        StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary);
-        StoreDict->setMetadata(M.getMDKindID("nosanitize"),
-                               MDNode::get(C, None));
-
-      }
-
-    }
-
-  }
-
-  /* Say something nice. */
-
-  if (!be_quiet) {
-
-    if (!inst_blocks)
-      WARNF("No instrumentation targets found.");
-    else {
-
-      char modeline[100];
-      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
-               getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
-               getenv("AFL_USE_ASAN") ? ", ASAN" : "",
-               getenv("AFL_USE_MSAN") ? ", MSAN" : "",
-               getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
-               getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
-      OKF("Instrumented %d locations with no collisions (on average %llu "
-          "collisions would be in afl-gcc/vanilla AFL) (%s mode).",
-          inst_blocks, calculateCollisions(inst_blocks), modeline);
-
-    }
-
-  }
-
-  return true;
-
-}
-
-char AFLLTOPass::ID = 0;
-
-static void registerAFLLTOPass(const PassManagerBuilder &,
-                               legacy::PassManagerBase &PM) {
-
-  PM.add(new AFLLTOPass());
-
-}
-
-static RegisterPass<AFLLTOPass> X("afl-lto", "afl++ LTO instrumentation pass",
-                                  false, false);
-
-static RegisterStandardPasses RegisterAFLLTOPass(
-    PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerAFLLTOPass);
-
diff --git a/instrumentation/afl-llvm-lto-instrumentlist.so.cc b/instrumentation/afl-llvm-lto-instrumentlist.so.cc
index 416dbb88..906af879 100644
--- a/instrumentation/afl-llvm-lto-instrumentlist.so.cc
+++ b/instrumentation/afl-llvm-lto-instrumentlist.so.cc
@@ -15,7 +15,7 @@
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at:
 
-     http://www.apache.org/licenses/LICENSE-2.0
+     https://www.apache.org/licenses/LICENSE-2.0
 
    This library is plugged into LLVM when invoking clang through afl-clang-fast.
    It tells the compiler to add code roughly equivalent to the bits discussed
@@ -116,10 +116,15 @@ bool AFLcheckIfInstrument::runOnModule(Module &M) {
 
       auto &        Ctx = F.getContext();
       AttributeList Attrs = F.getAttributes();
-      AttrBuilder   NewAttrs;
+#if LLVM_VERSION_MAJOR >= 14
+      AttributeList NewAttrs = Attrs.addFnAttribute(Ctx, "skipinstrument");
+      F.setAttributes(NewAttrs);
+#else
+      AttrBuilder NewAttrs;
       NewAttrs.addAttribute("skipinstrument");
       F.setAttributes(
           Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
+#endif
 
     }
 
diff --git a/instrumentation/afl-llvm-pass.so.cc b/instrumentation/afl-llvm-pass.so.cc
index ecf28f31..8e22fde8 100644
--- a/instrumentation/afl-llvm-pass.so.cc
+++ b/instrumentation/afl-llvm-pass.so.cc
@@ -18,7 +18,7 @@
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at:
 
-     http://www.apache.org/licenses/LICENSE-2.0
+     https://www.apache.org/licenses/LICENSE-2.0
 
    This library is plugged into LLVM when invoking clang through afl-clang-fast.
    It tells the compiler to add code roughly equivalent to the bits discussed
@@ -52,7 +52,7 @@ typedef long double max_align_t;
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 
-#if LLVM_VERSION_MAJOR > 3 || \
+#if LLVM_VERSION_MAJOR >= 4 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
   #include "llvm/IR/DebugInfo.h"
   #include "llvm/IR/CFG.h"
@@ -114,7 +114,7 @@ uint64_t PowerOf2Ceil(unsigned in) {
 #endif
 
 /* #if LLVM_VERSION_STRING >= "4.0.1" */
-#if LLVM_VERSION_MAJOR > 4 || \
+#if LLVM_VERSION_MAJOR >= 5 || \
     (LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_PATCH >= 1)
   #define AFL_HAVE_VECTOR_INTRINSICS 1
 #endif
@@ -662,22 +662,7 @@ bool AFLCoverage::runOnModule(Module &M) {
       /* Update bitmap */
 
       if (use_threadsafe_counters) {                              /* Atomic */
-                                     /*
-                                     #if LLVM_VERSION_MAJOR < 9
-                                             if (neverZero_counters_str !=
-                                                 NULL) {  // with llvm 9 we make this the default as the bug
-                                     in llvm
-                                                          // is then fixed
-                                     #else
-                                             if (!skip_nozero) {
-                             
-                                     #endif
-                                               // register MapPtrIdx in a todo list
-                                               todo.push_back(MapPtrIdx);
-                             
-                                             } else {
-                             
-                                     */
+
         IRB.CreateAtomicRMW(llvm::AtomicRMWInst::BinOp::Add, MapPtrIdx, One,
 #if LLVM_VERSION_MAJOR >= 13
                             llvm::MaybeAlign(1),
@@ -696,13 +681,12 @@ bool AFLCoverage::runOnModule(Module &M) {
 
         Value *Incr = IRB.CreateAdd(Counter, One);
 
-#if LLVM_VERSION_MAJOR < 9
-        if (neverZero_counters_str !=
-            NULL) {  // with llvm 9 we make this the default as the bug in llvm
-                     // is then fixed
-#else
+#if LLVM_VERSION_MAJOR >= 9
         if (!skip_nozero) {
 
+#else
+        if (neverZero_counters_str != NULL) {
+
 #endif
           /* hexcoder: Realize a counter that skips zero during overflow.
            * Once this counter reaches its maximum value, it next increments to
@@ -956,11 +940,12 @@ bool AFLCoverage::runOnModule(Module &M) {
     else {
 
       char modeline[100];
-      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
+      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s%s",
                getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
                getenv("AFL_USE_ASAN") ? ", ASAN" : "",
                getenv("AFL_USE_MSAN") ? ", MSAN" : "",
                getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
+               getenv("AFL_USE_TSAN") ? ", TSAN" : "",
                getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
       OKF("Instrumented %d locations (%s mode, ratio %u%%).", inst_blocks,
           modeline, inst_ratio);
diff --git a/instrumentation/afl-llvm-rt-lto.o.c b/instrumentation/afl-llvm-rt-lto.o.c
index e53785ff..eb346157 100644
--- a/instrumentation/afl-llvm-rt-lto.o.c
+++ b/instrumentation/afl-llvm-rt-lto.o.c
@@ -6,7 +6,7 @@
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at:
 
-     http://www.apache.org/licenses/LICENSE-2.0
+     https://www.apache.org/licenses/LICENSE-2.0
 
 */
 
diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc
index 0562c5b2..07f80b2c 100644
--- a/instrumentation/cmplog-instructions-pass.cc
+++ b/instrumentation/cmplog-instructions-pass.cc
@@ -11,7 +11,7 @@
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at:
 
-     http://www.apache.org/licenses/LICENSE-2.0
+     https://www.apache.org/licenses/LICENSE-2.0
 
 */
 
@@ -37,7 +37,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Analysis/ValueTracking.h"
 
-#if LLVM_VERSION_MAJOR > 3 || \
+#if LLVM_VERSION_MAJOR >= 4 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
   #include "llvm/IR/Verifier.h"
   #include "llvm/IR/DebugInfo.h"
@@ -66,11 +66,11 @@ class CmpLogInstructions : public ModulePass {
 
   bool runOnModule(Module &M) override;
 
-#if LLVM_VERSION_MAJOR < 4
-  const char *getPassName() const override {
+#if LLVM_VERSION_MAJOR >= 4
+  StringRef getPassName() const override {
 
 #else
-  StringRef getPassName() const override {
+  const char *getPassName() const override {
 
 #endif
     return "cmplog instructions";
@@ -113,10 +113,10 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
   IntegerType *Int64Ty = IntegerType::getInt64Ty(C);
   IntegerType *Int128Ty = IntegerType::getInt128Ty(C);
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c1 = M.getOrInsertFunction("__cmplog_ins_hook1", VoidTy, Int8Ty, Int8Ty,
                                  Int8Ty
@@ -125,16 +125,16 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogHookIns1 = cast<Function>(c1);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogHookIns1 = c1;
+#else
+  Function *cmplogHookIns1 = cast<Function>(c1);
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c2 = M.getOrInsertFunction("__cmplog_ins_hook2", VoidTy, Int16Ty, Int16Ty,
                                  Int8Ty
@@ -143,16 +143,16 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogHookIns2 = cast<Function>(c2);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogHookIns2 = c2;
+#else
+  Function *cmplogHookIns2 = cast<Function>(c2);
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c4 = M.getOrInsertFunction("__cmplog_ins_hook4", VoidTy, Int32Ty, Int32Ty,
                                  Int8Ty
@@ -161,16 +161,16 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogHookIns4 = cast<Function>(c4);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogHookIns4 = c4;
+#else
+  Function *cmplogHookIns4 = cast<Function>(c4);
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c8 = M.getOrInsertFunction("__cmplog_ins_hook8", VoidTy, Int64Ty, Int64Ty,
                                  Int8Ty
@@ -179,16 +179,16 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogHookIns8 = cast<Function>(c8);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogHookIns8 = c8;
+#else
+  Function *cmplogHookIns8 = cast<Function>(c8);
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c16 = M.getOrInsertFunction("__cmplog_ins_hook16", VoidTy, Int128Ty,
                                   Int128Ty, Int8Ty
@@ -203,10 +203,10 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
   FunctionCallee cmplogHookIns16 = c16;
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       cN = M.getOrInsertFunction("__cmplog_ins_hookN", VoidTy, Int128Ty,
                                  Int128Ty, Int8Ty, Int8Ty
@@ -215,10 +215,10 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogHookInsN = cast<Function>(cN);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogHookInsN = cN;
+#else
+  Function *cmplogHookInsN = cast<Function>(cN);
 #endif
 
   GlobalVariable *AFLCmplogPtr = M.getNamedGlobal("__afl_cmp_map");
@@ -274,14 +274,15 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 
       Value *op0 = selectcmpInst->getOperand(0);
       Value *op1 = selectcmpInst->getOperand(1);
+      Value *op0_saved = op0, *op1_saved = op1;
+      auto   ty0 = op0->getType();
+      auto   ty1 = op1->getType();
 
-      IntegerType *        intTyOp0 = NULL;
-      IntegerType *        intTyOp1 = NULL;
-      unsigned             max_size = 0, cast_size = 0;
-      unsigned char        attr = 0;
-      std::vector<Value *> args;
-
-      CmpInst *cmpInst = dyn_cast<CmpInst>(selectcmpInst);
+      IntegerType *intTyOp0 = NULL;
+      IntegerType *intTyOp1 = NULL;
+      unsigned     max_size = 0, cast_size = 0;
+      unsigned     attr = 0, vector_cnt = 0;
+      CmpInst *    cmpInst = dyn_cast<CmpInst>(selectcmpInst);
 
       if (!cmpInst) { continue; }
 
@@ -327,7 +328,23 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 
       if (selectcmpInst->getOpcode() == Instruction::FCmp) {
 
-        auto ty0 = op0->getType();
+        if (ty0->isVectorTy()) {
+
+          VectorType *tt = dyn_cast<VectorType>(ty0);
+          if (!tt) {
+
+            fprintf(stderr, "Warning: cmplog cmp vector is not a vector!\n");
+            continue;
+
+          }
+
+#if (LLVM_VERSION_MAJOR >= 12)
+          vector_cnt = tt->getElementCount().getKnownMinValue();
+          ty0 = tt->getElementType();
+#endif
+
+        }
+
         if (ty0->isHalfTy()
 #if LLVM_VERSION_MAJOR >= 11
             || ty0->isBFloatTy()
@@ -342,13 +359,35 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
           max_size = 80;
         else if (ty0->isFP128Ty() || ty0->isPPC_FP128Ty())
           max_size = 128;
+#if (LLVM_VERSION_MAJOR >= 12)
+        else if (ty0->getTypeID() != llvm::Type::PointerTyID && !be_quiet)
+          fprintf(stderr, "Warning: unsupported cmp type for cmplog: %u!\n",
+                  ty0->getTypeID());
+#endif
 
         attr += 8;
 
       } else {
 
-        intTyOp0 = dyn_cast<IntegerType>(op0->getType());
-        intTyOp1 = dyn_cast<IntegerType>(op1->getType());
+        if (ty0->isVectorTy()) {
+
+#if (LLVM_VERSION_MAJOR >= 12)
+          VectorType *tt = dyn_cast<VectorType>(ty0);
+          if (!tt) {
+
+            fprintf(stderr, "Warning: cmplog cmp vector is not a vector!\n");
+            continue;
+
+          }
+
+          vector_cnt = tt->getElementCount().getKnownMinValue();
+          ty1 = ty0 = tt->getElementType();
+#endif
+
+        }
+
+        intTyOp0 = dyn_cast<IntegerType>(ty0);
+        intTyOp1 = dyn_cast<IntegerType>(ty1);
 
         if (intTyOp0 && intTyOp1) {
 
@@ -356,11 +395,28 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
                          ? intTyOp0->getBitWidth()
                          : intTyOp1->getBitWidth();
 
+        } else {
+
+#if (LLVM_VERSION_MAJOR >= 12)
+          if (ty0->getTypeID() != llvm::Type::PointerTyID && !be_quiet) {
+
+            fprintf(stderr, "Warning: unsupported cmp type for cmplog: %u\n",
+                    ty0->getTypeID());
+
+          }
+
+#endif
+
         }
 
       }
 
-      if (!max_size || max_size < 16) { continue; }
+      if (!max_size || max_size < 16) {
+
+        // fprintf(stderr, "too small\n");
+        continue;
+
+      }
 
       if (max_size % 8) { max_size = (((max_size / 8) + 1) * 8); }
 
@@ -393,67 +449,110 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 
       }
 
-      // errs() << "[CMPLOG] cmp  " << *cmpInst << "(in function " <<
-      // cmpInst->getFunction()->getName() << ")\n";
+      uint64_t cur = 0, last_val0 = 0, last_val1 = 0, cur_val;
 
-      // first bitcast to integer type of the same bitsize as the original
-      // type (this is a nop, if already integer)
-      Value *op0_i = IRB.CreateBitCast(
-          op0, IntegerType::get(C, op0->getType()->getPrimitiveSizeInBits()));
-      // then create a int cast, which does zext, trunc or bitcast. In our case
-      // usually zext to the next larger supported type (this is a nop if
-      // already the right type)
-      Value *V0 =
-          IRB.CreateIntCast(op0_i, IntegerType::get(C, cast_size), false);
-      args.push_back(V0);
-      Value *op1_i = IRB.CreateBitCast(
-          op1, IntegerType::get(C, op1->getType()->getPrimitiveSizeInBits()));
-      Value *V1 =
-          IRB.CreateIntCast(op1_i, IntegerType::get(C, cast_size), false);
-      args.push_back(V1);
+      while (1) {
 
-      // errs() << "[CMPLOG] casted parameters:\n0: " << *V0 << "\n1: " << *V1
-      // << "\n";
+        std::vector<Value *> args;
+        uint32_t             skip = 0;
 
-      ConstantInt *attribute = ConstantInt::get(Int8Ty, attr);
-      args.push_back(attribute);
+        if (vector_cnt) {
 
-      if (cast_size != max_size) {
+          op0 = IRB.CreateExtractElement(op0_saved, cur);
+          op1 = IRB.CreateExtractElement(op1_saved, cur);
+          ConstantInt *i0 = dyn_cast<ConstantInt>(op0);
+          ConstantInt *i1 = dyn_cast<ConstantInt>(op1);
+          if (i0 && i0->uge(0xffffffffffffffff) == false) {
 
-        ConstantInt *bitsize = ConstantInt::get(Int8Ty, (max_size / 8) - 1);
-        args.push_back(bitsize);
+            cur_val = i0->getZExtValue();
+            if (last_val0 && last_val0 == cur_val) { skip = 1; }
+            last_val0 = cur_val;
 
-      }
+          }
 
-      // fprintf(stderr, "_ExtInt(%u) castTo %u with attr %u didcast %u\n",
-      //         max_size, cast_size, attr);
+          if (i1 && i1->uge(0xffffffffffffffff) == false) {
 
-      switch (cast_size) {
+            cur_val = i1->getZExtValue();
+            if (last_val1 && last_val1 == cur_val) { skip = 1; }
+            last_val1 = cur_val;
 
-        case 8:
-          IRB.CreateCall(cmplogHookIns1, args);
-          break;
-        case 16:
-          IRB.CreateCall(cmplogHookIns2, args);
-          break;
-        case 32:
-          IRB.CreateCall(cmplogHookIns4, args);
-          break;
-        case 64:
-          IRB.CreateCall(cmplogHookIns8, args);
-          break;
-        case 128:
-          if (max_size == 128) {
+          }
+
+        }
+
+        if (!skip) {
+
+          // errs() << "[CMPLOG] cmp  " << *cmpInst << "(in function " <<
+          // cmpInst->getFunction()->getName() << ")\n";
+
+          // first bitcast to integer type of the same bitsize as the original
+          // type (this is a nop, if already integer)
+          Value *op0_i = IRB.CreateBitCast(
+              op0, IntegerType::get(C, ty0->getPrimitiveSizeInBits()));
+          // then create a int cast, which does zext, trunc or bitcast. In our
+          // case usually zext to the next larger supported type (this is a nop
+          // if already the right type)
+          Value *V0 =
+              IRB.CreateIntCast(op0_i, IntegerType::get(C, cast_size), false);
+          args.push_back(V0);
+          Value *op1_i = IRB.CreateBitCast(
+              op1, IntegerType::get(C, ty1->getPrimitiveSizeInBits()));
+          Value *V1 =
+              IRB.CreateIntCast(op1_i, IntegerType::get(C, cast_size), false);
+          args.push_back(V1);
 
-            IRB.CreateCall(cmplogHookIns16, args);
+          // errs() << "[CMPLOG] casted parameters:\n0: " << *V0 << "\n1: " <<
+          // *V1
+          // << "\n";
 
-          } else {
+          ConstantInt *attribute = ConstantInt::get(Int8Ty, attr);
+          args.push_back(attribute);
 
-            IRB.CreateCall(cmplogHookInsN, args);
+          if (cast_size != max_size) {
+
+            ConstantInt *bitsize = ConstantInt::get(Int8Ty, (max_size / 8) - 1);
+            args.push_back(bitsize);
 
           }
 
-          break;
+          // fprintf(stderr, "_ExtInt(%u) castTo %u with attr %u didcast %u\n",
+          //         max_size, cast_size, attr);
+
+          switch (cast_size) {
+
+            case 8:
+              IRB.CreateCall(cmplogHookIns1, args);
+              break;
+            case 16:
+              IRB.CreateCall(cmplogHookIns2, args);
+              break;
+            case 32:
+              IRB.CreateCall(cmplogHookIns4, args);
+              break;
+            case 64:
+              IRB.CreateCall(cmplogHookIns8, args);
+              break;
+            case 128:
+              if (max_size == 128) {
+
+                IRB.CreateCall(cmplogHookIns16, args);
+
+              } else {
+
+                IRB.CreateCall(cmplogHookInsN, args);
+
+              }
+
+              break;
+
+          }
+
+        }
+
+        /* else fprintf(stderr, "skipped\n"); */
+
+        ++cur;
+        if (cur >= vector_cnt) { break; }
 
       }
 
diff --git a/instrumentation/cmplog-routines-pass.cc b/instrumentation/cmplog-routines-pass.cc
index 1e2610f2..0565875e 100644
--- a/instrumentation/cmplog-routines-pass.cc
+++ b/instrumentation/cmplog-routines-pass.cc
@@ -11,7 +11,7 @@
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at:
 
-     http://www.apache.org/licenses/LICENSE-2.0
+     https://www.apache.org/licenses/LICENSE-2.0
 
 */
 
@@ -36,7 +36,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Analysis/ValueTracking.h"
 
-#if LLVM_VERSION_MAJOR > 3 || \
+#if LLVM_VERSION_MAJOR >= 4 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
   #include "llvm/IR/Verifier.h"
   #include "llvm/IR/DebugInfo.h"
@@ -65,11 +65,11 @@ class CmpLogRoutines : public ModulePass {
 
   bool runOnModule(Module &M) override;
 
-#if LLVM_VERSION_MAJOR < 4
-  const char *getPassName() const override {
+#if LLVM_VERSION_MAJOR >= 4
+  StringRef getPassName() const override {
 
 #else
-  StringRef getPassName() const override {
+  const char *getPassName() const override {
 
 #endif
     return "cmplog routines";
@@ -87,18 +87,20 @@ char CmpLogRoutines::ID = 0;
 
 bool CmpLogRoutines::hookRtns(Module &M) {
 
-  std::vector<CallInst *> calls, llvmStdStd, llvmStdC, gccStdStd, gccStdC;
-  LLVMContext &           C = M.getContext();
+  std::vector<CallInst *> calls, llvmStdStd, llvmStdC, gccStdStd, gccStdC,
+      Memcmp, Strcmp, Strncmp;
+  LLVMContext &C = M.getContext();
 
   Type *VoidTy = Type::getVoidTy(C);
   // PointerType *VoidPtrTy = PointerType::get(VoidTy, 0);
   IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
+  IntegerType *Int64Ty = IntegerType::getInt64Ty(C);
   PointerType *i8PtrTy = PointerType::get(Int8Ty, 0);
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c = M.getOrInsertFunction("__cmplog_rtn_hook", VoidTy, i8PtrTy, i8PtrTy
 #if LLVM_VERSION_MAJOR < 5
@@ -106,16 +108,16 @@ bool CmpLogRoutines::hookRtns(Module &M) {
                                 NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogHookFn = cast<Function>(c);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogHookFn = c;
+#else
+  Function *cmplogHookFn = cast<Function>(c);
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c1 = M.getOrInsertFunction("__cmplog_rtn_llvm_stdstring_stdstring",
                                  VoidTy, i8PtrTy, i8PtrTy
@@ -124,16 +126,16 @@ bool CmpLogRoutines::hookRtns(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogLlvmStdStd = cast<Function>(c1);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogLlvmStdStd = c1;
+#else
+  Function *cmplogLlvmStdStd = cast<Function>(c1);
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c2 = M.getOrInsertFunction("__cmplog_rtn_llvm_stdstring_cstring", VoidTy,
                                  i8PtrTy, i8PtrTy
@@ -142,16 +144,16 @@ bool CmpLogRoutines::hookRtns(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogLlvmStdC = cast<Function>(c2);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogLlvmStdC = c2;
+#else
+  Function *cmplogLlvmStdC = cast<Function>(c2);
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c3 = M.getOrInsertFunction("__cmplog_rtn_gcc_stdstring_stdstring", VoidTy,
                                  i8PtrTy, i8PtrTy
@@ -160,16 +162,16 @@ bool CmpLogRoutines::hookRtns(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogGccStdStd = cast<Function>(c3);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogGccStdStd = c3;
+#else
+  Function *cmplogGccStdStd = cast<Function>(c3);
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c4 = M.getOrInsertFunction("__cmplog_rtn_gcc_stdstring_cstring", VoidTy,
                                  i8PtrTy, i8PtrTy
@@ -178,10 +180,64 @@ bool CmpLogRoutines::hookRtns(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
+#if LLVM_VERSION_MAJOR >= 9
+  FunctionCallee cmplogGccStdC = c4;
+#else
   Function *cmplogGccStdC = cast<Function>(c4);
+#endif
+
+#if LLVM_VERSION_MAJOR >= 9
+  FunctionCallee
 #else
-  FunctionCallee cmplogGccStdC = c4;
+  Constant *
+#endif
+      c5 = M.getOrInsertFunction("__cmplog_rtn_hook_n", VoidTy, i8PtrTy,
+                                 i8PtrTy, Int64Ty
+#if LLVM_VERSION_MAJOR < 5
+                                 ,
+                                 NULL
+#endif
+      );
+#if LLVM_VERSION_MAJOR >= 9
+  FunctionCallee cmplogHookFnN = c5;
+#else
+  Function *cmplogHookFnN = cast<Function>(c5);
+#endif
+
+#if LLVM_VERSION_MAJOR >= 9
+  FunctionCallee
+#else
+  Constant *
+#endif
+      c6 = M.getOrInsertFunction("__cmplog_rtn_hook_strn", VoidTy, i8PtrTy,
+                                 i8PtrTy, Int64Ty
+#if LLVM_VERSION_MAJOR < 5
+                                 ,
+                                 NULL
+#endif
+      );
+#if LLVM_VERSION_MAJOR >= 9
+  FunctionCallee cmplogHookFnStrN = c6;
+#else
+  Function *cmplogHookFnStrN = cast<Function>(c6);
+#endif
+
+#if LLVM_VERSION_MAJOR >= 9
+  FunctionCallee
+#else
+  Constant *
+#endif
+      c7 = M.getOrInsertFunction("__cmplog_rtn_hook_str", VoidTy, i8PtrTy,
+                                 i8PtrTy
+#if LLVM_VERSION_MAJOR < 5
+                                 ,
+                                 NULL
+#endif
+      );
+#if LLVM_VERSION_MAJOR >= 9
+  FunctionCallee cmplogHookFnStr = c7;
+#else
+  Function *cmplogHookFnStr = cast<Function>(c7);
 #endif
 
   GlobalVariable *AFLCmplogPtr = M.getNamedGlobal("__afl_cmp_map");
@@ -214,12 +270,93 @@ bool CmpLogRoutines::hookRtns(Module &M) {
           if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
 
           FunctionType *FT = Callee->getFunctionType();
+          std::string   FuncName = Callee->getName().str();
 
           bool isPtrRtn = FT->getNumParams() >= 2 &&
                           !FT->getReturnType()->isVoidTy() &&
                           FT->getParamType(0) == FT->getParamType(1) &&
                           FT->getParamType(0)->isPointerTy();
 
+          bool isPtrRtnN = FT->getNumParams() >= 3 &&
+                           !FT->getReturnType()->isVoidTy() &&
+                           FT->getParamType(0) == FT->getParamType(1) &&
+                           FT->getParamType(0)->isPointerTy() &&
+                           FT->getParamType(2)->isIntegerTy();
+          if (isPtrRtnN) {
+
+            auto intTyOp =
+                dyn_cast<IntegerType>(callInst->getArgOperand(2)->getType());
+            if (intTyOp) {
+
+              if (intTyOp->getBitWidth() != 32 &&
+                  intTyOp->getBitWidth() != 64) {
+
+                isPtrRtnN = false;
+
+              }
+
+            }
+
+          }
+
+          bool isMemcmp =
+              (!FuncName.compare("memcmp") || !FuncName.compare("bcmp") ||
+               !FuncName.compare("CRYPTO_memcmp") ||
+               !FuncName.compare("OPENSSL_memcmp") ||
+               !FuncName.compare("memcmp_const_time") ||
+               !FuncName.compare("memcmpct"));
+          isMemcmp &= FT->getNumParams() == 3 &&
+                      FT->getReturnType()->isIntegerTy(32) &&
+                      FT->getParamType(0)->isPointerTy() &&
+                      FT->getParamType(1)->isPointerTy() &&
+                      FT->getParamType(2)->isIntegerTy();
+
+          bool isStrcmp =
+              (!FuncName.compare("strcmp") || !FuncName.compare("xmlStrcmp") ||
+               !FuncName.compare("xmlStrEqual") ||
+               !FuncName.compare("g_strcmp0") ||
+               !FuncName.compare("curl_strequal") ||
+               !FuncName.compare("strcsequal") ||
+               !FuncName.compare("strcasecmp") ||
+               !FuncName.compare("stricmp") ||
+               !FuncName.compare("ap_cstr_casecmp") ||
+               !FuncName.compare("OPENSSL_strcasecmp") ||
+               !FuncName.compare("xmlStrcasecmp") ||
+               !FuncName.compare("g_strcasecmp") ||
+               !FuncName.compare("g_ascii_strcasecmp") ||
+               !FuncName.compare("Curl_strcasecompare") ||
+               !FuncName.compare("Curl_safe_strcasecompare") ||
+               !FuncName.compare("cmsstrcasecmp") ||
+               !FuncName.compare("strstr") ||
+               !FuncName.compare("g_strstr_len") ||
+               !FuncName.compare("ap_strcasestr") ||
+               !FuncName.compare("xmlStrstr") ||
+               !FuncName.compare("xmlStrcasestr") ||
+               !FuncName.compare("g_str_has_prefix") ||
+               !FuncName.compare("g_str_has_suffix"));
+          isStrcmp &=
+              FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
+              FT->getParamType(0) == FT->getParamType(1) &&
+              FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
+
+          bool isStrncmp = (!FuncName.compare("strncmp") ||
+                            !FuncName.compare("xmlStrncmp") ||
+                            !FuncName.compare("curl_strnequal") ||
+                            !FuncName.compare("strncasecmp") ||
+                            !FuncName.compare("strnicmp") ||
+                            !FuncName.compare("ap_cstr_casecmpn") ||
+                            !FuncName.compare("OPENSSL_strncasecmp") ||
+                            !FuncName.compare("xmlStrncasecmp") ||
+                            !FuncName.compare("g_ascii_strncasecmp") ||
+                            !FuncName.compare("Curl_strncasecompare") ||
+                            !FuncName.compare("g_strncasecmp"));
+          isStrncmp &= FT->getNumParams() == 3 &&
+                       FT->getReturnType()->isIntegerTy(32) &&
+                       FT->getParamType(0) == FT->getParamType(1) &&
+                       FT->getParamType(0) ==
+                           IntegerType::getInt8PtrTy(M.getContext()) &&
+                       FT->getParamType(2)->isIntegerTy();
+
           bool isGccStdStringStdString =
               Callee->getName().find("__is_charIT_EE7__value") !=
                   std::string::npos &&
@@ -267,13 +404,19 @@ bool CmpLogRoutines::hookRtns(Module &M) {
           */
 
           if (isGccStdStringCString || isGccStdStringStdString ||
-              isLlvmStdStringStdString || isLlvmStdStringCString) {
+              isLlvmStdStringStdString || isLlvmStdStringCString || isMemcmp ||
+              isStrcmp || isStrncmp) {
 
-            isPtrRtn = false;
+            isPtrRtnN = isPtrRtn = false;
 
           }
 
+          if (isPtrRtnN) { isPtrRtn = false; }
+
           if (isPtrRtn) { calls.push_back(callInst); }
+          if (isMemcmp || isPtrRtnN) { Memcmp.push_back(callInst); }
+          if (isStrcmp) { Strcmp.push_back(callInst); }
+          if (isStrncmp) { Strncmp.push_back(callInst); }
           if (isGccStdStringStdString) { gccStdStd.push_back(callInst); }
           if (isGccStdStringCString) { gccStdC.push_back(callInst); }
           if (isLlvmStdStringStdString) { llvmStdStd.push_back(callInst); }
@@ -288,7 +431,8 @@ bool CmpLogRoutines::hookRtns(Module &M) {
   }
 
   if (!calls.size() && !gccStdStd.size() && !gccStdC.size() &&
-      !llvmStdStd.size() && !llvmStdC.size())
+      !llvmStdStd.size() && !llvmStdC.size() && !Memcmp.size() &&
+      Strcmp.size() && Strncmp.size())
     return false;
 
   /*
@@ -323,6 +467,96 @@ bool CmpLogRoutines::hookRtns(Module &M) {
 
   }
 
+  for (auto &callInst : Memcmp) {
+
+    Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1),
+          *v3P = callInst->getArgOperand(2);
+
+    IRBuilder<> IRB2(callInst->getParent());
+    IRB2.SetInsertPoint(callInst);
+
+    LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr);
+    CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+    auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null);
+    auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false);
+
+    IRBuilder<> IRB(ThenTerm);
+
+    std::vector<Value *> args;
+    Value *              v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
+    Value *              v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
+    Value *              v3Pbitcast = IRB.CreateBitCast(
+        v3P, IntegerType::get(C, v3P->getType()->getPrimitiveSizeInBits()));
+    Value *v3Pcasted =
+        IRB.CreateIntCast(v3Pbitcast, IntegerType::get(C, 64), false);
+    args.push_back(v1Pcasted);
+    args.push_back(v2Pcasted);
+    args.push_back(v3Pcasted);
+
+    IRB.CreateCall(cmplogHookFnN, args);
+
+    // errs() << callInst->getCalledFunction()->getName() << "\n";
+
+  }
+
+  for (auto &callInst : Strcmp) {
+
+    Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1);
+
+    IRBuilder<> IRB2(callInst->getParent());
+    IRB2.SetInsertPoint(callInst);
+
+    LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr);
+    CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+    auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null);
+    auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false);
+
+    IRBuilder<> IRB(ThenTerm);
+
+    std::vector<Value *> args;
+    Value *              v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
+    Value *              v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
+    args.push_back(v1Pcasted);
+    args.push_back(v2Pcasted);
+
+    IRB.CreateCall(cmplogHookFnStr, args);
+
+    // errs() << callInst->getCalledFunction()->getName() << "\n";
+
+  }
+
+  for (auto &callInst : Strncmp) {
+
+    Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1),
+          *v3P = callInst->getArgOperand(2);
+
+    IRBuilder<> IRB2(callInst->getParent());
+    IRB2.SetInsertPoint(callInst);
+
+    LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr);
+    CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+    auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null);
+    auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false);
+
+    IRBuilder<> IRB(ThenTerm);
+
+    std::vector<Value *> args;
+    Value *              v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
+    Value *              v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
+    Value *              v3Pbitcast = IRB.CreateBitCast(
+        v3P, IntegerType::get(C, v3P->getType()->getPrimitiveSizeInBits()));
+    Value *v3Pcasted =
+        IRB.CreateIntCast(v3Pbitcast, IntegerType::get(C, 64), false);
+    args.push_back(v1Pcasted);
+    args.push_back(v2Pcasted);
+    args.push_back(v3Pcasted);
+
+    IRB.CreateCall(cmplogHookFnStrN, args);
+
+    // errs() << callInst->getCalledFunction()->getName() << "\n";
+
+  }
+
   for (auto &callInst : gccStdStd) {
 
     Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1);
diff --git a/instrumentation/cmplog-switches-pass.cc b/instrumentation/cmplog-switches-pass.cc
index c42d44fe..bcd5f8bd 100644
--- a/instrumentation/cmplog-switches-pass.cc
+++ b/instrumentation/cmplog-switches-pass.cc
@@ -11,7 +11,7 @@
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at:
 
-     http://www.apache.org/licenses/LICENSE-2.0
+     https://www.apache.org/licenses/LICENSE-2.0
 
 */
 
@@ -37,7 +37,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Analysis/ValueTracking.h"
 
-#if LLVM_VERSION_MAJOR > 3 || \
+#if LLVM_VERSION_MAJOR >= 4 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
   #include "llvm/IR/Verifier.h"
   #include "llvm/IR/DebugInfo.h"
@@ -112,10 +112,10 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
   IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
   IntegerType *Int64Ty = IntegerType::getInt64Ty(C);
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c1 = M.getOrInsertFunction("__cmplog_ins_hook1", VoidTy, Int8Ty, Int8Ty,
                                  Int8Ty
@@ -124,16 +124,16 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogHookIns1 = cast<Function>(c1);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogHookIns1 = c1;
+#else
+  Function *cmplogHookIns1 = cast<Function>(c1);
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c2 = M.getOrInsertFunction("__cmplog_ins_hook2", VoidTy, Int16Ty, Int16Ty,
                                  Int8Ty
@@ -142,16 +142,16 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogHookIns2 = cast<Function>(c2);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogHookIns2 = c2;
+#else
+  Function *cmplogHookIns2 = cast<Function>(c2);
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c4 = M.getOrInsertFunction("__cmplog_ins_hook4", VoidTy, Int32Ty, Int32Ty,
                                  Int8Ty
@@ -160,16 +160,16 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogHookIns4 = cast<Function>(c4);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogHookIns4 = c4;
+#else
+  Function *cmplogHookIns4 = cast<Function>(c4);
 #endif
 
-#if LLVM_VERSION_MAJOR < 9
-  Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee
+#else
+  Constant *
 #endif
       c8 = M.getOrInsertFunction("__cmplog_ins_hook8", VoidTy, Int64Ty, Int64Ty,
                                  Int8Ty
@@ -178,10 +178,10 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
                                  NULL
 #endif
       );
-#if LLVM_VERSION_MAJOR < 9
-  Function *cmplogHookIns8 = cast<Function>(c8);
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee cmplogHookIns8 = c8;
+#else
+  Function *cmplogHookIns8 = cast<Function>(c8);
 #endif
 
   GlobalVariable *AFLCmplogPtr = M.getNamedGlobal("__afl_cmp_map");
diff --git a/instrumentation/compare-transform-pass.so.cc b/instrumentation/compare-transform-pass.so.cc
index 288e8282..ef3bd66b 100644
--- a/instrumentation/compare-transform-pass.so.cc
+++ b/instrumentation/compare-transform-pass.so.cc
@@ -5,7 +5,7 @@
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *     https://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -35,7 +35,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Analysis/ValueTracking.h"
 
-#if LLVM_VERSION_MAJOR > 3 || \
+#if LLVM_VERSION_MAJOR >= 4 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
   #include "llvm/IR/Verifier.h"
   #include "llvm/IR/DebugInfo.h"
@@ -64,11 +64,11 @@ class CompareTransform : public ModulePass {
 
   bool runOnModule(Module &M) override;
 
-#if LLVM_VERSION_MAJOR < 4
-  const char *getPassName() const override {
+#if LLVM_VERSION_MAJOR >= 4
+  StringRef getPassName() const override {
 
 #else
-  StringRef      getPassName() const override {
+  const char *getPassName() const override {
 
 #endif
     return "transforms compare functions";
@@ -100,17 +100,17 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
   IntegerType *                    Int32Ty = IntegerType::getInt32Ty(C);
   IntegerType *                    Int64Ty = IntegerType::getInt64Ty(C);
 
-#if LLVM_VERSION_MAJOR < 9
-  Function *tolowerFn;
-#else
+#if LLVM_VERSION_MAJOR >= 9
   FunctionCallee tolowerFn;
+#else
+  Function *  tolowerFn;
 #endif
   {
 
-#if LLVM_VERSION_MAJOR < 9
-    Constant *
-#else
+#if LLVM_VERSION_MAJOR >= 9
     FunctionCallee
+#else
+    Constant *
 #endif
         c = M.getOrInsertFunction("tolower", Int32Ty, Int32Ty
 #if LLVM_VERSION_MAJOR < 5
@@ -118,10 +118,10 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
                                   NULL
 #endif
         );
-#if LLVM_VERSION_MAJOR < 9
-    tolowerFn = cast<Function>(c);
-#else
+#if LLVM_VERSION_MAJOR >= 9
     tolowerFn = c;
+#else
+    tolowerFn = cast<Function>(c);
 #endif
 
   }
@@ -445,6 +445,10 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
     }
 
+    // the following is in general OK, but strncmp is sometimes used in binary
+    // data structures and this can result in crashes :( so it is commented out
+    /*
+
     // add null termination character implicit in c strings
     if (!isMemcmp && TmpConstStr[TmpConstStr.length() - 1]) {
 
@@ -452,10 +456,12 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
     }
 
+    */
+
     // in the unusual case the const str has embedded null
     // characters, the string comparison functions should terminate
     // at the first null
-    if (!isMemcmp) {
+    if (!isMemcmp && TmpConstStr.find('\0') != std::string::npos) {
 
       TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1);
 
@@ -490,10 +496,10 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
     PHINode *PN = PHINode::Create(
         Int32Ty, (next_lenchk_bb ? 2 : 1) * unrollLen + 1, "cmp_phi");
 
-#if LLVM_VERSION_MAJOR < 8
-    TerminatorInst *term = bb->getTerminator();
-#else
+#if LLVM_VERSION_MAJOR >= 8
     Instruction *term = bb->getTerminator();
+#else
+    TerminatorInst *term = bb->getTerminator();
 #endif
     BranchInst::Create(next_lenchk_bb ? next_lenchk_bb : next_cmp_bb, bb);
     term->eraseFromParent();
diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc
index 13f45b69..95485be9 100644
--- a/instrumentation/split-compares-pass.so.cc
+++ b/instrumentation/split-compares-pass.so.cc
@@ -6,7 +6,7 @@
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *     https://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -34,7 +34,7 @@
 #include "llvm/IR/Module.h"
 
 #include "llvm/IR/IRBuilder.h"
-#if LLVM_VERSION_MAJOR > 3 || \
+#if LLVM_VERSION_MAJOR >= 4 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
   #include "llvm/IR/Verifier.h"
   #include "llvm/IR/DebugInfo.h"
@@ -578,16 +578,16 @@ bool SplitComparesTransform::splitCompare(CmpInst *cmp_inst, Module &M,
 
       /* dependent on the cmp of the high parts go to the end or go on with
        * the comparison */
-      auto        term = bb->getTerminator();
-      BranchInst *br = nullptr;
+      auto term = bb->getTerminator();
+
       if (pred == CmpInst::ICMP_EQ) {
 
-        br = BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb);
+        BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb);
 
       } else {
 
-        /* CmpInst::ICMP_NE */
-        br = BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb);
+        // CmpInst::ICMP_NE
+        BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb);
 
       }
 
@@ -675,7 +675,7 @@ bool SplitComparesTransform::splitCompare(CmpInst *cmp_inst, Module &M,
   ReplaceInstWithInst(cmp_inst->getParent()->getInstList(), ii, PN);
 
   // We split the comparison into low and high. If this isn't our target
-  // bitwidth we recursivly split the low and high parts again until we have
+  // bitwidth we recursively split the low and high parts again until we have
   // target bitwidth.
   if ((bitw / 2) > target_bitwidth) {
 
@@ -796,7 +796,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
 
   LLVMContext &C = M.getContext();
 
-#if LLVM_VERSION_MAJOR > 3 || \
+#if LLVM_VERSION_MAJOR >= 4 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
   const DataLayout &dl = M.getDataLayout();
 
@@ -1398,7 +1398,7 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 
   bool brokenDebug = false;
   if (verifyModule(M, &errs()
-#if LLVM_VERSION_MAJOR > 3 || \
+#if LLVM_VERSION_MAJOR >= 4 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
                           ,
                    &brokenDebug  // 9th May 2016
diff --git a/instrumentation/split-switches-pass.so.cc b/instrumentation/split-switches-pass.so.cc
index 82f198aa..c0fa7c9c 100644
--- a/instrumentation/split-switches-pass.so.cc
+++ b/instrumentation/split-switches-pass.so.cc
@@ -5,7 +5,7 @@
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ *     https://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -37,7 +37,7 @@
 #include "llvm/Analysis/ValueTracking.h"
 
 #include "llvm/IR/IRBuilder.h"
-#if LLVM_VERSION_MAJOR > 3 || \
+#if LLVM_VERSION_MAJOR >= 4 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
   #include "llvm/IR/Verifier.h"
   #include "llvm/IR/DebugInfo.h"
@@ -369,10 +369,10 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) {
     CaseVector Cases;
     for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e;
          ++i)
-#if LLVM_VERSION_MAJOR < 5
-      Cases.push_back(CaseExpr(i.getCaseValue(), i.getCaseSuccessor()));
-#else
+#if LLVM_VERSION_MAJOR >= 5
       Cases.push_back(CaseExpr(i->getCaseValue(), i->getCaseSuccessor()));
+#else
+      Cases.push_back(CaseExpr(i.getCaseValue(), i.getCaseSuccessor()));
 #endif
     /* bugfix thanks to pbst
      * round up bytesChecked (in case getBitWidth() % 8 != 0) */