37 files changed, 1922 insertions, 997 deletions
diff --git a/README.md b/README.md
index bc547b3c..91f28118 100644
--- a/README.md
+++ b/README.md
@@ -88,20 +88,20 @@ behaviours and defaults:
   with laf-intel and redqueen, frida mode, unicorn mode, gcc plugin, full *BSD,
   Mac OS, Solaris and Android support and much, much, much more.
 
-  | Feature/Instrumentation  | afl-gcc | llvm      | gcc_plugin | frida_mode | qemu_mode        |unicorn_mode |
-  | -------------------------|:-------:|:---------:|:----------:|:----------:|:----------------:|:------------:|
-  | Threadsafe counters      |         |     x(3)  |            |            |                  |              |
-  | NeverZero                | x86[_64]|     x(1)  |     x      |     x      |         x        |       x      |
-  | Persistent Mode          |         |     x     |     x      |  x86[_64]  | x86[_64]/arm[64] |       x      |
-  | LAF-Intel / CompCov      |         |     x     |            |            | x86[_64]/arm[64] | x86[_64]/arm |
-  | CmpLog                   |         |     x     |            |  x86[_64]  | x86[_64]/arm[64] |              |
-  | Selective Instrumentation|         |     x     |     x      |     x      |         x        |              |
-  | Non-Colliding Coverage   |         |     x(4)  |            |            |        (x)(5)    |              |
-  | Ngram prev_loc Coverage  |         |     x(6)  |            |            |                  |              |
-  | Context Coverage         |         |     x(6)  |            |            |                  |              |
-  | Auto Dictionary          |         |     x(7)  |            |            |                  |              |
-  | Snapshot LKM Support     |         |    (x)(8) |    (x)(8)  |            |        (x)(5)    |              |
-  | Shared Memory Testcases  |         |     x     |     x      |     x      |         x        |       x      |
+  | Feature/Instrumentation  | afl-gcc | llvm      | gcc_plugin | frida_mode       | qemu_mode        |unicorn_mode      |
+  | -------------------------|:-------:|:---------:|:----------:|:----------------:|:----------------:|:----------------:|
+  | Threadsafe counters      |         |     x(3)  |            |                  |                  |                  |
+  | NeverZero                | x86[_64]|     x(1)  |     x      |         x        |         x        |         x        |
+  | Persistent Mode          |         |     x     |     x      | x86[_64]/arm64   | x86[_64]/arm[64] |         x        |
+  | LAF-Intel / CompCov      |         |     x     |            |                  | x86[_64]/arm[64] | x86[_64]/arm[64] |
+  | CmpLog                   |         |     x     |            | x86[_64]/arm64   | x86[_64]/arm[64] |                  |
+  | Selective Instrumentation|         |     x     |     x      |         x        |         x        |                  |
+  | Non-Colliding Coverage   |         |     x(4)  |            |                  |        (x)(5)    |                  |
+  | Ngram prev_loc Coverage  |         |     x(6)  |            |                  |                  |                  |
+  | Context Coverage         |         |     x(6)  |            |                  |                  |                  |
+  | Auto Dictionary          |         |     x(7)  |            |                  |                  |                  |
+  | Snapshot LKM Support     |         |    (x)(8) |    (x)(8)  |                  |        (x)(5)    |                  |
+  | Shared Memory Testcases  |         |     x     |     x      | x86[_64]/arm64   |         x        |         x        |
 
   1. default for LLVM >= 9.0, env var for older version due an efficiency bug in previous llvm versions
   2. GCC creates non-performant code, hence it is disabled in gcc_plugin
@@ -796,7 +796,7 @@ If you do not have to use Unicorn the following setup is recommended to use
 qemu_mode:
   * run 1 afl-fuzz -Q instance with CMPLOG (`-c 0` + `AFL_COMPCOV_LEVEL=2`)
   * run 1 afl-fuzz -Q instance with QASAN  (`AFL_USE_QASAN=1`)
-  * run 1 afl-fuzz -Q instance with LAF (``AFL_PRELOAD=libcmpcov.so` + `AFL_COMPCOV_LEVEL=2`)
+  * run 1 afl-fuzz -Q instance with LAF (`AFL_PRELOAD=libcmpcov.so` + `AFL_COMPCOV_LEVEL=2`)
 Alternatively you can use frida_mode, just switch `-Q` with `-O` and remove the
 LAF instance.
 
diff --git a/custom_mutators/radamsa/custom_mutator_helpers.h b/custom_mutators/radamsa/custom_mutator_helpers.h
index e23c0b6a..f7532ef9 100644..120000
--- a/custom_mutators/radamsa/custom_mutator_helpers.h
+++ b/custom_mutators/radamsa/custom_mutator_helpers.h
@@ -1,342 +1 @@
-#ifndef CUSTOM_MUTATOR_HELPERS
-#define CUSTOM_MUTATOR_HELPERS
-
-#include "config.h"
-#include "types.h"
-#include <stdlib.h>
-
-#define INITIAL_GROWTH_SIZE (64)
-
-#define RAND_BELOW(limit) (rand() % (limit))
-
-/* Use in a struct: creates a name_buf and a name_size variable. */
-#define BUF_VAR(type, name) \
-  type * name##_buf;        \
-  size_t name##_size;
-/* this filles in `&structptr->something_buf, &structptr->something_size`. */
-#define BUF_PARAMS(struct, name) \
-  (void **)&struct->name##_buf, &struct->name##_size
-
-typedef struct {
-
-} afl_t;
-
-static void surgical_havoc_mutate(u8 *out_buf, s32 begin, s32 end) {
-
-  static s8  interesting_8[] = {INTERESTING_8};
-  static s16 interesting_16[] = {INTERESTING_8, INTERESTING_16};
-  static s32 interesting_32[] = {INTERESTING_8, INTERESTING_16, INTERESTING_32};
-
-  switch (RAND_BELOW(12)) {
-
-    case 0: {
-
-      /* Flip a single bit somewhere. Spooky! */
-
-      s32 bit_idx = ((RAND_BELOW(end - begin) + begin) << 3) + RAND_BELOW(8);
-
-      out_buf[bit_idx >> 3] ^= 128 >> (bit_idx & 7);
-
-      break;
-
-    }
-
-    case 1: {
-
-      /* Set byte to interesting value. */
-
-      u8 val = interesting_8[RAND_BELOW(sizeof(interesting_8))];
-      out_buf[(RAND_BELOW(end - begin) + begin)] = val;
-
-      break;
-
-    }
-
-    case 2: {
-
-      /* Set word to interesting value, randomly choosing endian. */
-
-      if (end - begin < 2) break;
-
-      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
-
-      if (byte_idx >= end - 1) break;
-
-      switch (RAND_BELOW(2)) {
-
-        case 0:
-          *(u16 *)(out_buf + byte_idx) =
-              interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)];
-          break;
-        case 1:
-          *(u16 *)(out_buf + byte_idx) =
-              SWAP16(interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)]);
-          break;
-
-      }
-
-      break;
-
-    }
-
-    case 3: {
-
-      /* Set dword to interesting value, randomly choosing endian. */
-
-      if (end - begin < 4) break;
-
-      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
-
-      if (byte_idx >= end - 3) break;
-
-      switch (RAND_BELOW(2)) {
-
-        case 0:
-          *(u32 *)(out_buf + byte_idx) =
-              interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)];
-          break;
-        case 1:
-          *(u32 *)(out_buf + byte_idx) =
-              SWAP32(interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]);
-          break;
-
-      }
-
-      break;
-
-    }
-
-    case 4: {
-
-      /* Set qword to interesting value, randomly choosing endian. */
-
-      if (end - begin < 8) break;
-
-      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
-
-      if (byte_idx >= end - 7) break;
-
-      switch (RAND_BELOW(2)) {
-
-        case 0:
-          *(u64 *)(out_buf + byte_idx) =
-              (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)];
-          break;
-        case 1:
-          *(u64 *)(out_buf + byte_idx) = SWAP64(
-              (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]);
-          break;
-
-      }
-
-      break;
-
-    }
-
-    case 5: {
-
-      /* Randomly subtract from byte. */
-
-      out_buf[(RAND_BELOW(end - begin) + begin)] -= 1 + RAND_BELOW(ARITH_MAX);
-
-      break;
-
-    }
-
-    case 6: {
-
-      /* Randomly add to byte. */
-
-      out_buf[(RAND_BELOW(end - begin) + begin)] += 1 + RAND_BELOW(ARITH_MAX);
-
-      break;
-
-    }
-
-    case 7: {
-
-      /* Randomly subtract from word, random endian. */
-
-      if (end - begin < 2) break;
-
-      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
-
-      if (byte_idx >= end - 1) break;
-
-      if (RAND_BELOW(2)) {
-
-        *(u16 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX);
-
-      } else {
-
-        u16 num = 1 + RAND_BELOW(ARITH_MAX);
-
-        *(u16 *)(out_buf + byte_idx) =
-            SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) - num);
-
-      }
-
-      break;
-
-    }
-
-    case 8: {
-
-      /* Randomly add to word, random endian. */
-
-      if (end - begin < 2) break;
-
-      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
-
-      if (byte_idx >= end - 1) break;
-
-      if (RAND_BELOW(2)) {
-
-        *(u16 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX);
-
-      } else {
-
-        u16 num = 1 + RAND_BELOW(ARITH_MAX);
-
-        *(u16 *)(out_buf + byte_idx) =
-            SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) + num);
-
-      }
-
-      break;
-
-    }
-
-    case 9: {
-
-      /* Randomly subtract from dword, random endian. */
-
-      if (end - begin < 4) break;
-
-      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
-
-      if (byte_idx >= end - 3) break;
-
-      if (RAND_BELOW(2)) {
-
-        *(u32 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX);
-
-      } else {
-
-        u32 num = 1 + RAND_BELOW(ARITH_MAX);
-
-        *(u32 *)(out_buf + byte_idx) =
-            SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) - num);
-
-      }
-
-      break;
-
-    }
-
-    case 10: {
-
-      /* Randomly add to dword, random endian. */
-
-      if (end - begin < 4) break;
-
-      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
-
-      if (byte_idx >= end - 3) break;
-
-      if (RAND_BELOW(2)) {
-
-        *(u32 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX);
-
-      } else {
-
-        u32 num = 1 + RAND_BELOW(ARITH_MAX);
-
-        *(u32 *)(out_buf + byte_idx) =
-            SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) + num);
-
-      }
-
-      break;
-
-    }
-
-    case 11: {
-
-      /* Just set a random byte to a random value. Because,
-         why not. We use XOR with 1-255 to eliminate the
-         possibility of a no-op. */
-
-      out_buf[(RAND_BELOW(end - begin) + begin)] ^= 1 + RAND_BELOW(255);
-
-      break;
-
-    }
-
-  }
-
-}
-
-/* This function calculates the next power of 2 greater or equal its argument.
- @return The rounded up power of 2 (if no overflow) or 0 on overflow.
-*/
-static inline size_t next_pow2(size_t in) {
-
-  if (in == 0 || in > (size_t)-1)
-    return 0;                  /* avoid undefined behaviour under-/overflow */
-  size_t out = in - 1;
-  out |= out >> 1;
-  out |= out >> 2;
-  out |= out >> 4;
-  out |= out >> 8;
-  out |= out >> 16;
-  return out + 1;
-
-}
-
-/* This function makes sure *size is > size_needed after call.
- It will realloc *buf otherwise.
- *size will grow exponentially as per:
- https://blog.mozilla.org/nnethercote/2014/11/04/please-grow-your-buffers-exponentially/
- Will return NULL and free *buf if size_needed is <1 or realloc failed.
- @return For convenience, this function returns *buf.
- */
-static inline void *maybe_grow(void **buf, size_t *size, size_t size_needed) {
-
-  /* No need to realloc */
-  if (likely(size_needed && *size >= size_needed)) return *buf;
-
-  /* No initial size was set */
-  if (size_needed < INITIAL_GROWTH_SIZE) size_needed = INITIAL_GROWTH_SIZE;
-
-  /* grow exponentially */
-  size_t next_size = next_pow2(size_needed);
-
-  /* handle overflow */
-  if (!next_size) { next_size = size_needed; }
-
-  /* alloc */
-  *buf = realloc(*buf, next_size);
-  *size = *buf ? next_size : 0;
-
-  return *buf;
-
-}
-
-/* Swaps buf1 ptr and buf2 ptr, as well as their sizes */
-static inline void afl_swap_bufs(void **buf1, size_t *size1, void **buf2,
-                             size_t *size2) {
-
-  void * scratch_buf = *buf1;
-  size_t scratch_size = *size1;
-  *buf1 = *buf2;
-  *size1 = *size2;
-  *buf2 = scratch_buf;
-  *size2 = scratch_size;
-
-}
-
-#undef INITIAL_GROWTH_SIZE
-
-#endif
-
+../examples/custom_mutator_helpers.h
\ No newline at end of file
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 6c851460..9f70535a 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -9,20 +9,29 @@ Want to stay in the loop on major new features? Join our mailing list by
 sending a mail to <afl-users+subscribe@googlegroups.com>.
 
 ### Version ++3.14a (release)
-  - Fix for llvm 13
   - afl-fuzz:
     - fix -F when a '/' was part of the parameter
+    - fixed a crash for cmplog for very slow inputs
     - removed implied -D determinstic from -M main
-    - if the target becomes unavailable check out out/default/error.txt for
-      an indicator why
-  - afl-cc
+    - if the target becomes unavailable check out out/default/error.txt
+      for an indicator why
+    - AFL_CAL_FAST was a dead env, now does the same as AFL_FAST_CAL
+  - afl-cc:
+    - Update to COMPCOV/laf-intel that speeds up the instrumentation
+      process a lot - thanks to Michael Rodler/f0rki for the PR!
+    - Fix to instrument global namespace functions in c++
+    - Fix for llvm 13
     - support partial linking
-    - We do support llvm versions from 3.8 again
-  - afl_analyze
-    - fix timeout handling and support forkserver
+    - We do support llvm versions from 3.8 to 5.0 again
+  - frida_mode:
+    - fix for cmplog
+    - remove need for AFL_FRIDA_PERSISTENT_RETADDR_OFFSET
+    - feature parity of aarch64 with intel now (persistent, cmplog,
+      in-memory testcases, asan)
+  - afl_analyze:
+    - fix timeout handling
+    - add forkserver support for better performance
   - ensure afl-compiler-rt is built for gcc_module
-  - afl-analyze now uses the forkserver for increased performance
-
 
 ### Version ++3.13c (release)
   - Note: plot_data switched to relative time from unix time in 3.10
diff --git a/docs/FAQ.md b/docs/FAQ.md
index 714d50eb..ab0abe6c 100644
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -3,6 +3,7 @@
 ## Contents
 
   * [What is the difference between afl and afl++?](#what-is-the-difference-between-afl-and-afl)
+  * [I got a weird compile error from clang](#i-got-a-weird-compile-error-from-clang)
   * [How to improve the fuzzing speed?](#how-to-improve-the-fuzzing-speed)
   * [How do I fuzz a network service?](#how-do-i-fuzz-a-network-service)
   * [How do I fuzz a GUI program?](#how-do-i-fuzz-a-gui-program)
@@ -35,6 +36,26 @@ flexible and feature rich guided fuzzer available as open source.
 And in independent fuzzing benchmarks it is one of the best fuzzers available,
 e.g. [Fuzzbench Report](https://www.fuzzbench.com/reports/2020-08-03/index.html)
 
+## I got a weird compile error from clang
+
+If you see this kind of error when trying to instrument a target with afl-cc/
+afl-clang-fast/afl-clang-lto:
+```
+/prg/tmp/llvm-project/build/bin/clang-13: symbol lookup error: /usr/local/bin/../lib/afl//cmplog-instructions-pass.so: undefined symbol: _ZNK4llvm8TypeSizecvmEv
+clang-13: error: unable to execute command: No such file or directory
+clang-13: error: clang frontend command failed due to signal (use -v to see invocation)
+clang version 13.0.0 (https://github.com/llvm/llvm-project 1d7cf550721c51030144f3cd295c5789d51c4aad)
+Target: x86_64-unknown-linux-gnu
+Thread model: posix
+InstalledDir: /prg/tmp/llvm-project/build/bin
+clang-13: note: diagnostic msg: 
+********************
+```
+Then this means that your OS updated the clang installation from an upgrade
+package and because of that the afl++ llvm plugins do not match anymore.
+
+Solution: `git pull ; make clean install` of afl++
+
 ## How to improve the fuzzing speed?
 
   1. Use [llvm_mode](docs/llvm_mode/README.md): afl-clang-lto (llvm >= 11) or afl-clang-fast (llvm >= 9 recommended)
diff --git a/docs/env_variables.md b/docs/env_variables.md
index 38a67bc7..e058f377 100644
--- a/docs/env_variables.md
+++ b/docs/env_variables.md
@@ -108,9 +108,6 @@ make fairly broad use of environmental variables instead:
   - Setting `AFL_QUIET` will prevent afl-cc and afl-as banners from being
     displayed during compilation, in case you find them distracting.
 
-  - Setting `AFL_CAL_FAST` will speed up the initial calibration, if the
-    application is very slow.
-
 ## 2) Settings for LLVM and LTO: afl-clang-fast / afl-clang-fast++ / afl-clang-lto / afl-clang-lto++
 
 The native instrumentation helpers (instrumentation and gcc_plugin) accept a subset
@@ -386,6 +383,7 @@ checks or alter some of the more exotic semantics of the tool:
 
   - `AFL_FAST_CAL` keeps the calibration stage about 2.5x faster (albeit less
     precise), which can help when starting a session against a slow target.
+    `AFL_CAL_FAST` works too.
 
   - The CPU widget shown at the bottom of the screen is fairly simplistic and
     may complain of high load prematurely, especially on systems with low core
diff --git a/frida_mode/GNUmakefile b/frida_mode/GNUmakefile
index a0387cac..329d9f7f 100644
--- a/frida_mode/GNUmakefile
+++ b/frida_mode/GNUmakefile
@@ -20,6 +20,7 @@ RT_CFLAGS:=-Wno-unused-parameter \
 		   -Wno-unused-function \
 		   -Wno-unused-result \
 		   -Wno-int-to-pointer-cast \
+		   -Wno-pointer-sign \
 
 LDFLAGS+=-shared \
 		 -lpthread \
diff --git a/frida_mode/README.md b/frida_mode/README.md
index d7dd72a0..296e6405 100644
--- a/frida_mode/README.md
+++ b/frida_mode/README.md
@@ -75,7 +75,6 @@ following options are currently supported:
 * `AFL_FRIDA_PERSISTENT_CNT` - See `AFL_QEMU_PERSISTENT_CNT`
 * `AFL_FRIDA_PERSISTENT_HOOK` - See `AFL_QEMU_PERSISTENT_HOOK`
 * `AFL_FRIDA_PERSISTENT_RET` - See `AFL_QEMU_PERSISTENT_RET`
-* `AFL_FRIDA_PERSISTENT_RETADDR_OFFSET` - See `AFL_QEMU_PERSISTENT_RETADDR_OFFSET`
 
 To enable the powerful CMPLOG mechanism, set `-c 0` for `afl-fuzz`.
 
@@ -156,16 +155,17 @@ instrumentation (the default where available). Required to use
 * `AFL_FRIDA_INST_NO_PREFETCH` - Disable prefetching. By default the child will
 report instrumented blocks back to the parent so that it can also instrument
 them and they be inherited by the next child on fork.
-* `AFL_FRIDA_INST_TRACE` - Log to stdout the address of executed blocks
-`AFL_FRIDA_INST_NO_OPTIMIZE`.
+* `AFL_FRIDA_INST_TRACE` - Log to stdout the address of executed blocks,
+requires `AFL_FRIDA_INST_NO_OPTIMIZE`.
+* `AFL_FRIDA_INST_TRACE_UNIQUE` - As per `AFL_FRIDA_INST_TRACE`, but each edge
+is logged only once, requires `AFL_FRIDA_INST_NO_OPTIMIZE`.
 * `AFL_FRIDA_OUTPUT_STDOUT` - Redirect the standard output of the target
 application to the named file (supersedes the setting of `AFL_DEBUG_CHILD`)
 * `AFL_FRIDA_OUTPUT_STDERR` - Redirect the standard error of the target
 application to the named file (supersedes the setting of `AFL_DEBUG_CHILD`)
 * `AFL_FRIDA_PERSISTENT_DEBUG` - Insert a Breakpoint into the instrumented code
 at `AFL_FRIDA_PERSISTENT_HOOK` and `AFL_FRIDA_PERSISTENT_RET` to allow the user
-to determine the value of `AFL_FRIDA_PERSISTENT_RETADDR_OFFSET` using a
-debugger.
+to detect issues in the persistent loop using a debugger.
 
 ```
 
diff --git a/frida_mode/src/cmplog/cmplog.c b/frida_mode/src/cmplog/cmplog.c
index 7b11c350..3df7d13d 100644
--- a/frida_mode/src/cmplog/cmplog.c
+++ b/frida_mode/src/cmplog/cmplog.c
@@ -1,3 +1,8 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <syscall.h>
+
 #include "frida-gum.h"
 
 #include "debug.h"
@@ -5,10 +10,13 @@
 #include "util.h"
 
 #define DEFAULT_MMAP_MIN_ADDR (32UL << 10)
+#define FD_TMP_MAX_SIZE 65536
 
 extern struct cmp_map *__afl_cmp_map;
 
 static GArray *cmplog_ranges = NULL;
+static int     fd_tmp = -1;
+static ssize_t fd_tmp_size = 0;
 
 static gboolean cmplog_range(const GumRangeDetails *details,
                              gpointer               user_data) {
@@ -27,6 +35,40 @@ static gint cmplog_sort(gconstpointer a, gconstpointer b) {
 
 }
 
+static int cmplog_create_temp(void) {
+
+  const char *tmpdir = g_get_tmp_dir();
+  OKF("CMPLOG Temporary directory: %s", tmpdir);
+  gchar *fname = g_strdup_printf("%s/frida-cmplog-XXXXXX", tmpdir);
+  OKF("CMPLOG Temporary file template: %s", fname);
+  int fd = mkstemp(fname);
+  OKF("CMPLOG Temporary file: %s", fname);
+
+  if (fd < 0) {
+
+    FATAL("Failed to create temp file: %s, errno: %d", fname, errno);
+
+  }
+
+  if (unlink(fname) < 0) {
+
+    FATAL("Failed to unlink temp file: %s (%d), errno: %d", fname, fd, errno);
+
+  }
+
+  if (ftruncate(fd, 0) < 0) {
+
+    FATAL("Failed to ftruncate temp file: %s (%d), errno: %d", fname, fd,
+          errno);
+
+  }
+
+  g_free(fname);
+
+  return fd;
+
+}
+
 void cmplog_init(void) {
 
   if (__afl_cmp_map != NULL) { OKF("CMPLOG mode enabled"); }
@@ -44,6 +86,13 @@ void cmplog_init(void) {
 
   }
 
+  /*
+   * We can't use /dev/null or /dev/zero for this since it appears that they
+   * don't validate the input buffer. Persumably as an optimization because they
+   * don't actually write any data. The file will be deleted on close.
+   */
+  fd_tmp = cmplog_create_temp();
+
 }
 
 static gboolean cmplog_contains(GumAddress inner_base, GumAddress inner_limit,
@@ -67,6 +116,9 @@ gboolean cmplog_is_readable(guint64 addr, size_t size) {
    */
   if (addr < DEFAULT_MMAP_MIN_ADDR) { return false; }
 
+  /* Check our addres/length don't wrap around */
+  if (SIZE_MAX - addr < size) { return false; }
+
   GumAddress inner_base = addr;
   GumAddress inner_limit = inner_base + size;
 
@@ -81,6 +133,38 @@ gboolean cmplog_is_readable(guint64 addr, size_t size) {
 
   }
 
+  /*
+   * Our address map can change (e.g. stack growth), use write as a fallback to
+   * validate our address.
+   */
+  ssize_t written = syscall(__NR_write, fd_tmp, (void *)addr, size);
+
+  /*
+   * If the write succeeds, then the buffer must be valid otherwise it would
+   * return EFAULT
+   */
+  if (written > 0) {
+
+    fd_tmp_size += written;
+    if (fd_tmp_size > FD_TMP_MAX_SIZE) {
+
+      /*
+       * Truncate the file, we don't want our temp file to continue growing!
+       */
+      if (ftruncate(fd_tmp, 0) < 0) {
+
+        FATAL("Failed to truncate fd_tmp (%d), errno: %d", fd_tmp, errno);
+
+      }
+
+      fd_tmp_size = 0;
+
+    }
+
+    if ((size_t)written == size) { return true; }
+
+  }
+
   return false;
 
 }
diff --git a/frida_mode/src/instrument/instrument.c b/frida_mode/src/instrument/instrument.c
index f261e79a..ba82b89f 100644
--- a/frida_mode/src/instrument/instrument.c
+++ b/frida_mode/src/instrument/instrument.c
@@ -1,4 +1,6 @@
 #include <unistd.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
 
 #include "frida-gum.h"
 
@@ -18,44 +20,50 @@
 
 static gboolean               tracing = false;
 static gboolean               optimize = false;
+static gboolean               unique = false;
 static GumStalkerTransformer *transformer = NULL;
 
 __thread uint64_t previous_pc = 0;
 
+static GumAddress previous_rip = 0;
+static u8 *       edges_notified = NULL;
+
+static void trace_debug(char *format, ...) {
+
+  va_list ap;
+  char    buffer[4096] = {0};
+  int     ret;
+  int     len;
+
+  va_start(ap, format);
+  ret = vsnprintf(buffer, sizeof(buffer) - 1, format, ap);
+  va_end(ap);
+
+  if (ret < 0) { return; }
+
+  len = strnlen(buffer, sizeof(buffer));
+
+  IGNORED_RETURN(write(STDOUT_FILENO, buffer, len));
+
+}
+
 __attribute__((hot)) static void on_basic_block(GumCpuContext *context,
                                                 gpointer       user_data) {
 
   UNUSED_PARAMETER(context);
-  /*
-   * This function is performance critical as it is called to instrument every
-   * basic block. By moving our print buffer to a global, we avoid it affecting
-   * the critical path with additional stack adjustments if tracing is not
-   * enabled. If tracing is enabled, then we're printing a load of diagnostic
-   * information so this overhead is unlikely to be noticeable.
-   */
-  static char buffer[200];
-  int         len;
-  GumAddress  current_pc = GUM_ADDRESS(user_data);
-  uint8_t *   cursor;
-  uint64_t    value;
-  if (unlikely(tracing)) {
-
-    /* Avoid any functions which may cause an allocation since the target app
-     * may already be running inside malloc and it isn't designed to be
-     * re-entrant on a single thread */
-    len = snprintf(buffer, sizeof(buffer),
-                   "current_pc: 0x%016" G_GINT64_MODIFIER
-                   "x, previous_pc: 0x%016" G_GINT64_MODIFIER "x\n",
-                   current_pc, previous_pc);
 
-    IGNORED_RETURN(write(STDOUT_FILENO, buffer, len + 1));
+  GumAddress current_rip = GUM_ADDRESS(user_data);
+  GumAddress current_pc;
+  GumAddress edge;
+  uint8_t *  cursor;
+  uint64_t   value;
 
-  }
-
-  current_pc = (current_pc >> 4) ^ (current_pc << 8);
+  current_pc = (current_rip >> 4) ^ (current_rip << 8);
   current_pc &= MAP_SIZE - 1;
 
-  cursor = &__afl_area_ptr[current_pc ^ previous_pc];
+  edge = current_pc ^ previous_pc;
+
+  cursor = &__afl_area_ptr[edge];
   value = *cursor;
 
   if (value == 0xff) {
@@ -71,6 +79,23 @@ __attribute__((hot)) static void on_basic_block(GumCpuContext *context,
   *cursor = value;
   previous_pc = current_pc >> 1;
 
+  if (unlikely(tracing)) {
+
+    if (!unique || edges_notified[edge] == 0) {
+
+      trace_debug("TRACE: edge: %10" G_GINT64_MODIFIER
+                  "d, current_rip: 0x%016" G_GINT64_MODIFIER
+                  "x, previous_rip: 0x%016" G_GINT64_MODIFIER "x\n",
+                  edge, current_rip, previous_rip);
+
+    }
+
+    if (unique) { edges_notified[edge] = 1; }
+
+    previous_rip = current_rip;
+
+  }
+
 }
 
 static void instr_basic_block(GumStalkerIterator *iterator,
@@ -164,18 +189,28 @@ void instrument_init(void) {
 
   optimize = (getenv("AFL_FRIDA_INST_NO_OPTIMIZE") == NULL);
   tracing = (getenv("AFL_FRIDA_INST_TRACE") != NULL);
+  unique = (getenv("AFL_FRIDA_INST_TRACE_UNIQUE") != NULL);
 
   if (!instrument_is_coverage_optimize_supported()) optimize = false;
 
   OKF("Instrumentation - optimize [%c]", optimize ? 'X' : ' ');
   OKF("Instrumentation - tracing [%c]", tracing ? 'X' : ' ');
+  OKF("Instrumentation - unique [%c]", unique ? 'X' : ' ');
 
   if (tracing && optimize) {
 
-    FATAL("AFL_FRIDA_INST_OPTIMIZE and AFL_FRIDA_INST_TRACE are incompatible");
+    FATAL("AFL_FRIDA_INST_TRACE requires AFL_FRIDA_INST_NO_OPTIMIZE");
+
+  }
+
+  if (unique && optimize) {
+
+    FATAL("AFL_FRIDA_INST_TRACE_UNIQUE requires AFL_FRIDA_INST_NO_OPTIMIZE");
 
   }
 
+  if (unique) { tracing = TRUE; }
+
   if (__afl_map_size != 0x10000) {
 
     FATAL("Bad map size: 0x%08x", __afl_map_size);
@@ -185,6 +220,28 @@ void instrument_init(void) {
   transformer =
       gum_stalker_transformer_make_from_callback(instr_basic_block, NULL, NULL);
 
+  if (unique) {
+
+    int shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);
+    if (shm_id < 0) { FATAL("shm_id < 0 - errno: %d\n", errno); }
+
+    edges_notified = shmat(shm_id, NULL, 0);
+    g_assert(edges_notified != MAP_FAILED);
+
+    /*
+     * Configure the shared memory region to be removed once the process dies.
+     */
+    if (shmctl(shm_id, IPC_RMID, NULL) < 0) {
+
+      FATAL("shmctl (IPC_RMID) < 0 - errno: %d\n", errno);
+
+    }
+
+    /* Clear it, not sure it's necessary, just seems like good practice */
+    memset(edges_notified, '\0', MAP_SIZE);
+
+  }
+
   instrument_debug_init();
   asan_init();
   cmplog_init();
diff --git a/frida_mode/src/main.c b/frida_mode/src/main.c
index 1ab9993f..7ff23755 100644
--- a/frida_mode/src/main.c
+++ b/frida_mode/src/main.c
@@ -1,4 +1,5 @@
 #include <errno.h>
+#include <fcntl.h>
 #include <unistd.h>
 #include <sys/types.h>
 
@@ -27,6 +28,8 @@
 #include "stats.h"
 #include "util.h"
 
+#define PROC_MAX 65536
+
 #ifdef __APPLE__
 extern mach_port_t mach_task_self();
 extern GumAddress  gum_darwin_find_entrypoint(mach_port_t task);
@@ -78,7 +81,7 @@ static void on_main_os(int argc, char **argv, char **envp) {
 
 #endif
 
-static void embedded_init() {
+static void embedded_init(void) {
 
   static gboolean initialized = false;
   if (!initialized) {
@@ -90,7 +93,84 @@ static void embedded_init() {
 
 }
 
-void afl_frida_start() {
+static void afl_print_cmdline(void) {
+
+  char * buffer = g_malloc0(PROC_MAX);
+  gchar *fname = g_strdup_printf("/proc/%d/cmdline", getppid());
+  int    fd = open(fname, O_RDONLY);
+
+  if (fd < 0) {
+
+    FATAL("Failed to open /proc/self/cmdline, errno: (%d)", errno);
+
+  }
+
+  ssize_t bytes_read = read(fd, buffer, PROC_MAX - 1);
+  if (bytes_read < 0) {
+
+    FATAL("Failed to read /proc/self/cmdline, errno: (%d)", errno);
+
+  }
+
+  int idx = 0;
+
+  for (ssize_t i = 0; i < bytes_read; i++) {
+
+    if (i == 0 || buffer[i - 1] == '\0') {
+
+      OKF("AFL - COMMANDLINE: argv[%d] = %s", idx++, &buffer[i]);
+
+    }
+
+  }
+
+  close(fd);
+  g_free(fname);
+  g_free(buffer);
+
+}
+
+static void afl_print_env(void) {
+
+  char * buffer = g_malloc0(PROC_MAX);
+  gchar *fname = g_strdup_printf("/proc/%d/environ", getppid());
+  int    fd = open(fname, O_RDONLY);
+
+  if (fd < 0) {
+
+    FATAL("Failed to open /proc/self/cmdline, errno: (%d)", errno);
+
+  }
+
+  ssize_t bytes_read = read(fd, buffer, PROC_MAX - 1);
+  if (bytes_read < 0) {
+
+    FATAL("Failed to read /proc/self/cmdline, errno: (%d)", errno);
+
+  }
+
+  int idx = 0;
+
+  for (ssize_t i = 0; i < bytes_read; i++) {
+
+    if (i == 0 || buffer[i - 1] == '\0') {
+
+      OKF("AFL - ENVIRONMENT %3d: %s", idx++, &buffer[i]);
+
+    }
+
+  }
+
+  close(fd);
+  g_free(fname);
+  g_free(buffer);
+
+}
+
+void afl_frida_start(void) {
+
+  afl_print_cmdline();
+  afl_print_env();
 
   embedded_init();
   stalker_init();
diff --git a/frida_mode/src/persistent/persistent.c b/frida_mode/src/persistent/persistent.c
index 2ec5b9cc..243d501d 100644
--- a/frida_mode/src/persistent/persistent.c
+++ b/frida_mode/src/persistent/persistent.c
@@ -13,7 +13,6 @@ afl_persistent_hook_fn hook = NULL;
 guint64                persistent_start = 0;
 guint64                persistent_count = 0;
 guint64                persistent_ret = 0;
-guint64                persistent_ret_offset = 0;
 gboolean               persistent_debug = FALSE;
 
 void persistent_init(void) {
@@ -23,8 +22,6 @@ void persistent_init(void) {
   persistent_start = util_read_address("AFL_FRIDA_PERSISTENT_ADDR");
   persistent_count = util_read_num("AFL_FRIDA_PERSISTENT_CNT");
   persistent_ret = util_read_address("AFL_FRIDA_PERSISTENT_RET");
-  persistent_ret_offset =
-      util_read_address("AFL_FRIDA_PERSISTENT_RETADDR_OFFSET");
 
   if (getenv("AFL_FRIDA_PERSISTENT_DEBUG") != NULL) { persistent_debug = TRUE; }
 
@@ -44,14 +41,6 @@ void persistent_init(void) {
 
   }
 
-  if (persistent_ret_offset != 0 && persistent_ret == 0) {
-
-    FATAL(
-        "AFL_FRIDA_PERSISTENT_RET must be specified if "
-        "AFL_FRIDA_PERSISTENT_RETADDR_OFFSET is");
-
-  }
-
   if (persistent_start != 0 && persistent_count == 0) persistent_count = 1000;
 
   if (persistent_count != 0 && persistent_count < 100)
@@ -68,8 +57,6 @@ void persistent_init(void) {
 
   OKF("Instrumentation - persistent ret [%c] (0x%016" G_GINT64_MODIFIER "X)",
       persistent_ret == 0 ? ' ' : 'X', persistent_ret);
-  OKF("Instrumentation - persistent ret offset [%c] (%" G_GINT64_MODIFIER "d)",
-      persistent_ret_offset == 0 ? ' ' : 'X', persistent_ret_offset);
 
   if (hook_name != NULL) {
 
diff --git a/frida_mode/src/persistent/persistent_arm64.c b/frida_mode/src/persistent/persistent_arm64.c
index b23693fe..d7c6c76b 100644
--- a/frida_mode/src/persistent/persistent_arm64.c
+++ b/frida_mode/src/persistent/persistent_arm64.c
@@ -268,13 +268,15 @@ static void instrument_persitent_restore_regs(GumArm64Writer *   cw,
                                               ARM64_REG_X0, (16 * 14),
                                               GUM_INDEX_SIGNED_OFFSET);
 
-  /* Don't restore RIP or RSP, use x1-x3 as clobber */
-
-  /* LR & Adjusted SP (clobber x1) */
+  /* LR & Adjusted SP (use x1 as clobber) */
   gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X30, ARM64_REG_X1,
                                               ARM64_REG_X0, (16 * 15),
                                               GUM_INDEX_SIGNED_OFFSET);
 
+  gum_arm64_writer_put_mov_reg_reg(cw, ARM64_REG_SP, ARM64_REG_X1);
+
+  /* Don't restore RIP use x1-x3 as clobber */
+
   /* PC (x2) & CPSR (x1) */
   gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X1,
                                               ARM64_REG_X0, (16 * 16),
@@ -404,7 +406,6 @@ void persistent_prologue(GumStalkerOutput *output) {
 
   gconstpointer loop = cw->code + 1;
 
-  /* Stack must be 16-byte aligned per ABI */
   instrument_persitent_save_regs(cw, &saved_regs);
 
   /* loop: */
@@ -450,9 +451,6 @@ void persistent_epilogue(GumStalkerOutput *output) {
 
   if (persistent_debug) { gum_arm64_writer_put_brk_imm(cw, 0); }
 
-  gum_arm64_writer_put_add_reg_reg_imm(cw, ARM64_REG_SP, ARM64_REG_SP,
-                                       persistent_ret_offset);
-
   gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X0,
                                        GUM_ADDRESS(&saved_lr));
 
diff --git a/frida_mode/src/persistent/persistent_x64.c b/frida_mode/src/persistent/persistent_x64.c
index 858ad38e..653acefe 100644
--- a/frida_mode/src/persistent/persistent_x64.c
+++ b/frida_mode/src/persistent/persistent_x64.c
@@ -43,6 +43,7 @@ struct x86_64_regs {
 typedef struct x86_64_regs arch_api_regs;
 
 static arch_api_regs saved_regs = {0};
+static gpointer      saved_ret = NULL;
 
 gboolean persistent_is_supported(void) {
 
@@ -104,7 +105,7 @@ static void instrument_persitent_save_regs(GumX86Writer *      cw,
 
   /* RED_ZONE + Saved flags, RAX, alignment */
   gum_x86_writer_put_add_reg_imm(cw, GUM_REG_RBX,
-                                 GUM_RED_ZONE_SIZE + (0x8 * 3));
+                                 GUM_RED_ZONE_SIZE + (0x8 * 2));
   gum_x86_writer_put_mov_reg_offset_ptr_reg(cw, GUM_REG_RAX, (0x8 * 16),
                                             GUM_REG_RBX);
 
@@ -159,7 +160,9 @@ static void instrument_persitent_restore_regs(GumX86Writer *      cw,
   gum_x86_writer_put_mov_reg_reg_offset_ptr(cw, GUM_REG_R15, GUM_REG_RAX,
                                             (0x8 * 14));
 
-  /* Don't restore RIP or RSP */
+  /* Don't restore RIP */
+  gum_x86_writer_put_mov_reg_reg_offset_ptr(cw, GUM_REG_RSP, GUM_REG_RAX,
+                                            (0x8 * 16));
 
   /* Restore RBX, RAX & Flags */
   gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP,
@@ -242,6 +245,31 @@ static void persistent_prologue_hook(GumX86Writer *      cw,
 
 }
 
+static void instrument_persitent_save_ret(GumX86Writer *cw) {
+
+  /* Stack usage by this function */
+  gssize offset = GUM_RED_ZONE_SIZE + (3 * 8);
+  gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP,
+                                        -(GUM_RED_ZONE_SIZE));
+
+  gum_x86_writer_put_pushfx(cw);
+  gum_x86_writer_put_push_reg(cw, GUM_REG_RAX);
+  gum_x86_writer_put_push_reg(cw, GUM_REG_RBX);
+
+  gum_x86_writer_put_mov_reg_address(cw, GUM_REG_RAX, GUM_ADDRESS(&saved_ret));
+  gum_x86_writer_put_mov_reg_reg_offset_ptr(cw, GUM_REG_RBX, GUM_REG_RSP,
+                                            offset);
+  gum_x86_writer_put_mov_reg_ptr_reg(cw, GUM_REG_RAX, GUM_REG_RBX);
+
+  gum_x86_writer_put_pop_reg(cw, GUM_REG_RBX);
+  gum_x86_writer_put_pop_reg(cw, GUM_REG_RAX);
+  gum_x86_writer_put_popfx(cw);
+
+  gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP,
+                                        (GUM_RED_ZONE_SIZE));
+
+}
+
 void persistent_prologue(GumStalkerOutput *output) {
 
   /*
@@ -268,11 +296,10 @@ void persistent_prologue(GumStalkerOutput *output) {
 
   gconstpointer loop = cw->code + 1;
 
-  /* Stack must be 16-byte aligned per ABI */
-  instrument_persitent_save_regs(cw, &saved_regs);
+  /* Pop the return value */
+  gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP, 8);
 
-  /* pop the return value */
-  gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP, (8));
+  instrument_persitent_save_regs(cw, &saved_regs);
 
   /* loop: */
   gum_x86_writer_put_label(cw, loop);
@@ -304,6 +331,8 @@ void persistent_prologue(GumStalkerOutput *output) {
   /* original: */
   gum_x86_writer_put_label(cw, original);
 
+  instrument_persitent_save_ret(cw);
+
   if (persistent_debug) { gum_x86_writer_put_breakpoint(cw); }
 
 }
@@ -314,9 +343,15 @@ void persistent_epilogue(GumStalkerOutput *output) {
 
   if (persistent_debug) { gum_x86_writer_put_breakpoint(cw); }
 
-  gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP,
-                                        persistent_ret_offset);
-  gum_x86_writer_put_ret(cw);
+  /* The stack should be aligned when we re-enter our loop */
+  gconstpointer zero = cw->code + 1;
+  gum_x86_writer_put_test_reg_u32(cw, GUM_REG_RSP, 0xF);
+  gum_x86_writer_put_jcc_near_label(cw, X86_INS_JE, zero, GUM_NO_HINT);
+  gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP, -8);
+  gum_x86_writer_put_label(cw, zero);
+
+  gum_x86_writer_put_mov_reg_address(cw, GUM_REG_RAX, GUM_ADDRESS(&saved_ret));
+  gum_x86_writer_put_jmp_reg_ptr(cw, GUM_REG_RAX);
 
 }
 
diff --git a/frida_mode/src/persistent/persistent_x86.c b/frida_mode/src/persistent/persistent_x86.c
index 0675edf4..7add6e99 100644
--- a/frida_mode/src/persistent/persistent_x86.c
+++ b/frida_mode/src/persistent/persistent_x86.c
@@ -39,6 +39,7 @@ struct x86_regs {
 typedef struct x86_regs arch_api_regs;
 
 static arch_api_regs saved_regs = {0};
+static gpointer      saved_ret = NULL;
 
 gboolean persistent_is_supported(void) {
 
@@ -117,7 +118,9 @@ static void instrument_persitent_restore_regs(GumX86Writer *   cw,
   gum_x86_writer_put_mov_reg_reg_offset_ptr(cw, GUM_REG_EBP, GUM_REG_EAX,
                                             (0x4 * 6));
 
-  /* Don't restore RIP or RSP */
+  /* Don't restore RIP */
+  gum_x86_writer_put_mov_reg_reg_offset_ptr(cw, GUM_REG_ESP, GUM_REG_EAX,
+                                            (0x4 * 8));
 
   /* Restore RBX, RAX & Flags */
   gum_x86_writer_put_mov_reg_reg_offset_ptr(cw, GUM_REG_EBX, GUM_REG_EAX,
@@ -184,6 +187,26 @@ static void persistent_prologue_hook(GumX86Writer *cw, struct x86_regs *regs) {
 
 }
 
+static void instrument_persitent_save_ret(GumX86Writer *cw) {
+
+  /* Stack usage by this function */
+  gssize offset = (3 * 4);
+
+  gum_x86_writer_put_pushfx(cw);
+  gum_x86_writer_put_push_reg(cw, GUM_REG_EAX);
+  gum_x86_writer_put_push_reg(cw, GUM_REG_EBX);
+
+  gum_x86_writer_put_mov_reg_address(cw, GUM_REG_EAX, GUM_ADDRESS(&saved_ret));
+  gum_x86_writer_put_mov_reg_reg_offset_ptr(cw, GUM_REG_EBX, GUM_REG_ESP,
+                                            offset);
+  gum_x86_writer_put_mov_reg_ptr_reg(cw, GUM_REG_EAX, GUM_REG_EBX);
+
+  gum_x86_writer_put_pop_reg(cw, GUM_REG_EBX);
+  gum_x86_writer_put_pop_reg(cw, GUM_REG_EAX);
+  gum_x86_writer_put_popfx(cw);
+
+}
+
 void persistent_prologue(GumStalkerOutput *output) {
 
   /*
@@ -210,11 +233,10 @@ void persistent_prologue(GumStalkerOutput *output) {
 
   gconstpointer loop = cw->code + 1;
 
-  /* Stack must be 16-byte aligned per ABI */
-  instrument_persitent_save_regs(cw, &saved_regs);
-
   /* Pop the return value */
-  gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_ESP, GUM_REG_ESP, (4));
+  gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_ESP, GUM_REG_ESP, 4);
+
+  instrument_persitent_save_regs(cw, &saved_regs);
 
   /* loop: */
   gum_x86_writer_put_label(cw, loop);
@@ -244,6 +266,8 @@ void persistent_prologue(GumStalkerOutput *output) {
   /* original: */
   gum_x86_writer_put_label(cw, original);
 
+  instrument_persitent_save_ret(cw);
+
   if (persistent_debug) { gum_x86_writer_put_breakpoint(cw); }
 
 }
@@ -254,10 +278,8 @@ void persistent_epilogue(GumStalkerOutput *output) {
 
   if (persistent_debug) { gum_x86_writer_put_breakpoint(cw); }
 
-  gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_ESP, GUM_REG_ESP,
-                                        persistent_ret_offset);
-
-  gum_x86_writer_put_ret(cw);
+  gum_x86_writer_put_mov_reg_address(cw, GUM_REG_EAX, GUM_ADDRESS(&saved_ret));
+  gum_x86_writer_put_jmp_reg_ptr(cw, GUM_REG_EAX);
 
 }
 
diff --git a/frida_mode/test/persistent_ret/GNUmakefile b/frida_mode/test/persistent_ret/GNUmakefile
index 4c9d8a19..2de51d86 100644
--- a/frida_mode/test/persistent_ret/GNUmakefile
+++ b/frida_mode/test/persistent_ret/GNUmakefile
@@ -38,8 +38,6 @@ ifeq "$(ARCH)" "x86"
  AFL_FRIDA_PERSISTENT_RET=$(shell $(PWD)get_symbol_addr.py -f $(TESTINSTBIN) -s slow -b 0x56555000)
 endif
 
-AFL_FRIDA_PERSISTENT_RETADDR_OFFSET:=0x50
-
 .PHONY: all 32 clean qemu frida
 
 all: $(TESTINSTBIN)
@@ -76,7 +74,6 @@ frida: $(TESTINSTBIN) $(TESTINSTR_DATA_FILE)
 frida_ret: $(TESTINSTBIN) $(TESTINSTR_DATA_FILE)
 	AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \
 	AFL_FRIDA_PERSISTENT_RET=$(AFL_FRIDA_PERSISTENT_RET) \
-	AFL_FRIDA_PERSISTENT_RETADDR_OFFSET=$(AFL_FRIDA_PERSISTENT_RETADDR_OFFSET) \
 	$(ROOT)afl-fuzz \
 		-D \
 		-O \
@@ -89,7 +86,6 @@ debug: $(TESTINSTBIN) $(TESTINSTR_DATA_FILE)
 	gdb \
 		--ex 'set environment AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR)' \
 		--ex 'set environment AFL_FRIDA_PERSISTENT_RET=$(AFL_FRIDA_PERSISTENT_RET)' \
-		--ex 'set environment AFL_FRIDA_PERSISTENT_RETADDR_OFFSET=$(AFL_FRIDA_PERSISTENT_RETADDR_OFFSET)' \
 		--ex 'set environment AFL_FRIDA_PERSISTENT_DEBUG=1' \
 		--ex 'set environment AFL_DEBUG_CHILD=1' \
 		--ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \
@@ -99,7 +95,6 @@ debug: $(TESTINSTBIN) $(TESTINSTR_DATA_FILE)
 run: $(TESTINSTBIN) $(TESTINSTR_DATA_FILE)
 	AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \
 	AFL_FRIDA_PERSISTENT_RET=$(AFL_FRIDA_PERSISTENT_RET) \
-	AFL_FRIDA_PERSISTENT_RETADDR_OFFSET=$(AFL_FRIDA_PERSISTENT_RETADDR_OFFSET) \
 	AFL_DEBUG_CHILD=1 \
 	LD_PRELOAD=$(ROOT)afl-frida-trace.so \
 		$(TESTINSTBIN) $(TESTINSTR_DATA_FILE)
diff --git a/frida_mode/test/unstable/GNUmakefile b/frida_mode/test/unstable/GNUmakefile
new file mode 100644
index 00000000..fed417a3
--- /dev/null
+++ b/frida_mode/test/unstable/GNUmakefile
@@ -0,0 +1,90 @@
+PWD:=$(shell pwd)/
+ROOT:=$(shell realpath $(PWD)../../..)/
+BUILD_DIR:=$(PWD)build/
+UNSTABLE_DATA_DIR:=$(BUILD_DIR)in/
+UNSTABLE_DATA_FILE:=$(UNSTABLE_DATA_DIR)in
+
+UNSTABLE_BIN:=$(BUILD_DIR)unstable
+UNSTABLE_SRC:=$(PWD)unstable.c
+
+QEMU_OUT:=$(BUILD_DIR)qemu-out
+FRIDA_OUT:=$(BUILD_DIR)frida-out
+
+ifndef ARCH
+
+ARCH=$(shell uname -m)
+ifeq "$(ARCH)" "aarch64"
+ ARCH:=arm64
+endif
+
+ifeq "$(ARCH)" "i686"
+ ARCH:=x86
+endif
+endif
+
+AFL_QEMU_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(UNSTABLE_BIN) -s run_test -b 0x4000000000)
+
+ifeq "$(ARCH)" "aarch64"
+ AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(UNSTABLE_BIN) -s run_test -b 0x0000aaaaaaaaa000)
+endif
+
+ifeq "$(ARCH)" "x86_64"
+ AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(UNSTABLE_BIN) -s run_test -b 0x0000555555554000)
+endif
+
+ifeq "$(ARCH)" "x86"
+ AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(UNSTABLE_BIN) -s run_test -b 0x56555000)
+endif
+
+.PHONY: all 32 clean qemu frida
+
+all: $(UNSTABLE_BIN)
+	make -C $(ROOT)frida_mode/
+
+32:
+	CFLAGS="-m32" LDFLAGS="-m32" ARCH="x86" make all
+
+$(BUILD_DIR):
+	mkdir -p $@
+
+$(UNSTABLE_DATA_DIR): | $(BUILD_DIR)
+	mkdir -p $@
+
+$(UNSTABLE_DATA_FILE): | $(UNSTABLE_DATA_DIR)
+	echo -n "000" > $@
+
+$(UNSTABLE_BIN): $(UNSTABLE_SRC) | $(BUILD_DIR)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+clean:
+	rm -rf $(BUILD_DIR)
+
+
+qemu: $(UNSTABLE_BIN) $(UNSTABLE_DATA_FILE)
+	AFL_QEMU_PERSISTENT_ADDR=$(AFL_QEMU_PERSISTENT_ADDR) \
+	$(ROOT)afl-fuzz \
+		-D \
+		-Q \
+		-i $(UNSTABLE_DATA_DIR) \
+		-o $(QEMU_OUT) \
+		-- \
+			$(UNSTABLE_BIN) @@
+
+frida: $(UNSTABLE_BIN) $(UNSTABLE_DATA_FILE)
+	AFL_DEBUG=1 \
+	AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \
+	AFL_FRIDA_INST_TRACE_UNIQUE=1 \
+	AFL_FRIDA_INST_NO_OPTIMIZE=1 \
+	$(ROOT)afl-fuzz \
+		-D \
+		-O \
+		-i $(UNSTABLE_DATA_DIR) \
+		-o $(FRIDA_OUT) \
+		-- \
+			$(UNSTABLE_BIN) @@
+
+debug:
+	gdb \
+		--ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \
+		--ex 'set disassembly-flavor intel' \
+		--args $(UNSTABLE_BIN) $(UNSTABLE_DATA_FILE)
diff --git a/frida_mode/test/unstable/Makefile b/frida_mode/test/unstable/Makefile
new file mode 100644
index 00000000..f843af19
--- /dev/null
+++ b/frida_mode/test/unstable/Makefile
@@ -0,0 +1,19 @@
+all:
+	@echo trying to use GNU make...
+	@gmake all || echo please install GNUmake
+
+32:
+	@echo trying to use GNU make...
+	@gmake 32 || echo please install GNUmake
+
+clean:
+	@gmake clean
+
+qemu:
+	@gmake qemu
+
+frida:
+	@gmake frida
+
+debug:
+	@gmake debug
diff --git a/frida_mode/test/unstable/get_symbol_addr.py b/frida_mode/test/unstable/get_symbol_addr.py
new file mode 100755
index 00000000..1c46e010
--- /dev/null
+++ b/frida_mode/test/unstable/get_symbol_addr.py
@@ -0,0 +1,36 @@
+#!/usr/bin/python3
+import argparse
+from elftools.elf.elffile import ELFFile
+
+def process_file(file, symbol, base):
+    with open(file, 'rb') as f:
+        elf = ELFFile(f)
+        symtab = elf.get_section_by_name('.symtab')
+        mains = symtab.get_symbol_by_name(symbol)
+        if len(mains) != 1:
+            print ("Failed to find main")
+            return 1
+
+        main_addr = mains[0]['st_value']
+        main = base + main_addr
+        print ("0x%016x" % main)
+        return 0
+
+def hex_value(x):
+    return int(x, 16)
+
+def main():
+    parser = argparse.ArgumentParser(description='Process some integers.')
+    parser.add_argument('-f', '--file', dest='file', type=str,
+                    help='elf file name', required=True)
+    parser.add_argument('-s', '--symbol', dest='symbol', type=str,
+                    help='symbol name', required=True)
+    parser.add_argument('-b', '--base', dest='base', type=hex_value,
+                    help='elf base address', required=True)
+
+    args = parser.parse_args()
+    return process_file (args.file, args.symbol, args.base)
+
+if __name__ == "__main__":
+    ret = main()
+    exit(ret)
diff --git a/frida_mode/test/unstable/unstable.c b/frida_mode/test/unstable/unstable.c
new file mode 100644
index 00000000..67d56b73
--- /dev/null
+++ b/frida_mode/test/unstable/unstable.c
@@ -0,0 +1,67 @@
+/*
+   american fuzzy lop++ - a trivial program to test the build
+   --------------------------------------------------------
+   Originally written by Michal Zalewski
+   Copyright 2014 Google Inc. All rights reserved.
+   Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+     http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __APPLE__
+  #define TESTINSTR_SECTION
+#else
+  #define TESTINSTR_SECTION __attribute__((section(".testinstr")))
+#endif
+
+void LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+
+  if (size < 1) return;
+
+  int r = rand();
+  if ((r % 2) == 0) {
+    printf ("Hooray all even\n");
+  } else {
+    printf ("Hmm that's odd\n");
+  }
+
+  // we support three input cases
+  if (data[0] == '0')
+    printf("Looks like a zero to me!\n");
+  else if (data[0] == '1')
+    printf("Pretty sure that is a one!\n");
+  else
+    printf("Neither one or zero? How quaint!\n");
+
+}
+
+void run_test(char * file) {
+  fprintf(stderr, "Running: %s\n", file);
+  FILE *f = fopen(file, "r");
+  assert(f);
+  fseek(f, 0, SEEK_END);
+  size_t len = ftell(f);
+  fseek(f, 0, SEEK_SET);
+  unsigned char *buf = (unsigned char*)malloc(len);
+  size_t n_read = fread(buf, 1, len, f);
+  fclose(f);
+  assert(n_read == len);
+  LLVMFuzzerTestOneInput(buf, len);
+  free(buf);
+  fprintf(stderr, "Done:    %s: (%zd bytes)\n", file, n_read);
+}
+
+int main(int argc, char **argv) {
+  srand(1);
+  fprintf(stderr, "StandaloneFuzzTargetMain: running %d inputs\n", argc - 1);
+  for (int i = 1; i < argc; i++) {
+    run_test(argv[i]);
+  }
+}
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index 4aba3bdf..2920f905 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -478,9 +478,7 @@ typedef struct afl_state {
 
   u32 hang_tmout;                       /* Timeout used for hang det (ms)   */
 
-  u8 cal_cycles,                        /* Calibration cycles defaults      */
-      cal_cycles_long,                  /* Calibration cycles defaults      */
-      havoc_stack_pow2,                 /* HAVOC_STACK_POW2                 */
+  u8 havoc_stack_pow2,                  /* HAVOC_STACK_POW2                 */
       no_unlink,                        /* do not unlink cur_input          */
       debug,                            /* Debug mode                       */
       custom_only,                      /* Custom mutator only mode         */
diff --git a/include/envs.h b/include/envs.h
index 15116fc1..54bb6597 100644
--- a/include/envs.h
+++ b/include/envs.h
@@ -60,6 +60,7 @@ static char *afl_environment_variables[] = {
     "AFL_FRIDA_INST_NO_PREFETCH",
     "AFL_FRIDA_INST_RANGES",
     "AFL_FRIDA_INST_TRACE",
+    "AFL_FRIDA_INST_UNSTABLE",
     "AFL_FRIDA_OUTPUT_STDOUT",
     "AFL_FRIDA_OUTPUT_STDERR",
     "AFL_FRIDA_PERSISTENT_ADDR",
@@ -67,7 +68,6 @@ static char *afl_environment_variables[] = {
     "AFL_FRIDA_PERSISTENT_DEBUG",
     "AFL_FRIDA_PERSISTENT_HOOK",
     "AFL_FRIDA_PERSISTENT_RET",
-    "AFL_FRIDA_PERSISTENT_RETADDR_OFFSET",
     "AFL_FRIDA_STATS_FILE",
     "AFL_FRIDA_STATS_INTERVAL",
     "AFL_FRIDA_STATS_TRANSITIONS",
diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc
index af32e2f9..3239ea91 100644
--- a/instrumentation/afl-llvm-common.cc
+++ b/instrumentation/afl-llvm-common.cc
@@ -96,9 +96,8 @@ bool isIgnoreFunction(const llvm::Function *F) {
 
   static constexpr const char *ignoreSubstringList[] = {
 
-      "__asan",       "__msan",     "__ubsan", "__lsan",
-      "__san",        "__sanitize", "__cxx",   "_GLOBAL__",
-      "DebugCounter", "DwarfDebug", "DebugLoc"
+      "__asan", "__msan",       "__ubsan",    "__lsan",  "__san", "__sanitize",
+      "__cxx",  "DebugCounter", "DwarfDebug", "DebugLoc"
 
   };
 
diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc
index b02a89fb..68f6c329 100644
--- a/instrumentation/split-compares-pass.so.cc
+++ b/instrumentation/split-compares-pass.so.cc
@@ -47,6 +47,10 @@
 using namespace llvm;
 #include "afl-llvm-common.h"
 
+// uncomment this toggle function verification at each step. horribly slow, but
+// helps to pinpoint a potential problem in the splitting code.
+//#define VERIFY_TOO_MUCH 1
+
 namespace {
 
 class SplitComparesTransform : public ModulePass {
@@ -67,28 +71,101 @@ class SplitComparesTransform : public ModulePass {
   const char *getPassName() const override {
 
 #endif
-    return "simplifies and splits ICMP instructions";
+    return "AFL_SplitComparesTransform";
 
   }
 
  private:
   int enableFPSplit;
 
-  size_t splitIntCompares(Module &M, unsigned bitw);
+  unsigned target_bitwidth = 8;
+
+  size_t count = 0;
+
   size_t splitFPCompares(Module &M);
-  bool   simplifyCompares(Module &M);
   bool   simplifyFPCompares(Module &M);
-  bool   simplifyIntSignedness(Module &M);
   size_t nextPowerOfTwo(size_t in);
 
+  using CmpWorklist = SmallVector<CmpInst *, 8>;
+
+  /// simplify the comparison and then split the comparison until the
+  /// target_bitwidth is reached.
+  bool simplifyAndSplit(CmpInst *I, Module &M);
+  /// simplify a non-strict comparison (e.g., less than or equals)
+  bool simplifyOrEqualsCompare(CmpInst *IcmpInst, Module &M,
+                               CmpWorklist &worklist);
+  /// simplify a signed comparison (signed less or greater than)
+  bool simplifySignedCompare(CmpInst *IcmpInst, Module &M,
+                             CmpWorklist &worklist);
+  /// splits an icmp into nested icmps recursivly until target_bitwidth is
+  /// reached
+  bool splitCompare(CmpInst *I, Module &M, CmpWorklist &worklist);
+
+  /// print an error to llvm's errs stream, but only if not ordered to be quiet
+  void reportError(const StringRef msg, Instruction *I, Module &M) {
+
+    if (!be_quiet) {
+
+      errs() << "[AFL++ SplitComparesTransform] ERROR: " << msg << "\n";
+      if (debug) {
+
+        if (I) {
+
+          errs() << "Instruction = " << *I << "\n";
+          if (auto BB = I->getParent()) {
+
+            if (auto F = BB->getParent()) {
+
+              if (F->hasName()) {
+
+                errs() << "|-> in function " << F->getName() << " ";
+
+              }
+
+            }
+
+          }
+
+        }
+
+        auto n = M.getName();
+        if (n.size() > 0) { errs() << "in module " << n << "\n"; }
+
+      }
+
+    }
+
+  }
+
+  bool isSupportedBitWidth(unsigned bitw) {
+
+    // IDK whether the icmp code works on other bitwidths. I guess not? So we
+    // try to avoid dealing with other weird icmp's that llvm might use (looking
+    // at you `icmp i0`).
+    switch (bitw) {
+
+      case 8:
+      case 16:
+      case 32:
+      case 64:
+      case 128:
+      case 256:
+        return true;
+      default:
+        return false;
+
+    }
+
+  }
+
 };
 
 }  // namespace
 
 char SplitComparesTransform::ID = 0;
 
-/* This function splits FCMP instructions with xGE or xLE predicates into two
- * FCMP instructions with predicate xGT or xLT and EQ */
+/// This function splits FCMP instructions with xGE or xLE predicates into two
+/// FCMP instructions with predicate xGT or xLT and EQ
 bool SplitComparesTransform::simplifyFPCompares(Module &M) {
 
   LLVMContext &              C = M.getContext();
@@ -221,292 +298,481 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) {
 
 }
 
-/* This function splits ICMP instructions with xGE or xLE predicates into two
- * ICMP instructions with predicate xGT or xLT and EQ */
-bool SplitComparesTransform::simplifyCompares(Module &M) {
+/// This function splits ICMP instructions with xGE or xLE predicates into two
+/// ICMP instructions with predicate xGT or xLT and EQ
+bool SplitComparesTransform::simplifyOrEqualsCompare(CmpInst *    IcmpInst,
+                                                     Module &     M,
+                                                     CmpWorklist &worklist) {
 
-  LLVMContext &              C = M.getContext();
-  std::vector<Instruction *> icomps;
-  IntegerType *              Int1Ty = IntegerType::getInt1Ty(C);
+  LLVMContext &C = M.getContext();
+  IntegerType *Int1Ty = IntegerType::getInt1Ty(C);
 
-  /* iterate over all functions, bbs and instruction and add
-   * all integer comparisons with >= and <= predicates to the icomps vector */
-  for (auto &F : M) {
+  /* find out what the new predicate is going to be */
+  auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+  if (!cmp_inst) { return false; }
 
-    if (!isInInstrumentList(&F)) continue;
+  BasicBlock *bb = IcmpInst->getParent();
 
-    for (auto &BB : F) {
+  auto op0 = IcmpInst->getOperand(0);
+  auto op1 = IcmpInst->getOperand(1);
 
-      for (auto &IN : BB) {
+  CmpInst::Predicate pred = cmp_inst->getPredicate();
+  CmpInst::Predicate new_pred;
 
-        CmpInst *selectcmpInst = nullptr;
+  switch (pred) {
 
-        if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) {
+    case CmpInst::ICMP_UGE:
+      new_pred = CmpInst::ICMP_UGT;
+      break;
+    case CmpInst::ICMP_SGE:
+      new_pred = CmpInst::ICMP_SGT;
+      break;
+    case CmpInst::ICMP_ULE:
+      new_pred = CmpInst::ICMP_ULT;
+      break;
+    case CmpInst::ICMP_SLE:
+      new_pred = CmpInst::ICMP_SLT;
+      break;
+    default:  // keep the compiler happy
+      return false;
 
-          if (selectcmpInst->getPredicate() == CmpInst::ICMP_UGE ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_SGE ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_ULE ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_SLE) {
+  }
 
-            auto op0 = selectcmpInst->getOperand(0);
-            auto op1 = selectcmpInst->getOperand(1);
+  /* split before the icmp instruction */
+  BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));
+
+  /* the old bb now contains a unconditional jump to the new one (end_bb)
+   * we need to delete it later */
+
+  /* create the ICMP instruction with new_pred and add it to the old basic
+   * block bb it is now at the position where the old IcmpInst was */
+  CmpInst *icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1);
+  bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), icmp_np);
+
+  /* create a new basic block which holds the new EQ icmp */
+  CmpInst *icmp_eq;
+  /* insert middle_bb before end_bb */
+  BasicBlock *middle_bb =
+      BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
+  icmp_eq = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, op0, op1);
+  middle_bb->getInstList().push_back(icmp_eq);
+  /* add an unconditional branch to the end of middle_bb with destination
+   * end_bb */
+  BranchInst::Create(end_bb, middle_bb);
+
+  /* replace the uncond branch with a conditional one, which depends on the
+   * new_pred icmp. True goes to end, false to the middle (injected) bb */
+  auto term = bb->getTerminator();
+  BranchInst::Create(end_bb, middle_bb, icmp_np, bb);
+  term->eraseFromParent();
+
+  /* replace the old IcmpInst (which is the first inst in end_bb) with a PHI
+   * inst to wire up the loose ends */
+  PHINode *PN = PHINode::Create(Int1Ty, 2, "");
+  /* the first result depends on the outcome of icmp_eq */
+  PN->addIncoming(icmp_eq, middle_bb);
+  /* if the source was the original bb we know that the icmp_np yielded true
+   * hence we can hardcode this value */
+  PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb);
+  /* replace the old IcmpInst with our new and shiny PHI inst */
+  BasicBlock::iterator ii(IcmpInst);
+  ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN);
+
+  worklist.push_back(icmp_np);
+  worklist.push_back(icmp_eq);
 
-            IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
-            IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType());
+  return true;
 
-            /* this is probably not needed but we do it anyway */
-            if (!intTyOp0 || !intTyOp1) { continue; }
+}
 
-            icomps.push_back(selectcmpInst);
+/// Simplify a signed comparison operator by splitting it into a unsigned and
+/// bit comparison. add all resulting comparisons to
+/// the worklist passed as a reference.
+bool SplitComparesTransform::simplifySignedCompare(CmpInst *IcmpInst, Module &M,
+                                                   CmpWorklist &worklist) {
 
-          }
+  LLVMContext &C = M.getContext();
+  IntegerType *Int1Ty = IntegerType::getInt1Ty(C);
 
-        }
+  BasicBlock *bb = IcmpInst->getParent();
 
-      }
+  auto op0 = IcmpInst->getOperand(0);
+  auto op1 = IcmpInst->getOperand(1);
 
-    }
+  IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
+  if (!intTyOp0) { return false; }
+  unsigned     bitw = intTyOp0->getBitWidth();
+  IntegerType *IntType = IntegerType::get(C, bitw);
 
-  }
+  /* get the new predicate */
+  auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+  if (!cmp_inst) { return false; }
+  auto               pred = cmp_inst->getPredicate();
+  CmpInst::Predicate new_pred;
 
-  if (!icomps.size()) { return false; }
+  if (pred == CmpInst::ICMP_SGT) {
 
-  for (auto &IcmpInst : icomps) {
+    new_pred = CmpInst::ICMP_UGT;
 
-    BasicBlock *bb = IcmpInst->getParent();
+  } else {
 
-    auto op0 = IcmpInst->getOperand(0);
-    auto op1 = IcmpInst->getOperand(1);
+    new_pred = CmpInst::ICMP_ULT;
 
-    /* find out what the new predicate is going to be */
-    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
-    if (!cmp_inst) { continue; }
-    auto               pred = cmp_inst->getPredicate();
-    CmpInst::Predicate new_pred;
+  }
 
-    switch (pred) {
+  BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));
+
+  /* create a 1 bit compare for the sign bit. to do this shift and trunc
+   * the original operands so only the first bit remains.*/
+  Value *s_op0, *t_op0, *s_op1, *t_op1, *icmp_sign_bit;
+
+  IRBuilder<> IRB(bb->getTerminator());
+  s_op0 = IRB.CreateLShr(op0, ConstantInt::get(IntType, bitw - 1));
+  t_op0 = IRB.CreateTruncOrBitCast(s_op0, Int1Ty);
+  s_op1 = IRB.CreateLShr(op1, ConstantInt::get(IntType, bitw - 1));
+  t_op1 = IRB.CreateTruncOrBitCast(s_op1, Int1Ty);
+  /* compare of the sign bits */
+  icmp_sign_bit = IRB.CreateICmp(CmpInst::ICMP_EQ, t_op0, t_op1);
+
+  /* create a new basic block which is executed if the signedness bit is
+   * different */
+  CmpInst *   icmp_inv_sig_cmp;
+  BasicBlock *sign_bb =
+      BasicBlock::Create(C, "sign", end_bb->getParent(), end_bb);
+  if (pred == CmpInst::ICMP_SGT) {
+
+    /* if we check for > and the op0 positive and op1 negative then the final
+     * result is true. if op0 negative and op1 pos, the cmp must result
+     * in false
+     */
+    icmp_inv_sig_cmp =
+        CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_op0, t_op1);
 
-      case CmpInst::ICMP_UGE:
-        new_pred = CmpInst::ICMP_UGT;
-        break;
-      case CmpInst::ICMP_SGE:
-        new_pred = CmpInst::ICMP_SGT;
-        break;
-      case CmpInst::ICMP_ULE:
-        new_pred = CmpInst::ICMP_ULT;
-        break;
-      case CmpInst::ICMP_SLE:
-        new_pred = CmpInst::ICMP_SLT;
-        break;
-      default:  // keep the compiler happy
-        continue;
+  } else {
 
-    }
+    /* just the inverse of the above statement */
+    icmp_inv_sig_cmp =
+        CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_op0, t_op1);
 
-    /* split before the icmp instruction */
-    BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));
+  }
 
-    /* the old bb now contains a unconditional jump to the new one (end_bb)
-     * we need to delete it later */
+  sign_bb->getInstList().push_back(icmp_inv_sig_cmp);
+  BranchInst::Create(end_bb, sign_bb);
 
-    /* create the ICMP instruction with new_pred and add it to the old basic
-     * block bb it is now at the position where the old IcmpInst was */
-    Instruction *icmp_np;
-    icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1);
-    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
-                             icmp_np);
+  /* create a new bb which is executed if signedness is equal */
+  CmpInst *   icmp_usign_cmp;
+  BasicBlock *middle_bb =
+      BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
+  /* we can do a normal unsigned compare now */
+  icmp_usign_cmp = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1);
 
-    /* create a new basic block which holds the new EQ icmp */
-    Instruction *icmp_eq;
-    /* insert middle_bb before end_bb */
-    BasicBlock *middle_bb =
-        BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
-    icmp_eq = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, op0, op1);
-    middle_bb->getInstList().push_back(icmp_eq);
-    /* add an unconditional branch to the end of middle_bb with destination
-     * end_bb */
-    BranchInst::Create(end_bb, middle_bb);
+  middle_bb->getInstList().push_back(icmp_usign_cmp);
+  BranchInst::Create(end_bb, middle_bb);
 
-    /* replace the uncond branch with a conditional one, which depends on the
-     * new_pred icmp. True goes to end, false to the middle (injected) bb */
-    auto term = bb->getTerminator();
-    BranchInst::Create(end_bb, middle_bb, icmp_np, bb);
-    term->eraseFromParent();
+  auto term = bb->getTerminator();
+  /* if the sign is eq do a normal unsigned cmp, else we have to check the
+   * signedness bit */
+  BranchInst::Create(middle_bb, sign_bb, icmp_sign_bit, bb);
+  term->eraseFromParent();
 
-    /* replace the old IcmpInst (which is the first inst in end_bb) with a PHI
-     * inst to wire up the loose ends */
-    PHINode *PN = PHINode::Create(Int1Ty, 2, "");
-    /* the first result depends on the outcome of icmp_eq */
-    PN->addIncoming(icmp_eq, middle_bb);
-    /* if the source was the original bb we know that the icmp_np yielded true
-     * hence we can hardcode this value */
-    PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb);
-    /* replace the old IcmpInst with our new and shiny PHI inst */
-    BasicBlock::iterator ii(IcmpInst);
-    ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN);
+  PHINode *PN = PHINode::Create(Int1Ty, 2, "");
 
-  }
+  PN->addIncoming(icmp_usign_cmp, middle_bb);
+  PN->addIncoming(icmp_inv_sig_cmp, sign_bb);
+
+  BasicBlock::iterator ii(IcmpInst);
+  ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN);
+
+  // save for later
+  worklist.push_back(icmp_usign_cmp);
+
+  // signed comparisons are not supported by the splitting code, so we must not
+  // add it to the worklist.
+  // worklist.push_back(icmp_inv_sig_cmp);
 
   return true;
 
 }
 
-/* this function transforms signed compares to equivalent unsigned compares */
-bool SplitComparesTransform::simplifyIntSignedness(Module &M) {
+bool SplitComparesTransform::splitCompare(CmpInst *cmp_inst, Module &M,
+                                          CmpWorklist &worklist) {
 
-  LLVMContext &              C = M.getContext();
-  std::vector<Instruction *> icomps;
-  IntegerType *              Int1Ty = IntegerType::getInt1Ty(C);
+  auto pred = cmp_inst->getPredicate();
+  switch (pred) {
 
-  /* iterate over all functions, bbs and instructions and add
-   * all signed compares to icomps vector */
-  for (auto &F : M) {
+    case CmpInst::ICMP_EQ:
+    case CmpInst::ICMP_NE:
+    case CmpInst::ICMP_UGT:
+    case CmpInst::ICMP_ULT:
+      break;
+    default:
+      // unsupported predicate!
+      return false;
 
-    if (!isInInstrumentList(&F)) continue;
+  }
 
-    for (auto &BB : F) {
+  auto op0 = cmp_inst->getOperand(0);
+  auto op1 = cmp_inst->getOperand(1);
 
-      for (auto &IN : BB) {
+  // get bitwidth by checking the bitwidth of the first operator
+  IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
+  if (!intTyOp0) {
 
-        CmpInst *selectcmpInst = nullptr;
+    // not an integer type
+    return false;
 
-        if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) {
+  }
 
-          if (selectcmpInst->getPredicate() == CmpInst::ICMP_SGT ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_SLT) {
+  unsigned bitw = intTyOp0->getBitWidth();
+  if (bitw == target_bitwidth) {
 
-            auto op0 = selectcmpInst->getOperand(0);
-            auto op1 = selectcmpInst->getOperand(1);
+    // already the target bitwidth so we have to do nothing here.
+    return true;
+
+  }
+
+  LLVMContext &C = M.getContext();
+  IntegerType *Int1Ty = IntegerType::getInt1Ty(C);
+  BasicBlock * bb = cmp_inst->getParent();
+  IntegerType *OldIntType = IntegerType::get(C, bitw);
+  IntegerType *NewIntType = IntegerType::get(C, bitw / 2);
+  BasicBlock * end_bb = bb->splitBasicBlock(BasicBlock::iterator(cmp_inst));
+  CmpInst *    icmp_high, *icmp_low;
 
-            IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
-            IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType());
+  /* create the comparison of the top halves of the original operands */
+  Value *s_op0, *op0_high, *s_op1, *op1_high;
 
-            /* see above */
-            if (!intTyOp0 || !intTyOp1) { continue; }
+  IRBuilder<> IRB(bb->getTerminator());
 
-            /* i think this is not possible but to lazy to look it up */
-            if (intTyOp0->getBitWidth() != intTyOp1->getBitWidth()) {
+  s_op0 = IRB.CreateBinOp(Instruction::LShr, op0,
+                          ConstantInt::get(OldIntType, bitw / 2));
+  op0_high = IRB.CreateTruncOrBitCast(s_op0, NewIntType);
 
-              continue;
+  s_op1 = IRB.CreateBinOp(Instruction::LShr, op1,
+                          ConstantInt::get(OldIntType, bitw / 2));
+  op1_high = IRB.CreateTruncOrBitCast(s_op1, NewIntType);
+  icmp_high = cast<CmpInst>(IRB.CreateICmp(pred, op0_high, op1_high));
 
-            }
+  PHINode *PN = nullptr;
 
-            icomps.push_back(selectcmpInst);
+  /* now we have to destinguish between == != and > < */
+  switch (pred) {
 
-          }
+    case CmpInst::ICMP_EQ:
+    case CmpInst::ICMP_NE: {
 
-        }
+      /* transformation for == and != icmps */
+
+      /* create a compare for the lower half of the original operands */
+      BasicBlock *cmp_low_bb =
+          BasicBlock::Create(C, "" /*"injected"*/, end_bb->getParent(), end_bb);
+
+      Value *     op0_low, *op1_low;
+      IRBuilder<> Builder(cmp_low_bb);
+
+      op0_low = Builder.CreateTrunc(op0, NewIntType);
+      op1_low = Builder.CreateTrunc(op1, NewIntType);
+      icmp_low = cast<CmpInst>(Builder.CreateICmp(pred, op0_low, op1_low));
+
+      BranchInst::Create(end_bb, cmp_low_bb);
+
+      /* dependent on the cmp of the high parts go to the end or go on with
+       * the comparison */
+      auto        term = bb->getTerminator();
+      BranchInst *br = nullptr;
+      if (pred == CmpInst::ICMP_EQ) {
+
+        br = BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb);
+
+      } else {
+
+        /* CmpInst::ICMP_NE */
+        br = BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb);
 
       }
 
+      term->eraseFromParent();
+
+      /* create the PHI and connect the edges accordingly */
+      PN = PHINode::Create(Int1Ty, 2, "");
+      PN->addIncoming(icmp_low, cmp_low_bb);
+      Value *val = nullptr;
+      if (pred == CmpInst::ICMP_EQ) {
+
+        val = ConstantInt::get(Int1Ty, 0);
+
+      } else {
+
+        /* CmpInst::ICMP_NE */
+        val = ConstantInt::get(Int1Ty, 1);
+
+      }
+
+      PN->addIncoming(val, icmp_high->getParent());
+      break;
+
     }
 
-  }
+    case CmpInst::ICMP_UGT:
+    case CmpInst::ICMP_ULT: {
+
+      /* transformations for < and > */
+
+      /* create a basic block which checks for the inverse predicate.
+       * if this is true we can go to the end if not we have to go to the
+       * bb which checks the lower half of the operands */
+      Instruction *op0_low, *op1_low;
+      CmpInst *    icmp_inv_cmp = nullptr;
+      BasicBlock * inv_cmp_bb =
+          BasicBlock::Create(C, "inv_cmp", end_bb->getParent(), end_bb);
+      if (pred == CmpInst::ICMP_UGT) {
 
-  if (!icomps.size()) { return false; }
+        icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT,
+                                       op0_high, op1_high);
 
-  for (auto &IcmpInst : icomps) {
+      } else {
 
-    BasicBlock *bb = IcmpInst->getParent();
+        icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT,
+                                       op0_high, op1_high);
 
-    auto op0 = IcmpInst->getOperand(0);
-    auto op1 = IcmpInst->getOperand(1);
+      }
 
-    IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
-    if (!intTyOp0) { continue; }
-    unsigned     bitw = intTyOp0->getBitWidth();
-    IntegerType *IntType = IntegerType::get(C, bitw);
+      inv_cmp_bb->getInstList().push_back(icmp_inv_cmp);
+      worklist.push_back(icmp_inv_cmp);
 
-    /* get the new predicate */
-    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
-    if (!cmp_inst) { continue; }
-    auto               pred = cmp_inst->getPredicate();
-    CmpInst::Predicate new_pred;
+      auto term = bb->getTerminator();
+      term->eraseFromParent();
+      BranchInst::Create(end_bb, inv_cmp_bb, icmp_high, bb);
 
-    if (pred == CmpInst::ICMP_SGT) {
+      /* create a bb which handles the cmp of the lower halves */
+      BasicBlock *cmp_low_bb =
+          BasicBlock::Create(C, "" /*"injected"*/, end_bb->getParent(), end_bb);
+      op0_low = new TruncInst(op0, NewIntType);
+      cmp_low_bb->getInstList().push_back(op0_low);
+      op1_low = new TruncInst(op1, NewIntType);
+      cmp_low_bb->getInstList().push_back(op1_low);
 
-      new_pred = CmpInst::ICMP_UGT;
+      icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low);
+      cmp_low_bb->getInstList().push_back(icmp_low);
+      BranchInst::Create(end_bb, cmp_low_bb);
 
-    } else {
+      BranchInst::Create(end_bb, cmp_low_bb, icmp_inv_cmp, inv_cmp_bb);
 
-      new_pred = CmpInst::ICMP_ULT;
+      PN = PHINode::Create(Int1Ty, 3);
+      PN->addIncoming(icmp_low, cmp_low_bb);
+      PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb);
+      PN->addIncoming(ConstantInt::get(Int1Ty, 0), inv_cmp_bb);
+      break;
 
     }
 
-    BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));
+    default:
+      return false;
 
-    /* create a 1 bit compare for the sign bit. to do this shift and trunc
-     * the original operands so only the first bit remains.*/
-    Instruction *s_op0, *t_op0, *s_op1, *t_op1, *icmp_sign_bit;
+  }
 
-    s_op0 = BinaryOperator::Create(Instruction::LShr, op0,
-                                   ConstantInt::get(IntType, bitw - 1));
-    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0);
-    t_op0 = new TruncInst(s_op0, Int1Ty);
-    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op0);
+  BasicBlock::iterator ii(cmp_inst);
+  ReplaceInstWithInst(cmp_inst->getParent()->getInstList(), ii, PN);
 
-    s_op1 = BinaryOperator::Create(Instruction::LShr, op1,
-                                   ConstantInt::get(IntType, bitw - 1));
-    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1);
-    t_op1 = new TruncInst(s_op1, Int1Ty);
-    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op1);
+  // We split the comparison into low and high. If this isn't our target
+  // bitwidth we recursivly split the low and high parts again until we have
+  // target bitwidth.
+  if ((bitw / 2) > target_bitwidth) {
 
-    /* compare of the sign bits */
-    icmp_sign_bit =
-        CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_op0, t_op1);
-    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
-                             icmp_sign_bit);
+    worklist.push_back(icmp_high);
+    worklist.push_back(icmp_low);
 
-    /* create a new basic block which is executed if the signedness bit is
-     * different */
-    Instruction *icmp_inv_sig_cmp;
-    BasicBlock * sign_bb =
-        BasicBlock::Create(C, "sign", end_bb->getParent(), end_bb);
-    if (pred == CmpInst::ICMP_SGT) {
+  }
 
-      /* if we check for > and the op0 positive and op1 negative then the final
-       * result is true. if op0 negative and op1 pos, the cmp must result
-       * in false
-       */
-      icmp_inv_sig_cmp =
-          CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_op0, t_op1);
+  return true;
 
-    } else {
+}
+
+bool SplitComparesTransform::simplifyAndSplit(CmpInst *I, Module &M) {
+
+  CmpWorklist worklist;
+
+  auto op0 = I->getOperand(0);
+  auto op1 = I->getOperand(1);
+  if (!op0 || !op1) { return false; }
+  auto op0Ty = dyn_cast<IntegerType>(op0->getType());
+  if (!op0Ty || !isa<IntegerType>(op1->getType())) { return true; }
+
+  unsigned bitw = op0Ty->getBitWidth();
+
+#ifdef VERIFY_TOO_MUCH
+  auto F = I->getParent()->getParent();
+#endif
+
+  // we run the comparison simplification on all compares regardless of their
+  // bitwidth.
+  if (I->getPredicate() == CmpInst::ICMP_UGE ||
+      I->getPredicate() == CmpInst::ICMP_SGE ||
+      I->getPredicate() == CmpInst::ICMP_ULE ||
+      I->getPredicate() == CmpInst::ICMP_SLE) {
 
-      /* just the inverse of the above statement */
-      icmp_inv_sig_cmp =
-          CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_op0, t_op1);
+    if (!simplifyOrEqualsCompare(I, M, worklist)) {
+
+      reportError(
+          "Failed to simplify inequality or equals comparison "
+          "(UGE,SGE,ULE,SLE)",
+          I, M);
 
     }
 
-    sign_bb->getInstList().push_back(icmp_inv_sig_cmp);
-    BranchInst::Create(end_bb, sign_bb);
+  } else if (I->getPredicate() == CmpInst::ICMP_SGT ||
 
-    /* create a new bb which is executed if signedness is equal */
-    Instruction *icmp_usign_cmp;
-    BasicBlock * middle_bb =
-        BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
-    /* we can do a normal unsigned compare now */
-    icmp_usign_cmp = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1);
-    middle_bb->getInstList().push_back(icmp_usign_cmp);
-    BranchInst::Create(end_bb, middle_bb);
+             I->getPredicate() == CmpInst::ICMP_SLT) {
 
-    auto term = bb->getTerminator();
-    /* if the sign is eq do a normal unsigned cmp, else we have to check the
-     * signedness bit */
-    BranchInst::Create(middle_bb, sign_bb, icmp_sign_bit, bb);
-    term->eraseFromParent();
+    if (!simplifySignedCompare(I, M, worklist)) {
 
-    PHINode *PN = PHINode::Create(Int1Ty, 2, "");
+      reportError("Failed to simplify signed comparison (SGT,SLT)", I, M);
+
+    }
+
+  }
+
+#ifdef VERIFY_TOO_MUCH
+  if (verifyFunction(*F, &errs())) {
+
+    reportError("simpliyfing compare lead to broken function", nullptr, M);
+
+  }
+
+#endif
+
+  // the simplification methods replace the original CmpInst and push the
+  // resulting new CmpInst into the worklist. If the worklist is empty then
+  // we only have to split the original CmpInst.
+  if (worklist.size() == 0) { worklist.push_back(I); }
+
+  while (!worklist.empty()) {
+
+    CmpInst *cmp = worklist.pop_back_val();
+    // we split the simplified compares into comparisons with smaller bitwidths
+    // if they are larger than our target_bitwidth.
+    if (bitw > target_bitwidth) {
+
+      if (!splitCompare(cmp, M, worklist)) {
+
+        reportError("Failed to split comparison", cmp, M);
+
+      }
+
+#ifdef VERIFY_TOO_MUCH
+      if (verifyFunction(*F, &errs())) {
+
+        reportError("splitting compare lead to broken function", nullptr, M);
+
+      }
 
-    PN->addIncoming(icmp_usign_cmp, middle_bb);
-    PN->addIncoming(icmp_inv_sig_cmp, sign_bb);
+#endif
 
-    BasicBlock::iterator ii(IcmpInst);
-    ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN);
+    }
 
   }
 
+  count++;
   return true;
 
 }
@@ -1050,306 +1316,108 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
 
 }
 
-/* splits icmps of size bitw into two nested icmps with bitw/2 size each */
-size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned bitw) {
-
-  size_t count = 0;
-
-  LLVMContext &C = M.getContext();
-
-  IntegerType *Int1Ty = IntegerType::getInt1Ty(C);
-  IntegerType *OldIntType = IntegerType::get(C, bitw);
-  IntegerType *NewIntType = IntegerType::get(C, bitw / 2);
-
-  std::vector<Instruction *> icomps;
-
-  if (bitw % 2) { return 0; }
-
-  /* not supported yet */
-  if (bitw > 64) { return 0; }
-
-  /* get all EQ, NE, UGT, and ULT icmps of width bitw. if the
-   * functions simplifyCompares() and simplifyIntSignedness()
-   * were executed only these four predicates should exist */
-  for (auto &F : M) {
-
-    if (!isInInstrumentList(&F)) continue;
-
-    for (auto &BB : F) {
+bool SplitComparesTransform::runOnModule(Module &M) {
 
-      for (auto &IN : BB) {
+  char *bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW");
+  if (!bitw_env) bitw_env = getenv("LAF_SPLIT_COMPARES_BITW");
+  if (bitw_env) { target_bitwidth = atoi(bitw_env); }
 
-        CmpInst *selectcmpInst = nullptr;
+  enableFPSplit = getenv("AFL_LLVM_LAF_SPLIT_FLOATS") != NULL;
 
-        if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) {
+  if ((isatty(2) && getenv("AFL_QUIET") == NULL) ||
+      getenv("AFL_DEBUG") != NULL) {
 
-          if (selectcmpInst->getPredicate() == CmpInst::ICMP_EQ ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_NE ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_UGT ||
-              selectcmpInst->getPredicate() == CmpInst::ICMP_ULT) {
+    errs() << "Split-compare-pass by laf.intel@gmail.com, extended by "
+              "heiko@hexco.de (splitting icmp to "
+           << target_bitwidth << " bit)\n";
 
-            auto op0 = selectcmpInst->getOperand(0);
-            auto op1 = selectcmpInst->getOperand(1);
+    if (getenv("AFL_DEBUG") != NULL && !debug) { debug = 1; }
 
-            IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
-            IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType());
+  } else {
 
-            if (!intTyOp0 || !intTyOp1) { continue; }
+    be_quiet = 1;
 
-            /* check if the bitwidths are the one we are looking for */
-            if (intTyOp0->getBitWidth() != bitw ||
-                intTyOp1->getBitWidth() != bitw) {
+  }
 
-              continue;
+  if (enableFPSplit) {
 
-            }
+    count = splitFPCompares(M);
 
-            icomps.push_back(selectcmpInst);
+    /*
+        if (!be_quiet) {
 
-          }
+          errs() << "Split-floatingpoint-compare-pass: " << count
+                 << " FP comparisons split\n";
 
         }
 
-      }
-
-    }
+    */
+    simplifyFPCompares(M);
 
   }
 
-  if (!icomps.size()) { return 0; }
-
-  for (auto &IcmpInst : icomps) {
-
-    BasicBlock *bb = IcmpInst->getParent();
-
-    auto op0 = IcmpInst->getOperand(0);
-    auto op1 = IcmpInst->getOperand(1);
-
-    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
-    if (!cmp_inst) { continue; }
-    auto pred = cmp_inst->getPredicate();
-
-    BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));
-
-    /* create the comparison of the top halves of the original operands */
-    Instruction *s_op0, *op0_high, *s_op1, *op1_high, *icmp_high;
-
-    s_op0 = BinaryOperator::Create(Instruction::LShr, op0,
-                                   ConstantInt::get(OldIntType, bitw / 2));
-    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0);
-    op0_high = new TruncInst(s_op0, NewIntType);
-    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
-                             op0_high);
-
-    s_op1 = BinaryOperator::Create(Instruction::LShr, op1,
-                                   ConstantInt::get(OldIntType, bitw / 2));
-    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1);
-    op1_high = new TruncInst(s_op1, NewIntType);
-    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
-                             op1_high);
-
-    icmp_high = CmpInst::Create(Instruction::ICmp, pred, op0_high, op1_high);
-    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
-                             icmp_high);
-
-    /* now we have to destinguish between == != and > < */
-    if (pred == CmpInst::ICMP_EQ || pred == CmpInst::ICMP_NE) {
-
-      /* transformation for == and != icmps */
-
-      /* create a compare for the lower half of the original operands */
-      Instruction *op0_low, *op1_low, *icmp_low;
-      BasicBlock * cmp_low_bb =
-          BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
-
-      op0_low = new TruncInst(op0, NewIntType);
-      cmp_low_bb->getInstList().push_back(op0_low);
-
-      op1_low = new TruncInst(op1, NewIntType);
-      cmp_low_bb->getInstList().push_back(op1_low);
-
-      icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low);
-      cmp_low_bb->getInstList().push_back(icmp_low);
-      BranchInst::Create(end_bb, cmp_low_bb);
-
-      /* dependent on the cmp of the high parts go to the end or go on with
-       * the comparison */
-      auto term = bb->getTerminator();
-      if (pred == CmpInst::ICMP_EQ) {
-
-        BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb);
-
-      } else {
-
-        /* CmpInst::ICMP_NE */
-        BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb);
-
-      }
-
-      term->eraseFromParent();
-
-      /* create the PHI and connect the edges accordingly */
-      PHINode *PN = PHINode::Create(Int1Ty, 2, "");
-      PN->addIncoming(icmp_low, cmp_low_bb);
-      if (pred == CmpInst::ICMP_EQ) {
-
-        PN->addIncoming(ConstantInt::get(Int1Ty, 0), bb);
+  std::vector<CmpInst *> worklist;
+  /* iterate over all functions, bbs and instruction search for all integer
+   * compare instructions. Save them into the worklist for later. */
+  for (auto &F : M) {
 
-      } else {
+    if (!isInInstrumentList(&F)) continue;
 
-        /* CmpInst::ICMP_NE */
-        PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb);
+    for (auto &BB : F) {
 
-      }
+      for (auto &IN : BB) {
 
-      /* replace the old icmp with the new PHI */
-      BasicBlock::iterator ii(IcmpInst);
-      ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN);
+        if (auto CI = dyn_cast<CmpInst>(&IN)) {
 
-    } else {
+          auto op0 = CI->getOperand(0);
+          auto op1 = CI->getOperand(1);
+          if (!op0 || !op1) { return false; }
+          auto iTy1 = dyn_cast<IntegerType>(op0->getType());
+          if (iTy1 && isa<IntegerType>(op1->getType())) {
 
-      /* CmpInst::ICMP_UGT and CmpInst::ICMP_ULT */
-      /* transformations for < and > */
+            unsigned bitw = iTy1->getBitWidth();
+            if (isSupportedBitWidth(bitw)) { worklist.push_back(CI); }
 
-      /* create a basic block which checks for the inverse predicate.
-       * if this is true we can go to the end if not we have to go to the
-       * bb which checks the lower half of the operands */
-      Instruction *icmp_inv_cmp, *op0_low, *op1_low, *icmp_low;
-      BasicBlock * inv_cmp_bb =
-          BasicBlock::Create(C, "inv_cmp", end_bb->getParent(), end_bb);
-      if (pred == CmpInst::ICMP_UGT) {
-
-        icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT,
-                                       op0_high, op1_high);
-
-      } else {
+          }
 
-        icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT,
-                                       op0_high, op1_high);
+        }
 
       }
 
-      inv_cmp_bb->getInstList().push_back(icmp_inv_cmp);
-
-      auto term = bb->getTerminator();
-      term->eraseFromParent();
-      BranchInst::Create(end_bb, inv_cmp_bb, icmp_high, bb);
-
-      /* create a bb which handles the cmp of the lower halves */
-      BasicBlock *cmp_low_bb =
-          BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
-      op0_low = new TruncInst(op0, NewIntType);
-      cmp_low_bb->getInstList().push_back(op0_low);
-      op1_low = new TruncInst(op1, NewIntType);
-      cmp_low_bb->getInstList().push_back(op1_low);
-
-      icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low);
-      cmp_low_bb->getInstList().push_back(icmp_low);
-      BranchInst::Create(end_bb, cmp_low_bb);
-
-      BranchInst::Create(end_bb, cmp_low_bb, icmp_inv_cmp, inv_cmp_bb);
-
-      PHINode *PN = PHINode::Create(Int1Ty, 3);
-      PN->addIncoming(icmp_low, cmp_low_bb);
-      PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb);
-      PN->addIncoming(ConstantInt::get(Int1Ty, 0), inv_cmp_bb);
-
-      BasicBlock::iterator ii(IcmpInst);
-      ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN);
-
     }
 
-    ++count;
-
   }
 
-  return count;
-
-}
-
-bool SplitComparesTransform::runOnModule(Module &M) {
-
-  int    bitw = 64;
-  size_t count = 0;
-
-  char *bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW");
-  if (!bitw_env) bitw_env = getenv("LAF_SPLIT_COMPARES_BITW");
-  if (bitw_env) { bitw = atoi(bitw_env); }
-
-  enableFPSplit = getenv("AFL_LLVM_LAF_SPLIT_FLOATS") != NULL;
-
-  if ((isatty(2) && getenv("AFL_QUIET") == NULL) ||
-      getenv("AFL_DEBUG") != NULL) {
+  // now that we have a list of all integer comparisons we can start replacing
+  // them with the splitted alternatives.
+  for (auto CI : worklist) {
 
-    printf(
-        "Split-compare-pass by laf.intel@gmail.com, extended by "
-        "heiko@hexco.de\n");
-
-  } else {
-
-    be_quiet = 1;
+    simplifyAndSplit(CI, M);
 
   }
 
-  if (enableFPSplit) {
-
-    count = splitFPCompares(M);
-
-    /*
-        if (!be_quiet) {
-
-          errs() << "Split-floatingpoint-compare-pass: " << count
-                 << " FP comparisons split\n";
+  bool brokenDebug = false;
+  if (verifyModule( M, &errs()
+#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
+    ,&brokenDebug		// 9th May 2016
+#endif
+    )) {
 
-        }
-
-    */
-    simplifyFPCompares(M);
+    reportError(
+        "Module Verifier failed! Consider reporting a bug with the AFL++ "
+        "project.",
+        nullptr, M);
 
   }
 
-  simplifyCompares(M);
-
-  simplifyIntSignedness(M);
+  if (brokenDebug) {
 
-  switch (bitw) {
-
-    case 64:
-      count += splitIntCompares(M, bitw);
-      if (debug)
-        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
-               << " split\n";
-      bitw >>= 1;
-#if LLVM_VERSION_MAJOR > 3 || \
-    (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
-      [[clang::fallthrough]]; /*FALLTHRU*/                   /* FALLTHROUGH */
-#endif
-    case 32:
-      count += splitIntCompares(M, bitw);
-      if (debug)
-        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
-               << " split\n";
-      bitw >>= 1;
-#if LLVM_VERSION_MAJOR > 3 || \
-    (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
-      [[clang::fallthrough]]; /*FALLTHRU*/                   /* FALLTHROUGH */
-#endif
-    case 16:
-      count += splitIntCompares(M, bitw);
-      if (debug)
-        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
-               << " split\n";
-      // bitw >>= 1;
-      break;
-
-    default:
-      // if (!be_quiet) errs() << "NOT Running split-compare-pass \n";
-      return false;
-      break;
+    reportError("Module Verifier reported broken Debug Infos - Stripping!",
+                nullptr, M);
+    StripDebugInfo(M);
 
   }
 
-  verifyModule(M);
   return true;
 
 }
@@ -1373,3 +1441,8 @@ static RegisterStandardPasses RegisterSplitComparesTransPassLTO(
     registerSplitComparesPass);
 #endif
 
+static RegisterPass<SplitComparesTransform> X("splitcompares",
+                                              "AFL++ split compares",
+                                              true /* Only looks at CFG */,
+                                              true /* Analysis Pass */);
+
diff --git a/src/afl-analyze.c b/src/afl-analyze.c
index 606254d9..dbf2920f 100644
--- a/src/afl-analyze.c
+++ b/src/afl-analyze.c
@@ -167,7 +167,7 @@ static inline u8 anything_set(void) {
 
 static void at_exit_handler(void) {
 
-  unlink(fsrv.out_file);                                         /* Ignore errors */
+  unlink(fsrv.out_file);                                   /* Ignore errors */
 
 }
 
@@ -643,12 +643,14 @@ static void set_up_environment(char **argv) {
 
     }
 
-    fsrv.out_file = alloc_printf("%s/.afl-analyze-temp-%u", use_dir, (u32)getpid());
+    fsrv.out_file =
+        alloc_printf("%s/.afl-analyze-temp-%u", use_dir, (u32)getpid());
 
   }
 
   unlink(fsrv.out_file);
-  fsrv.out_fd = open(fsrv.out_file, O_RDWR | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
+  fsrv.out_fd =
+      open(fsrv.out_file, O_RDWR | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
 
   if (fsrv.out_fd < 0) { PFATAL("Unable to create '%s'", fsrv.out_file); }
 
@@ -1118,7 +1120,6 @@ int main(int argc, char **argv_orig, char **envp) {
   if (fsrv.target_path) { ck_free(fsrv.target_path); }
   if (in_data) { ck_free(in_data); }
 
-
   exit(0);
 
 }
diff --git a/src/afl-common.c b/src/afl-common.c
index c61ce3d8..9ca2b3e8 100644
--- a/src/afl-common.c
+++ b/src/afl-common.c
@@ -751,6 +751,8 @@ void read_bitmap(u8 *fname, u8 *map, size_t len) {
 
 }
 
+/* Get unix time in milliseconds */
+
 u64 get_cur_time(void) {
 
   struct timeval  tv;
diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c
index c3ce2edd..11adebf4 100644
--- a/src/afl-fuzz-one.c
+++ b/src/afl-fuzz-one.c
@@ -73,7 +73,7 @@ static int select_algorithm(afl_state_t *afl, u32 max_algorithm) {
 /* Helper to choose random block len for block operations in fuzz_one().
    Doesn't return zero, provided that max_len is > 0. */
 
-static u32 choose_block_len(afl_state_t *afl, u32 limit) {
+static inline u32 choose_block_len(afl_state_t *afl, u32 limit) {
 
   u32 min_value, max_value;
   u32 rlim = MIN(afl->queue_cycle, (u32)3);
diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c
index b41ffa88..268f726c 100644
--- a/src/afl-fuzz-redqueen.c
+++ b/src/afl-fuzz-redqueen.c
@@ -252,7 +252,7 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len,
   u64 start_time = get_cur_time();
 #endif
 
-  u32 screen_update = 1000000 / afl->queue_cur->exec_us;
+  u32 screen_update;
   u64 orig_hit_cnt, new_hit_cnt, exec_cksum;
   orig_hit_cnt = afl->queued_paths + afl->unique_crashes;
 
@@ -261,6 +261,24 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len,
   afl->stage_max = (len << 1);
   afl->stage_cur = 0;
 
+  if (likely(afl->queue_cur->exec_us)) {
+
+    if (likely((100000 / 2) >= afl->queue_cur->exec_us)) {
+
+      screen_update = 100000 / afl->queue_cur->exec_us;
+
+    } else {
+
+      screen_update = 1;
+
+    }
+
+  } else {
+
+    screen_update = 100000;
+
+  }
+
   // in colorization we do not classify counts, hence we have to calculate
   // the original checksum.
   if (unlikely(get_exec_checksum(afl, buf, len, &exec_cksum))) {
@@ -905,17 +923,16 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
       // test for arithmetic, eg. "if ((user_val - 0x1111) == 0x1234) ..."
       s64 diff = pattern - b_val;
       s64 o_diff = o_pattern - o_b_val;
-      /*
-            fprintf(stderr, "DIFF1 idx=%03u shape=%02u %llx-%llx=%lx\n", idx,
-                    h->shape + 1, o_pattern, o_b_val, o_diff);
-            fprintf(stderr, "DIFF1 %016llx %llx-%llx=%lx\n", repl, pattern,
-         b_val, diff);*/
+      /* fprintf(stderr, "DIFF1 idx=%03u shape=%02u %llx-%llx=%lx\n", idx,
+                 h->shape + 1, o_pattern, o_b_val, o_diff);
+         fprintf(stderr, "DIFF1 %016llx %llx-%llx=%lx\n", repl, pattern,
+                 b_val, diff); */
       if (diff == o_diff && diff) {
 
         // this could be an arithmetic transformation
 
         u64 new_repl = (u64)((s64)repl - diff);
-        //        fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
+        // fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
 
         if (unlikely(cmp_extend_encoding(
                 afl, h, pattern, new_repl, o_pattern, repl, IS_TRANSFORM, idx,
@@ -935,15 +952,17 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
         diff = pattern ^ b_val;
         s64 o_diff = o_pattern ^ o_b_val;
 
-        /*        fprintf(stderr, "DIFF2 idx=%03u shape=%02u %llx-%llx=%lx\n",
-           idx, h->shape + 1, o_pattern, o_b_val, o_diff); fprintf(stderr,
-           "DIFF2 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);*/
+        /* fprintf(stderr, "DIFF2 idx=%03u shape=%02u %llx-%llx=%lx\n",
+                   idx, h->shape + 1, o_pattern, o_b_val, o_diff);
+           fprintf(stderr,
+                   "DIFF2 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);
+        */
         if (diff == o_diff && diff) {
 
           // this could be a XOR transformation
 
           u64 new_repl = (u64)((s64)repl ^ diff);
-          //          fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
+          // fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
 
           if (unlikely(cmp_extend_encoding(
                   afl, h, pattern, new_repl, o_pattern, repl, IS_TRANSFORM, idx,
@@ -982,15 +1001,17 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
 
         }
 
-        /*        fprintf(stderr, "DIFF3 idx=%03u shape=%02u %llx-%llx=%lx\n",
-           idx, h->shape + 1, o_pattern, o_b_val, o_diff); fprintf(stderr,
-           "DIFF3 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);*/
+        /* fprintf(stderr, "DIFF3 idx=%03u shape=%02u %llx-%llx=%lx\n",
+                   idx, h->shape + 1, o_pattern, o_b_val, o_diff);
+           fprintf(stderr,
+                   "DIFF3 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);
+        */
         if (o_diff && diff) {
 
           // this could be a lower to upper
 
           u64 new_repl = (repl & (0x5f5f5f5f5f5f5f5f & mask));
-          //          fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
+          // fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
 
           if (unlikely(cmp_extend_encoding(
                   afl, h, pattern, new_repl, o_pattern, repl, IS_TRANSFORM, idx,
@@ -1029,15 +1050,17 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
 
         }
 
-        /*        fprintf(stderr, "DIFF4 idx=%03u shape=%02u %llx-%llx=%lx\n",
-           idx, h->shape + 1, o_pattern, o_b_val, o_diff); fprintf(stderr,
-           "DIFF4 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);*/
+        /* fprintf(stderr, "DIFF4 idx=%03u shape=%02u %llx-%llx=%lx\n",
+                   idx, h->shape + 1, o_pattern, o_b_val, o_diff);
+           fprintf(stderr,
+                   "DIFF4 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);
+        */
         if (o_diff && diff) {
 
           // this could be a lower to upper
 
           u64 new_repl = (repl | (0x2020202020202020 & mask));
-          //          fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
+          // fprintf(stderr, "SAME DIFF %llx->%llx\n", repl, new_repl);
 
           if (unlikely(cmp_extend_encoding(
                   afl, h, pattern, new_repl, o_pattern, repl, IS_TRANSFORM, idx,
@@ -1383,7 +1406,8 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
 
   }
 
-  //#endif                                           /* CMPLOG_SOLVE_ARITHMETIC
+  //#endif                                           /*
+  // CMPLOG_SOLVE_ARITHMETIC
 
   return 0;
 
@@ -2152,7 +2176,8 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
 
         memcpy(buf + idx, tmp, i + 1);
         if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
-        // fprintf(stderr, "RTN ATTEMPT tohex %u result %u\n", tohex, *status);
+        // fprintf(stderr, "RTN ATTEMPT tohex %u result %u\n", tohex,
+        // *status);
 
       }
 
@@ -2235,7 +2260,8 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
         for (j = 0; j <= i; j++)
           buf[idx + j] = repl[j] - arith_val[j];
         if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
-        // fprintf(stderr, "RTN ATTEMPT arith %u result %u\n", arith, *status);
+        // fprintf(stderr, "RTN ATTEMPT arith %u result %u\n", arith,
+        // *status);
 
       }
 
@@ -2328,16 +2354,17 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
 
     /*
       struct cmp_header *hh = &afl->orig_cmp_map->headers[key];
-    fprintf(stderr, "RTN N hits=%u id=%u shape=%u attr=%u v0=", h->hits, h->id,
-    h->shape, h->attribute); for (j = 0; j < 8; j++) fprintf(stderr, "%02x",
-    o->v0[j]); fprintf(stderr, " v1="); for (j = 0; j < 8; j++) fprintf(stderr,
-    "%02x", o->v1[j]); fprintf(stderr, "\nRTN O hits=%u id=%u shape=%u attr=%u
-    o0=", hh->hits, hh->id, hh->shape, hh->attribute); for (j = 0; j < 8; j++)
-      fprintf(stderr, "%02x", orig_o->v0[j]);
-    fprintf(stderr, " o1=");
-    for (j = 0; j < 8; j++)
-      fprintf(stderr, "%02x", orig_o->v1[j]);
-    fprintf(stderr, "\n");
+      fprintf(stderr, "RTN N hits=%u id=%u shape=%u attr=%u v0=", h->hits,
+              h->id, h->shape, h->attribute);
+      for (j = 0; j < 8; j++) fprintf(stderr, "%02x", o->v0[j]);
+      fprintf(stderr, " v1=");
+      for (j = 0; j < 8; j++) fprintf(stderr, "%02x", o->v1[j]);
+      fprintf(stderr, "\nRTN O hits=%u id=%u shape=%u attr=%u o0=",
+              hh->hits, hh->id, hh->shape, hh->attribute);
+      for (j = 0; j < 8; j++) fprintf(stderr, "%02x", orig_o->v0[j]);
+      fprintf(stderr, " o1=");
+      for (j = 0; j < 8; j++) fprintf(stderr, "%02x", orig_o->v1[j]);
+      fprintf(stderr, "\n");
     */
 
     t = taint;
diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c
index 493735ff..49856a9f 100644
--- a/src/afl-fuzz-run.c
+++ b/src/afl-fuzz-run.c
@@ -314,7 +314,7 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
   ++q->cal_failed;
 
   afl->stage_name = "calibration";
-  afl->stage_max = afl->fast_cal ? 3 : CAL_CYCLES;
+  afl->stage_max = afl->afl_env.afl_cal_fast ? 3 : CAL_CYCLES;
 
   /* Make sure the forkserver is up before we do anything, and let's not
      count its spin-up time toward binary calibration. */
@@ -355,6 +355,12 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
 
   for (afl->stage_cur = 0; afl->stage_cur < afl->stage_max; ++afl->stage_cur) {
 
+    if (unlikely(afl->debug)) {
+
+      DEBUGF("calibration stage %d/%d\n", afl->stage_cur + 1, afl->stage_max);
+
+    }
+
     u64 cksum;
 
     write_to_testcase(afl, use_mem, q->len);
@@ -402,8 +408,24 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
 
         }
 
+        if (unlikely(!var_detected)) {
+
+          // note: from_queue seems to only be set during initialization
+          if (afl->afl_env.afl_no_ui || from_queue) {
+
+            WARNF("instability detected during calibration\n");
+
+          } else if (afl->debug) {
+
+            DEBUGF("instability detected during calibration\n");
+
+          }
+
+        }
+
         var_detected = 1;
-        afl->stage_max = afl->fast_cal ? CAL_CYCLES : CAL_CYCLES_LONG;
+        afl->stage_max =
+            afl->afl_env.afl_cal_fast ? CAL_CYCLES : CAL_CYCLES_LONG;
 
       } else {
 
diff --git a/src/afl-fuzz-state.c b/src/afl-fuzz-state.c
index 0658070e..b832c11e 100644
--- a/src/afl-fuzz-state.c
+++ b/src/afl-fuzz-state.c
@@ -96,8 +96,6 @@ void afl_state_init(afl_state_t *afl, uint32_t map_size) {
   afl->splicing_with = -1;              /* Splicing with which test case?   */
   afl->cpu_to_bind = -1;
   afl->havoc_stack_pow2 = HAVOC_STACK_POW2;
-  afl->cal_cycles = CAL_CYCLES;
-  afl->cal_cycles_long = CAL_CYCLES_LONG;
   afl->hang_tmout = EXEC_TIMEOUT;
   afl->exit_on_time = 0;
   afl->stats_update_freq = 1;
@@ -341,6 +339,13 @@ void read_afl_environment(afl_state_t *afl, char **envp) {
             afl->afl_env.afl_cal_fast =
                 get_afl_env(afl_environment_variables[i]) ? 1 : 0;
 
+          } else if (!strncmp(env, "AFL_FAST_CAL",
+
+                              afl_environment_variable_len)) {
+
+            afl->afl_env.afl_cal_fast =
+                get_afl_env(afl_environment_variables[i]) ? 1 : 0;
+
           } else if (!strncmp(env, "AFL_STATSD",
 
                               afl_environment_variable_len)) {
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index 9a3780fb..e9a67ac5 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -1276,7 +1276,6 @@ int main(int argc, char **argv_orig, char **envp) {
   if (get_afl_env("AFL_NO_CPU_RED")) { afl->no_cpu_meter_red = 1; }
   if (get_afl_env("AFL_NO_ARITH")) { afl->no_arith = 1; }
   if (get_afl_env("AFL_SHUFFLE_QUEUE")) { afl->shuffle_queue = 1; }
-  if (get_afl_env("AFL_FAST_CAL")) { afl->fast_cal = 1; }
   if (get_afl_env("AFL_EXPAND_HAVOC_NOW")) { afl->expand_havoc = 1; }
 
   if (afl->afl_env.afl_autoresume) {
@@ -1489,14 +1488,6 @@ int main(int argc, char **argv_orig, char **envp) {
   check_if_tty(afl);
   if (afl->afl_env.afl_force_ui) { afl->not_on_tty = 0; }
 
-  if (afl->afl_env.afl_cal_fast) {
-
-    /* Use less calibration cycles, for slow applications */
-    afl->cal_cycles = 3;
-    afl->cal_cycles_long = 5;
-
-  }
-
   if (afl->afl_env.afl_custom_mutator_only) {
 
     /* This ensures we don't proceed to havoc/splice */
diff --git a/test/test-basic.sh b/test/test-basic.sh
index b4bb9df2..c39faa74 100755
--- a/test/test-basic.sh
+++ b/test/test-basic.sh
@@ -56,11 +56,6 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
     CODE=1
   }
   # now we want to be sure that afl-fuzz is working
-  # make sure core_pattern is set to core on linux
-  (test "$(uname -s)" = "Linux" && test "$(sysctl kernel.core_pattern)" != "kernel.core_pattern = core" && {
-    $ECHO "$YELLOW[-] we should not run afl-fuzz with enabled core dumps. Run 'sudo sh afl-system-config'.$RESET"
-    true
-  }) ||
   # make sure crash reporter is disabled on Mac OS X
   (test "$(uname -s)" = "Darwin" && test $(launchctl list 2>/dev/null | grep -q '\.ReportCrash$') && {
     $ECHO "$RED[!] we cannot run afl-fuzz with enabled crash reporter. Run 'sudo sh afl-system-config'.$RESET"
@@ -176,11 +171,6 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
     CODE=1
   }
   # now we want to be sure that afl-fuzz is working
-  # make sure core_pattern is set to core on linux
-  (test "$(uname -s)" = "Linux" && test "$(sysctl kernel.core_pattern)" != "kernel.core_pattern = core" && {
-    $ECHO "$YELLOW[-] we should not run afl-fuzz with enabled core dumps. Run 'sudo sh afl-system-config'.$RESET"
-    true
-  }) ||
   # make sure crash reporter is disabled on Mac OS X
   (test "$(uname -s)" = "Darwin" && test $(launchctl list 2>/dev/null | grep -q '\.ReportCrash$') && {
     $ECHO "$RED[!] we cannot run afl-fuzz with enabled crash reporter. Run 'sudo sh afl-system-config'.$RESET"
diff --git a/test/test-gcc-plugin.sh b/test/test-gcc-plugin.sh
index 4c36b6c9..50d83e40 100755
--- a/test/test-gcc-plugin.sh
+++ b/test/test-gcc-plugin.sh
@@ -52,10 +52,6 @@ test -e ../afl-gcc-fast -a -e ../afl-compiler-rt.o && {
     CODE=1
   }
   # now we want to be sure that afl-fuzz is working
-  (test "$(uname -s)" = "Linux" && test "$(sysctl kernel.core_pattern)" != "kernel.core_pattern = core" && {
-    $ECHO "$YELLOW[-] we should not run afl-fuzz with enabled core dumps. Run 'sudo sh afl-system-config'.$RESET"
-    true
-  }) ||
   # make sure crash reporter is disabled on Mac OS X
   (test "$(uname -s)" = "Darwin" && test $(launchctl list 2>/dev/null | grep -q '\.ReportCrash$') && {
     $ECHO "$RED[!] we cannot run afl-fuzz with enabled crash reporter. Run 'sudo sh afl-system-config'.$RESET"
diff --git a/test/test-int_cases.c b/test/test-int_cases.c
new file mode 100644
index 00000000..c76206c5
--- /dev/null
+++ b/test/test-int_cases.c
@@ -0,0 +1,424 @@
+/* test cases for integer comparison transformations
+ * compile with -DINT_TYPE="signed char"
+ *          or  -DINT_TYPE="short"
+ *          or  -DINT_TYPE="int"
+ *          or  -DINT_TYPE="long"
+ *          or  -DINT_TYPE="long long"
+ */
+
+#include <assert.h>
+
+int main() {
+
+  volatile INT_TYPE a, b;
+  /* different values */
+  a = -21;
+  b = -2;                          /* signs equal */
+  assert((a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert(!(a >= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  a = 1;
+  b = 8;                           /* signs equal */
+  assert((a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert(!(a >= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  if ((unsigned)(INT_TYPE)(~0) > 255) { /* short or bigger */
+    volatile short a, b;
+    a = 2;
+    b = 256+1;                            /* signs equal */
+    assert((a < b));
+    assert((a <= b));
+    assert(!(a > b));
+    assert(!(a >= b));
+    assert((a != b));
+    assert(!(a == b));
+
+    a = -1 - 256;
+    b = -8;                          /* signs equal */
+    assert((a < b));
+    assert((a <= b));
+    assert(!(a > b));
+    assert(!(a >= b));
+    assert((a != b));
+    assert(!(a == b));
+
+    if ((unsigned)(INT_TYPE)(~0) > 65535) { /* int or bigger */
+      volatile int a, b;
+      a = 2;
+      b = 65536+1;                            /* signs equal */
+      assert((a < b));
+      assert((a <= b));
+      assert(!(a > b));
+      assert(!(a >= b));
+      assert((a != b));
+      assert(!(a == b));
+
+      a = -1 - 65536;
+      b = -8;                          /* signs equal */
+      assert((a < b));
+      assert((a <= b));
+      assert(!(a > b));
+      assert(!(a >= b));
+      assert((a != b));
+      assert(!(a == b));
+
+      if ((unsigned)(INT_TYPE)(~0) > 4294967295) { /* long or bigger */
+        volatile long a, b;
+        a = 2;
+        b = 4294967296+1;                            /* signs equal */
+        assert((a < b));
+        assert((a <= b));
+        assert(!(a > b));
+        assert(!(a >= b));
+        assert((a != b));
+        assert(!(a == b));
+
+        a = -1 - 4294967296;
+        b = -8;                          /* signs equal */
+        assert((a < b));
+        assert((a <= b));
+        assert(!(a > b));
+        assert(!(a >= b));
+        assert((a != b));
+        assert(!(a == b));
+
+      }
+    }
+  }
+
+  a = -1;
+  b = 1;                         /* signs differ */
+  assert((a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert(!(a >= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  a = -1;
+  b = 0;                        /* signs differ */
+  assert((a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert(!(a >= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  a = -2;
+  b = 8;                           /* signs differ */
+  assert((a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert(!(a >= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  a = -1;
+  b = -2;                           /* signs equal */
+  assert((a > b));
+  assert((a >= b));
+  assert(!(a < b));
+  assert(!(a <= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  a = 8;
+  b = 1;                           /* signs equal */
+  assert((a > b));
+  assert((a >= b));
+  assert(!(a < b));
+  assert(!(a <= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  if ((unsigned)(INT_TYPE)(~0) > 255) {
+    volatile short a, b;
+    a = 1 + 256;
+    b = 3;                              /* signs equal */
+    assert((a > b));
+    assert((a >= b));
+    assert(!(a < b));
+    assert(!(a <= b));
+    assert((a != b));
+    assert(!(a == b));
+
+    a = -1;
+    b = -256;                            /* signs equal */
+    assert((a > b));
+    assert((a >= b));
+    assert(!(a < b));
+    assert(!(a <= b));
+    assert((a != b));
+    assert(!(a == b));
+
+    if ((unsigned)(INT_TYPE)(~0) > 65535) {
+      volatile int a, b;
+      a = 1 + 65536;
+      b = 3;                              /* signs equal */
+      assert((a > b));
+      assert((a >= b));
+      assert(!(a < b));
+      assert(!(a <= b));
+      assert((a != b));
+      assert(!(a == b));
+
+      a = -1;
+      b = -65536;                            /* signs equal */
+      assert((a > b));
+      assert((a >= b));
+      assert(!(a < b));
+      assert(!(a <= b));
+      assert((a != b));
+      assert(!(a == b));
+
+      if ((unsigned)(INT_TYPE)(~0) > 4294967295) {
+        volatile long a, b;
+        a = 1 + 4294967296;
+        b = 3;                              /* signs equal */
+        assert((a > b));
+        assert((a >= b));
+        assert(!(a < b));
+        assert(!(a <= b));
+        assert((a != b));
+        assert(!(a == b));
+ 
+        a = -1;
+        b = -4294967296;                   /* signs equal */
+        assert((a > b));
+        assert((a >= b));
+        assert(!(a < b));
+        assert(!(a <= b));
+        assert((a != b));
+        assert(!(a == b));
+      }
+    }
+  }
+
+  a = 1;
+  b = -1;                        /* signs differ */
+  assert((a > b));
+  assert((a >= b));
+  assert(!(a < b));
+  assert(!(a <= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  a = 0;
+  b = -1;                       /* signs differ */
+  assert((a > b));
+  assert((a >= b));
+  assert(!(a < b));
+  assert(!(a <= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  a = 8;
+  b = -2;                            /* signs differ */
+  assert((a > b));
+  assert((a >= b));
+  assert(!(a < b));
+  assert(!(a <= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  a = 1;
+  b = -2;                           /* signs differ */
+  assert((a > b));
+  assert((a >= b));
+  assert(!(a < b));
+  assert(!(a <= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  if ((unsigned)(INT_TYPE)(~0) > 255) {
+    volatile short a, b;
+    a = 1 + 256;
+    b = -2;                           /* signs differ */
+    assert((a > b));
+    assert((a >= b));
+    assert(!(a < b));
+    assert(!(a <= b));
+    assert((a != b));
+    assert(!(a == b));
+
+    a = -1;
+    b = -2 - 256;                     /* signs differ */
+    assert((a > b));
+    assert((a >= b));
+    assert(!(a < b));
+    assert(!(a <= b));
+    assert((a != b));
+    assert(!(a == b));
+
+    if ((unsigned)(INT_TYPE)(~0) > 65535) {
+      volatile int a, b;
+      a = 1 + 65536;
+      b = -2;                           /* signs differ */
+      assert((a > b));
+      assert((a >= b));
+      assert(!(a < b));
+      assert(!(a <= b));
+      assert((a != b));
+      assert(!(a == b));
+ 
+      a = -1;
+      b = -2 - 65536;                  /* signs differ */
+      assert((a > b));
+      assert((a >= b));
+      assert(!(a < b));
+      assert(!(a <= b));
+      assert((a != b));
+      assert(!(a == b));
+
+      if ((unsigned)(INT_TYPE)(~0) > 4294967295) {
+        volatile long a, b;
+        a = 1 + 4294967296;
+        b = -2;                           /* signs differ */
+        assert((a > b));
+        assert((a >= b));
+        assert(!(a < b));
+        assert(!(a <= b));
+        assert((a != b));
+        assert(!(a == b));
+  
+        a = -1;
+        b = -2 - 4294967296;              /* signs differ */
+        assert((a > b));
+        assert((a >= b));
+        assert(!(a < b));
+        assert(!(a <= b));
+        assert((a != b));
+        assert(!(a == b));
+
+      }
+    }
+  }
+
+  /* equal values */
+  a = 0;
+  b = 0;
+  assert(!(a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert((a >= b));
+  assert(!(a != b));
+  assert((a == b));
+
+  a = -0;
+  b = 0;
+  assert(!(a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert((a >= b));
+  assert(!(a != b));
+  assert((a == b));
+
+  a = 1;
+  b = 1;
+  assert(!(a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert((a >= b));
+  assert(!(a != b));
+  assert((a == b));
+
+  a = 5;
+  b = 5;
+  assert(!(a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert((a >= b));
+  assert(!(a != b));
+  assert((a == b));
+
+  a = -1;
+  b = -1;
+  assert(!(a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert((a >= b));
+  assert(!(a != b));
+  assert((a == b));
+
+  a = -5;
+  b = -5;
+  assert(!(a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert((a >= b));
+  assert(!(a != b));
+  assert((a == b));
+
+  if ((unsigned)(INT_TYPE)(~0) > 255) {
+    volatile short a, b;
+    a = 1 + 256;
+    b = 1 + 256;
+    assert(!(a < b));
+    assert((a <= b));
+    assert(!(a > b));
+    assert((a >= b));
+    assert(!(a != b));
+    assert((a == b));
+
+    a = -2 - 256;
+    b = -2 - 256;
+    assert(!(a < b));
+    assert((a <= b));
+    assert(!(a > b));
+    assert((a >= b));
+    assert(!(a != b));
+    assert((a == b));
+
+    if ((unsigned)(INT_TYPE)(~0) > 65535) {
+      volatile int a, b;
+      a = 1 + 65536;
+      b = 1 + 65536;
+      assert(!(a < b));
+      assert((a <= b));
+      assert(!(a > b));
+      assert((a >= b));
+      assert(!(a != b));
+      assert((a == b));
+ 
+      a = -2 - 65536;
+      b = -2 - 65536;
+      assert(!(a < b));
+      assert((a <= b));
+      assert(!(a > b));
+      assert((a >= b));
+      assert(!(a != b));
+      assert((a == b));
+
+      if ((unsigned)(INT_TYPE)(~0) > 4294967295) {
+        volatile long a, b;
+        a = 1 + 4294967296;
+        b = 1 + 4294967296;
+        assert(!(a < b));
+        assert((a <= b));
+        assert(!(a > b));
+        assert((a >= b));
+        assert(!(a != b));
+        assert((a == b));
+  
+        a = -2 - 4294967296;
+        b = -2 - 4294967296;
+        assert(!(a < b));
+        assert((a <= b));
+        assert(!(a > b));
+        assert((a >= b));
+        assert(!(a != b));
+        assert((a == b));
+  
+      }
+    }
+  }
+}
+
diff --git a/test/test-llvm.sh b/test/test-llvm.sh
index 7cdc83cb..8090e176 100755
--- a/test/test-llvm.sh
+++ b/test/test-llvm.sh
@@ -122,10 +122,6 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
     CODE=1
   }
   # now we want to be sure that afl-fuzz is working
-  (test "$(uname -s)" = "Linux" && test "$(sysctl kernel.core_pattern)" != "kernel.core_pattern = core" && {
-    $ECHO "$YELLOW[-] we should not run afl-fuzz with enabled core dumps. Run 'sudo sh afl-system-config'.$RESET"
-    true
-  }) ||
   # make sure crash reporter is disabled on Mac OS X
   (test "$(uname -s)" = "Darwin" && test $(launchctl list 2>/dev/null | grep -q '\.ReportCrash$') && {
     $ECHO "$RED[!] we cannot run afl-fuzz with enabled crash reporter. Run 'sudo sh afl-system-config'.$RESET"
@@ -190,6 +186,29 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
   }
   rm -f test-instr.plain
 
+  $ECHO "$GREY[*] llvm_mode laf-intel/compcov testing splitting integer types (this might take some time)"
+  for testcase in ./test-int_cases.c ./test-uint_cases.c; do
+    for I in char short int long "long long"; do
+      for BITS in 8 16 32 64; do
+        bin="$testcase-split-$I-$BITS.compcov" 
+        AFL_LLVM_INSTRUMENT=AFL AFL_DEBUG=1 AFL_LLVM_LAF_SPLIT_COMPARES_BITW=$BITS AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast -DINT_TYPE="$I" -o "$bin" "$testcase" > test.out 2>&1;
+        if ! test -e "$bin"; then
+            cat test.out
+            $ECHO "$RED[!] llvm_mode laf-intel/compcov integer splitting failed! ($testcase with type $I split to $BITS)!";
+            CODE=1
+            break
+        fi
+        if ! "$bin"; then
+            $ECHO "$RED[!] llvm_mode laf-intel/compcov integer splitting resulted in miscompilation (type $I split to $BITS)!";
+            CODE=1
+            break
+        fi
+        rm -f "$bin" test.out || true
+      done
+    done
+  done
+  rm -f test-int-split*.compcov test.out
+
   AFL_LLVM_INSTRUMENT=AFL AFL_DEBUG=1 AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast -o test-compcov.compcov test-compcov.c > test.out 2>&1
   test -e test-compcov.compcov && test_compcov_binary_functionality ./test-compcov.compcov && {
     grep --binary-files=text -Eq " [ 123][0-9][0-9] location| [3-9][0-9] location" test.out && {
diff --git a/test/test-uint_cases.c b/test/test-uint_cases.c
new file mode 100644
index 00000000..a277e28a
--- /dev/null
+++ b/test/test-uint_cases.c
@@ -0,0 +1,217 @@
+/*
+ * compile with -DINT_TYPE="char"
+ *          or  -DINT_TYPE="short"
+ *          or  -DINT_TYPE="int"
+ *          or  -DINT_TYPE="long"
+ *          or  -DINT_TYPE="long long"
+ */
+
+#include <assert.h>
+
+int main() {
+
+  volatile unsigned INT_TYPE a, b;
+
+  a = 1;
+  b = 8;
+  assert((a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert(!(a >= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  if ((INT_TYPE)(~0) > 255) {
+    volatile unsigned short a, b;
+    a = 256+2;
+    b = 256+21;
+    assert((a < b));
+    assert((a <= b));
+    assert(!(a > b));
+    assert(!(a >= b));
+    assert((a != b));
+    assert(!(a == b));
+
+    a = 21;
+    b = 256+1;
+    assert((a < b));
+    assert((a <= b));
+    assert(!(a > b));
+    assert(!(a >= b));
+    assert((a != b));
+    assert(!(a == b));
+
+    if ((INT_TYPE)(~0) > 65535) {
+      volatile unsigned int a, b;
+      a = 65536+2;
+      b = 65536+21;
+      assert((a < b));
+      assert((a <= b));
+      assert(!(a > b));
+      assert(!(a >= b));
+      assert((a != b));
+      assert(!(a == b));
+ 
+      a = 21;
+      b = 65536+1;
+      assert((a < b));
+      assert((a <= b));
+      assert(!(a > b));
+      assert(!(a >= b));
+      assert((a != b));
+      assert(!(a == b));
+    }
+
+    if ((INT_TYPE)(~0) > 4294967295) {
+      volatile unsigned long a, b;
+      a = 4294967296+2;
+      b = 4294967296+21;
+      assert((a < b));
+      assert((a <= b));
+      assert(!(a > b));
+      assert(!(a >= b));
+      assert((a != b));
+      assert(!(a == b));
+ 
+      a = 21;
+      b = 4294967296+1;
+      assert((a < b));
+      assert((a <= b));
+      assert(!(a > b));
+      assert(!(a >= b));
+      assert((a != b));
+      assert(!(a == b));
+    }
+  }
+
+  a = 8;
+  b = 1;
+  assert((a > b));
+  assert((a >= b));
+  assert(!(a < b));
+  assert(!(a <= b));
+  assert((a != b));
+  assert(!(a == b));
+
+  if ((INT_TYPE)(~0) > 255) {
+    volatile unsigned short a, b;
+    a = 256+2;
+    b = 256+1;
+    assert((a > b));
+    assert((a >= b));
+    assert(!(a < b));
+    assert(!(a <= b));
+    assert((a != b));
+    assert(!(a == b));
+
+    a = 256+2;
+    b = 6;
+    assert((a > b));
+    assert((a >= b));
+    assert(!(a < b));
+    assert(!(a <= b));
+    assert((a != b));
+    assert(!(a == b));
+
+    if ((INT_TYPE)(~0) > 65535) {
+      volatile unsigned int a, b;
+      a = 65536+2;
+      b = 65536+1;
+      assert((a > b));
+      assert((a >= b));
+      assert(!(a < b));
+      assert(!(a <= b));
+      assert((a != b));
+      assert(!(a == b));
+ 
+      a = 65536+2;
+      b = 6;
+      assert((a > b));
+      assert((a >= b));
+      assert(!(a < b));
+      assert(!(a <= b));
+      assert((a != b));
+      assert(!(a == b));
+
+      if ((INT_TYPE)(~0) > 4294967295) {
+        volatile unsigned long a, b;
+        a = 4294967296+2;
+        b = 4294967296+1;
+        assert((a > b));
+        assert((a >= b));
+        assert(!(a < b));
+        assert(!(a <= b));
+        assert((a != b));
+        assert(!(a == b));
+  
+        a = 4294967296+2;
+        b = 6;
+        assert((a > b));
+        assert((a >= b));
+        assert(!(a < b));
+        assert(!(a <= b));
+        assert((a != b));
+        assert(!(a == b));
+
+      }
+    }
+  }
+
+
+  a = 0;
+  b = 0;
+  assert(!(a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert((a >= b));
+  assert(!(a != b));
+  assert((a == b));
+
+  a = 1;
+  b = 1;
+  assert(!(a < b));
+  assert((a <= b));
+  assert(!(a > b));
+  assert((a >= b));
+  assert(!(a != b));
+  assert((a == b));
+
+  if ((INT_TYPE)(~0) > 255) {
+    volatile unsigned short a, b;
+    a = 256+5;
+    b = 256+5;
+    assert(!(a < b));
+    assert((a <= b));
+    assert(!(a > b));
+    assert((a >= b));
+    assert(!(a != b));
+    assert((a == b));
+
+    if ((INT_TYPE)(~0) > 65535) {
+      volatile unsigned int a, b;
+      a = 65536+5;
+      b = 65536+5;
+      assert(!(a < b));
+      assert((a <= b));
+      assert(!(a > b));
+      assert((a >= b));
+      assert(!(a != b));
+      assert((a == b));
+
+      if ((INT_TYPE)(~0) > 4294967295) {
+        volatile unsigned long a, b;
+        a = 4294967296+5;
+        b = 4294967296+5;
+        assert(!(a < b));
+        assert((a <= b));
+        assert(!(a > b));
+        assert((a >= b));
+        assert(!(a != b));
+        assert((a == b));
+      }
+    }
+
+  }
+
+}
+
diff --git a/unicorn_mode/samples/speedtest/rust/src/main.rs b/unicorn_mode/samples/speedtest/rust/src/main.rs
index 1e35ff0b..9ea1b873 100644
--- a/unicorn_mode/samples/speedtest/rust/src/main.rs
+++ b/unicorn_mode/samples/speedtest/rust/src/main.rs
@@ -48,7 +48,7 @@ fn parse_locs(loc_name: &str) -> Result<Vec<u64>, io::Error> {
     let contents = &read_file(&format!("../target.offsets.{}", loc_name))?;
     //println!("Read: {:?}", contents);
     Ok(str_from_u8_unchecked(&contents)
-        .split("\n")
+        .split('\n')
         .map(|x| {
             //println!("Trying to convert {}", &x[2..]);
             let result = u64::from_str_radix(&x[2..], 16);
@@ -90,7 +90,8 @@ fn fuzz(input_file: &str) -> Result<(), uc_error> {
     let mut unicorn = Unicorn::new(Arch::X86, Mode::MODE_64, 0)?;
     let mut uc: UnicornHandle<'_, _> = unicorn.borrow();
 
-    let binary = read_file(BINARY).expect(&format!("Could not read modem image: {}", BINARY));
+    let binary =
+        read_file(BINARY).unwrap_or_else(|_| panic!("Could not read modem image: {}", BINARY));
     let _aligned_binary_size = align(binary.len() as u64);
     // Apply constraints to the mutated input
     if binary.len() as u64 > CODE_SIZE_MAX {
@@ -151,7 +152,7 @@ fn fuzz(input_file: &str) -> Result<(), uc_error> {
         already_allocated_malloc.set(true);
     };
 
-    let already_allocated_free = already_allocated.clone();
+    let already_allocated_free = already_allocated;
     // No real free, just set the "used"-flag to false.
     let hook_free = move |mut uc: UnicornHandle<'_, _>, addr, size| {
         if already_allocated_free.get() {
@@ -190,7 +191,7 @@ fn fuzz(input_file: &str) -> Result<(), uc_error> {
     }
 
     for addr in parse_locs("magicfn").unwrap() {
-        uc.add_code_hook(addr, addr, Box::new(hook_magicfn.clone()))?;
+        uc.add_code_hook(addr, addr, Box::new(hook_magicfn))?;
     }
 
     let place_input_callback =
@@ -225,7 +226,7 @@ fn fuzz(input_file: &str) -> Result<(), uc_error> {
 
     match ret {
         Ok(_) => {}
-        Err(e) => panic!(format!("found non-ok unicorn exit: {:?}", e)),
+        Err(e) => panic!("found non-ok unicorn exit: {:?}", e),
     }
 
     Ok(())
diff --git a/utils/aflpp_driver/aflpp_driver.c b/utils/aflpp_driver/aflpp_driver.c
index c094c425..ff42f3b9 100644
--- a/utils/aflpp_driver/aflpp_driver.c
+++ b/utils/aflpp_driver/aflpp_driver.c
@@ -204,21 +204,23 @@ static int ExecuteFilesOnyByOne(int argc, char **argv) {
 
 int main(int argc, char **argv) {
 
-  printf(
-      "============================== INFO ================================\n"
-      "This binary is built for afl++.\n"
-      "To use with afl-cmin or afl-cmin.bash pass '-' as single command line "
-      "option\n"
-      "To run the target function on individual input(s) execute this:\n"
-      "  %s INPUT_FILE1 [INPUT_FILE2 ... ]\n"
-      "To fuzz with afl-fuzz execute this:\n"
-      "  afl-fuzz [afl-flags] -- %s [-N]\n"
-      "afl-fuzz will run N iterations before re-spawning the process (default: "
-      "INT_MAX)\n"
-      "For stdin input processing, pass '-' as single command line option.\n"
-      "For file input processing, pass '@@' as single command line option.\n"
-      "===================================================================\n",
-      argv[0], argv[0]);
+  if (argc < 2 || strncmp(argv[1], "-h", 2) == 0)
+    printf(
+        "============================== INFO ================================\n"
+        "This binary is built for afl++.\n"
+        "To use with afl-cmin or afl-cmin.bash pass '-' as single command line "
+        "option\n"
+        "To run the target function on individual input(s) execute this:\n"
+        "  %s INPUT_FILE1 [INPUT_FILE2 ... ]\n"
+        "To fuzz with afl-fuzz execute this:\n"
+        "  afl-fuzz [afl-flags] -- %s [-N]\n"
+        "afl-fuzz will run N iterations before re-spawning the process "
+        "(default: "
+        "INT_MAX)\n"
+        "For stdin input processing, pass '-' as single command line option.\n"
+        "For file input processing, pass '@@' as single command line option.\n"
+        "===================================================================\n",
+        argv[0], argv[0]);
 
   if (getenv("AFL_GDB")) {