about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author van Hauser <vh@thc.org> 2020-12-23 14:58:52 +0100
committer GitHub <noreply@github.com> 2020-12-23 14:58:52 +0100
commit 7dc433a0c0efb236a2ab6fa4006a91aa02e26779 (patch)
tree b0a48cd22af9aaa9695b61d08499232dbe9f0f36 /src
parent 1078409875cacc5626a3961e08cf67c68049d22a (diff)
parent 8d10d12ab344bdf6dfe0478e927c92278b4aac78 (diff)
download afl++-7dc433a0c0efb236a2ab6fa4006a91aa02e26779.tar.gz
Merge pull request #637 from AFLplusplus/dev
minor push to stable
Diffstat (limited to 'src')
-rw-r--r-- src/afl-analyze.c 20
-rw-r--r-- src/afl-cc.c 145
-rw-r--r-- src/afl-common.c 5
-rw-r--r-- src/afl-forkserver.c 4
-rw-r--r-- src/afl-fuzz-bitmap.c 330
-rw-r--r-- src/afl-fuzz-init.c 2
-rw-r--r-- src/afl-fuzz-run.c 9
-rw-r--r-- src/afl-fuzz-state.c 17
-rw-r--r-- src/afl-fuzz-stats.c 2
-rw-r--r-- src/afl-fuzz.c 21
-rw-r--r-- src/afl-performance.c 124
-rw-r--r-- src/afl-showmap.c 28
-rw-r--r-- src/afl-tmin.c 20
13 files changed, 369 insertions, 358 deletions
diff --git a/src/afl-analyze.c b/src/afl-analyze.c
index 2780deff..a6825ef6 100644
--- a/src/afl-analyze.c
+++ b/src/afl-analyze.c
@@ -103,19 +103,29 @@ static u32 map_size = MAP_SIZE;
 /* Classify tuple counts. This is a slow & naive version, but good enough here.
  */
 
+#define TIMES4(x) x,x,x,x
+#define TIMES8(x) TIMES4(x),TIMES4(x)
+#define TIMES16(x) TIMES8(x),TIMES8(x)
+#define TIMES32(x) TIMES16(x),TIMES16(x)
+#define TIMES64(x) TIMES32(x),TIMES32(x)
 static u8 count_class_lookup[256] = {
 
     [0] = 0,
     [1] = 1,
     [2] = 2,
     [3] = 4,
-    [4 ... 7] = 8,
-    [8 ... 15] = 16,
-    [16 ... 31] = 32,
-    [32 ... 127] = 64,
-    [128 ... 255] = 128
+    [4] = TIMES4(8),
+    [8] = TIMES8(16),
+    [16] = TIMES16(32),
+    [32] = TIMES64(64), TIMES32(64),     /* 96 entries: 32..127 */
+    [128] = TIMES64(128), TIMES64(128)   /* 128 entries: 128..255 */
 
 };
+#undef TIMES64
+#undef TIMES32
+#undef TIMES16
+#undef TIMES8
+#undef TIMES4
 
 static void classify_counts(u8 *mem) {
 
diff --git a/src/afl-cc.c b/src/afl-cc.c
index 2aeb2178..66f4860f 100644
--- a/src/afl-cc.c
+++ b/src/afl-cc.c
@@ -62,7 +62,7 @@ u8          use_stdin;                                             /* dummy */
 
 enum {
 
-  INSTURMENT_DEFAULT = 0,
+  INSTRUMENT_DEFAULT = 0,
   INSTRUMENT_CLASSIC = 1,
   INSTRUMENT_AFL = 1,
   INSTRUMENT_PCGUARD = 2,
@@ -70,6 +70,8 @@ enum {
   INSTRUMENT_CFG = 3,
   INSTRUMENT_LTO = 4,
   INSTRUMENT_LLVMNATIVE = 5,
+  INSTRUMENT_GCC = 6,
+  INSTRUMENT_CLANG = 7,
   INSTRUMENT_OPT_CTX = 8,
   INSTRUMENT_OPT_NGRAM = 16
 
@@ -77,9 +79,24 @@ enum {
 
 char instrument_mode_string[18][18] = {
 
-    "DEFAULT", "CLASSIC", "PCGUARD", "CFG", "LTO", "", "PCGUARD-NATIVE",
-    "",        "CTX",     "",        "",    "",    "", "",
-    "",        "",        "NGRAM",   ""
+    "DEFAULT",
+    "CLASSIC",
+    "PCGUARD",
+    "CFG",
+    "LTO",
+    "PCGUARD-NATIVE",
+    "GCC",
+    "CLANG",
+    "CTX",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "NGRAM",
+    ""
 
 };
 
@@ -89,14 +106,15 @@ enum {
   LTO = 1,
   LLVM = 2,
   GCC_PLUGIN = 3,
-  GCC = 4
+  GCC = 4,
+  CLANG = 5
 
 };
 
-char compiler_mode_string[6][12] = {
+char compiler_mode_string[7][12] = {
 
     "AUTOSELECT", "LLVM-LTO", "LLVM", "GCC_PLUGIN",
-    "GCC",        ""
+    "GCC",        "CLANG",    ""
 
 };
 
@@ -324,6 +342,10 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
           alt_cxx = clang_mode ? "clang++" : "g++";
 
+        } else if (compiler_mode == CLANG) {
+
+          alt_cxx = "clang++";
+
         } else {
 
           alt_cxx = "g++";
@@ -357,6 +379,10 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
           alt_cc = clang_mode ? "clang" : "gcc";
 
+        } else if (compiler_mode == CLANG) {
+
+          alt_cc = "clang";
+
         } else {
 
           alt_cc = "gcc";
@@ -380,12 +406,16 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   }
 
-  if (compiler_mode == GCC) {
+  if (compiler_mode == GCC || compiler_mode == CLANG) {
 
     cc_params[cc_par_cnt++] = "-B";
     cc_params[cc_par_cnt++] = obj_path;
 
-    if (clang_mode) { cc_params[cc_par_cnt++] = "-no-integrated-as"; }
+    if (clang_mode || compiler_mode == CLANG) {
+
+      cc_params[cc_par_cnt++] = "-no-integrated-as";
+
+    }
 
   }
 
@@ -996,12 +1026,16 @@ int main(int argc, char **argv, char **envp) {
 
   } else if (strncmp(callname, "afl-gcc", 7) == 0 ||
 
-             strncmp(callname, "afl-g++", 7) == 0 ||
-
-             strncmp(callname, "afl-clang", 9) == 0) {
+             strncmp(callname, "afl-g++", 7) == 0) {
 
     compiler_mode = GCC;
 
+  } else if (strncmp(callname, "afl-clang", 9) == 0 &&
+
+             strstr(callname, "fast") == NULL) {
+
+    compiler_mode = CLANG;
+
   }
 
   if ((ptr = getenv("AFL_CC_COMPILER"))) {
@@ -1042,9 +1076,11 @@ int main(int argc, char **argv, char **envp) {
 
   }
 
-  if (strncmp(callname, "afl-clang", 9) == 0) {
+  if (strncmp(callname, "afl-clang", 9) == 0 &&
+      strstr(callname, "fast") == NULL) {
 
     clang_mode = 1;
+    compiler_mode = CLANG;
 
     if (strncmp(callname, "afl-clang++", 11) == 0) { plusplus_mode = 1; }
 
@@ -1072,6 +1108,34 @@ int main(int argc, char **argv, char **envp) {
 
         compiler_mode = LLVM;
 
+      } else if (strncasecmp(ptr, "PCGUARD", 7) == 0 ||
+
+                 strncasecmp(ptr, "PC-GUARD", 8) == 0) {
+
+        compiler_mode = LLVM;
+        instrument_mode = INSTRUMENT_PCGUARD;
+
+      } else if (strcasecmp(ptr, "INSTRIM") == 0 ||
+
+                 strcasecmp(ptr, "CFG") == 0) {
+
+        compiler_mode = LLVM;
+        instrument_mode = INSTRUMENT_CFG;
+
+      } else if (strcasecmp(ptr, "AFL") == 0 ||
+
+                 strcasecmp(ptr, "CLASSIC") == 0) {
+
+        compiler_mode = LLVM;
+        instrument_mode = INSTRUMENT_CLASSIC;
+
+      } else if (strcasecmp(ptr, "LLVMNATIVE") == 0 ||
+
+                 strcasecmp(ptr, "LLVM-NATIVE") == 0) {
+
+        compiler_mode = LLVM;
+        instrument_mode = INSTRUMENT_LLVMNATIVE;
+
       } else if (strncasecmp(ptr, "GCC_P", 5) == 0 ||
 
                  strncasecmp(ptr, "GCC-P", 5) == 0 ||
@@ -1083,6 +1147,10 @@ int main(int argc, char **argv, char **envp) {
 
         compiler_mode = GCC;
 
+      } else if (strncasecmp(ptr, "CLANG", 5) == 0) {
+
+        compiler_mode = CLANG;
+
       } else
 
         FATAL("Unknown --afl-... compiler mode: %s\n", argv[i]);
@@ -1212,6 +1280,28 @@ int main(int argc, char **argv, char **envp) {
 
       }
 
+      if (strcasecmp(ptr, "gcc") == 0) {
+
+        if (!instrument_mode || instrument_mode == INSTRUMENT_GCC)
+          instrument_mode = INSTRUMENT_GCC;
+        else if (instrument_mode != INSTRUMENT_GCC)
+          FATAL("main instrumentation mode already set with %s",
+                instrument_mode_string[instrument_mode]);
+        compiler_mode = GCC;
+
+      }
+
+      if (strcasecmp(ptr, "clang") == 0) {
+
+        if (!instrument_mode || instrument_mode == INSTRUMENT_CLANG)
+          instrument_mode = INSTRUMENT_CLANG;
+        else if (instrument_mode != INSTRUMENT_CLANG)
+          FATAL("main instrumentation mode already set with %s",
+                instrument_mode_string[instrument_mode]);
+        compiler_mode = CLANG;
+
+      }
+
       if (strncasecmp(ptr, "ctx", strlen("ctx")) == 0) {
 
         instrument_opt_mode |= INSTRUMENT_OPT_CTX;
@@ -1270,6 +1360,22 @@ int main(int argc, char **argv, char **envp) {
 
   }
 
+  if (compiler_mode == GCC) {
+
+    if (clang_mode) {
+
+      instrument_mode = INSTRUMENT_CLANG;  /* INSTRUMENT_* value, not CLANG */
+
+    } else {
+
+      instrument_mode = INSTRUMENT_GCC;    /* INSTRUMENT_* value, not GCC */
+
+    }
+
+  }
+
+  if (compiler_mode == CLANG) { instrument_mode = INSTRUMENT_CLANG; }
+
   if (argc < 2 || strncmp(argv[1], "-h", 2) == 0) {
 
     printf("afl-cc" VERSION
@@ -1316,7 +1422,7 @@ int main(int argc, char **argv, char **envp) {
         "  [GCC_PLUGIN] gcc plugin: %s%s\n"
         "      CLASSIC              DEFAULT    no  yes     yes  no     no  no  "
         "   yes\n"
-        "  [GCC] simple gcc:        %s%s\n"
+        "  [GCC/CLANG] simple gcc/clang: %s%s\n"
         "      CLASSIC              DEFAULT    no  no      no   no     no  no  "
         "   no\n\n",
         have_lto ? "AVAILABLE" : "unavailable!",
@@ -1328,7 +1434,7 @@ int main(int argc, char **argv, char **envp) {
         have_gcc_plugin ? "AVAILABLE" : "unavailable!",
         compiler_mode == GCC_PLUGIN ? " [SELECTED]" : "",
         have_gcc ? "AVAILABLE" : "unavailable!",
-        compiler_mode == GCC ? " [SELECTED]" : "");
+        (compiler_mode == GCC || compiler_mode == CLANG) ? " [SELECTED]" : "");
 
     SAYF(
         "Modes:\n"
@@ -1346,6 +1452,10 @@ int main(int argc, char **argv, char **envp) {
         "Sub-Modes: (set via env AFL_LLVM_INSTRUMENT, afl-cc selects the best "
         "available)\n"
         "  PCGUARD: Dominator tree instrumentation (best!) (README.llvm.md)\n"
+#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
+        "  NATIVE:  use llvm's native PCGUARD instrumentation (less "
+        "performant)\n"
+#endif
         "  CLASSIC: decision target instrumentation (README.llvm.md)\n"
         "  CTX:     CLASSIC + callee context (instrumentation/README.ctx.md)\n"
         "  NGRAM-x: CLASSIC + previous path "
@@ -1432,7 +1542,7 @@ int main(int argc, char **argv, char **envp) {
             "  AFL_LLVM_LAF_SPLIT_FLOATS: cascaded comparisons on floats\n"
             "  AFL_LLVM_LAF_TRANSFORM_COMPARES: cascade comparisons for string "
             "functions\n"
-            "  AFL_LLVM_INSTRUMENT_ALLOW/AFL_LLVM_INSTRUMENT_DENY: enable "
+            "  AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST: enable "
             "instrument allow/\n"
             "    deny listing (selective instrumentation)\n");
 
@@ -1441,7 +1551,8 @@ int main(int argc, char **argv, char **envp) {
             "  AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen "
             "mutator)\n"
             "  AFL_LLVM_INSTRUMENT: set instrumentation mode:\n"
-            "    CLASSIC, INSTRIM, PCGUARD, LTO, CTX, NGRAM-2 ... NGRAM-16\n"
+            "    CLASSIC, INSTRIM, PCGUARD, LTO, GCC, CLANG, CTX, NGRAM-2 ... "
+            "NGRAM-16\n"
             " You can also use the old environment variables instead:\n"
             "  AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation\n"
             "  AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n"
diff --git a/src/afl-common.c b/src/afl-common.c
index 4df22394..6dc8abe0 100644
--- a/src/afl-common.c
+++ b/src/afl-common.c
@@ -26,6 +26,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <strings.h>
+#include <math.h>
 
 #include "debug.h"
 #include "alloc-inl.h"
@@ -786,6 +787,10 @@ u8 *u_stringify_float(u8 *buf, double val) {
 
     sprintf(buf, "%0.01f", val);
 
+  } else if (unlikely(isnan(val) || isinf(val))) {  /* keep NaN/Inf away from the (u64) cast */
+
+    strcpy(buf, "999.9");
+
   } else {
 
     return u_stringify_int(buf, (u64)val);
diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c
index 3afb94be..90fa55e9 100644
--- a/src/afl-forkserver.c
+++ b/src/afl-forkserver.c
@@ -641,11 +641,11 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
 
         if (!fsrv->map_size) { fsrv->map_size = MAP_SIZE; }
 
-        if (unlikely(tmp_map_size % 8)) {
+        if (unlikely(tmp_map_size % 32)) {
 
           // should not happen
           WARNF("Target reported non-aligned map size of %u", tmp_map_size);
-          tmp_map_size = (((tmp_map_size + 8) >> 3) << 3);
+          tmp_map_size = (((tmp_map_size + 31) >> 5) << 5);
 
         }
 
diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c
index f1ca7400..1cb9b15f 100644
--- a/src/afl-fuzz-bitmap.c
+++ b/src/afl-fuzz-bitmap.c
@@ -25,6 +25,9 @@
 
 #include "afl-fuzz.h"
 #include <limits.h>
+#if !defined NAME_MAX
+#define NAME_MAX _XOPEN_NAME_MAX
+#endif
 
 /* Write bitmap to file. The bitmap is useful mostly for the secret
    -B option, to focus a separate fuzzing session on a particular
@@ -49,101 +52,6 @@ void write_bitmap(afl_state_t *afl) {
 
 }
 
-/* Check if the current execution path brings anything new to the table.
-   Update virgin bits to reflect the finds. Returns 1 if the only change is
-   the hit-count for a particular tuple; 2 if there are new tuples seen.
-   Updates the map, so subsequent calls will always return 0.
-
-   This function is called after every exec() on a fairly large buffer, so
-   it needs to be fast. We do this in 32-bit and 64-bit flavors. */
-
-u8 __attribute__((hot)) has_new_bits(afl_state_t *afl, u8 *virgin_map) {
-
-#ifdef WORD_SIZE_64
-
-  u64 *current = (u64 *)afl->fsrv.trace_bits;
-  u64 *virgin = (u64 *)virgin_map;
-
-  u32 i = (afl->fsrv.map_size >> 3);
-
-#else
-
-  u32 *current = (u32 *)afl->fsrv.trace_bits;
-  u32 *virgin = (u32 *)virgin_map;
-
-  u32 i = (afl->fsrv.map_size >> 2);
-
-#endif                                                     /* ^WORD_SIZE_64 */
-  // the map size must be a minimum of 8 bytes.
-  // for variable/dynamic map sizes this is ensured in the forkserver
-
-  u8 ret = 0;
-
-  while (i--) {
-
-    /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap
-       that have not been already cleared from the virgin map - since this will
-       almost always be the case. */
-
-    // the (*current) is unnecessary but speeds up the overall comparison
-    if (unlikely(*current) && unlikely(*current & *virgin)) {
-
-      if (likely(ret < 2)) {
-
-        u8 *cur = (u8 *)current;
-        u8 *vir = (u8 *)virgin;
-
-        /* Looks like we have not found any new bytes yet; see if any non-zero
-           bytes in current[] are pristine in virgin[]. */
-
-#ifdef WORD_SIZE_64
-
-        if (*virgin == 0xffffffffffffffff || (cur[0] && vir[0] == 0xff) ||
-            (cur[1] && vir[1] == 0xff) || (cur[2] && vir[2] == 0xff) ||
-            (cur[3] && vir[3] == 0xff) || (cur[4] && vir[4] == 0xff) ||
-            (cur[5] && vir[5] == 0xff) || (cur[6] && vir[6] == 0xff) ||
-            (cur[7] && vir[7] == 0xff)) {
-
-          ret = 2;
-
-        } else {
-
-          ret = 1;
-
-        }
-
-#else
-
-        if (*virgin == 0xffffffff || (cur[0] && vir[0] == 0xff) ||
-            (cur[1] && vir[1] == 0xff) || (cur[2] && vir[2] == 0xff) ||
-            (cur[3] && vir[3] == 0xff))
-          ret = 2;
-        else
-          ret = 1;
-
-#endif                                                     /* ^WORD_SIZE_64 */
-
-      }
-
-      *virgin &= ~*current;
-
-    }
-
-    ++current;
-    ++virgin;
-
-  }
-
-  if (unlikely(ret) && likely(virgin_map == afl->virgin_bits)) {
-
-    afl->bitmap_changed = 1;
-
-  }
-
-  return ret;
-
-}
-
 /* Count the number of bits set in the provided bitmap. Used for the status
    screen several times every second, does not have to be fast. */
 
@@ -192,10 +100,10 @@ u32 count_bytes(afl_state_t *afl, u8 *mem) {
     u32 v = *(ptr++);
 
     if (!v) { continue; }
-    if (v & 0x000000ff) { ++ret; }
-    if (v & 0x0000ff00) { ++ret; }
-    if (v & 0x00ff0000) { ++ret; }
-    if (v & 0xff000000) { ++ret; }
+    if (v & 0x000000ffU) { ++ret; }
+    if (v & 0x0000ff00U) { ++ret; }
+    if (v & 0x00ff0000U) { ++ret; }
+    if (v & 0xff000000U) { ++ret; }
 
   }
 
@@ -219,11 +127,11 @@ u32 count_non_255_bytes(afl_state_t *afl, u8 *mem) {
     /* This is called on the virgin bitmap, so optimize for the most likely
        case. */
 
-    if (v == 0xffffffff) { continue; }
-    if ((v & 0x000000ff) != 0x000000ff) { ++ret; }
-    if ((v & 0x0000ff00) != 0x0000ff00) { ++ret; }
-    if ((v & 0x00ff0000) != 0x00ff0000) { ++ret; }
-    if ((v & 0xff000000) != 0xff000000) { ++ret; }
+    if (v == 0xffffffffU) { continue; }
+    if ((v & 0x000000ffU) != 0x000000ffU) { ++ret; }
+    if ((v & 0x0000ff00U) != 0x0000ff00U) { ++ret; }
+    if ((v & 0x00ff0000U) != 0x00ff0000U) { ++ret; }
+    if ((v & 0xff000000U) != 0xff000000U) { ++ret; }
 
   }
 
@@ -235,98 +143,43 @@ u32 count_non_255_bytes(afl_state_t *afl, u8 *mem) {
    and replacing it with 0x80 or 0x01 depending on whether the tuple
    is hit or not. Called on every new crash or timeout, should be
    reasonably fast. */
-
+#define TIMES4(x) x,x,x,x
+#define TIMES8(x) TIMES4(x),TIMES4(x)
+#define TIMES16(x) TIMES8(x),TIMES8(x)
+#define TIMES32(x) TIMES16(x),TIMES16(x)
+#define TIMES64(x) TIMES32(x),TIMES32(x)
+#define TIMES255(x) TIMES64(x),TIMES64(x),TIMES64(x),TIMES32(x),TIMES16(x),TIMES8(x),TIMES4(x),x,x,x
 const u8 simplify_lookup[256] = {
 
-    [0] = 1, [1 ... 255] = 128
+    [0] = 1, [1] = TIMES255(128)
 
 };
 
-#ifdef WORD_SIZE_64
-
-void simplify_trace(afl_state_t *afl, u64 *mem) {
-
-  u32 i = (afl->fsrv.map_size >> 3);
-
-  while (i--) {
-
-    /* Optimize for sparse bitmaps. */
-
-    if (unlikely(*mem)) {
-
-      u8 *mem8 = (u8 *)mem;
-
-      mem8[0] = simplify_lookup[mem8[0]];
-      mem8[1] = simplify_lookup[mem8[1]];
-      mem8[2] = simplify_lookup[mem8[2]];
-      mem8[3] = simplify_lookup[mem8[3]];
-      mem8[4] = simplify_lookup[mem8[4]];
-      mem8[5] = simplify_lookup[mem8[5]];
-      mem8[6] = simplify_lookup[mem8[6]];
-      mem8[7] = simplify_lookup[mem8[7]];
-
-    } else {
-
-      *mem = 0x0101010101010101ULL;
-
-    }
-
-    ++mem;
-
-  }
-
-}
-
-#else
-
-void simplify_trace(afl_state_t *afl, u32 *mem) {
-
-  u32 i = (afl->fsrv.map_size >> 2);
-
-  while (i--) {
-
-    /* Optimize for sparse bitmaps. */
-
-    if (unlikely(*mem)) {
-
-      u8 *mem8 = (u8 *)mem;
-
-      mem8[0] = simplify_lookup[mem8[0]];
-      mem8[1] = simplify_lookup[mem8[1]];
-      mem8[2] = simplify_lookup[mem8[2]];
-      mem8[3] = simplify_lookup[mem8[3]];
-
-    } else
-
-      *mem = 0x01010101;
-
-    ++mem;
-
-  }
-
-}
-
-#endif                                                     /* ^WORD_SIZE_64 */
-
 /* Destructively classify execution counts in a trace. This is used as a
    preprocessing step for any newly acquired traces. Called on every exec,
    must be fast. */
 
-static const u8 count_class_lookup8[256] = {
+const u8 count_class_lookup8[256] = {
 
     [0] = 0,
     [1] = 1,
     [2] = 2,
     [3] = 4,
-    [4 ... 7] = 8,
-    [8 ... 15] = 16,
-    [16 ... 31] = 32,
-    [32 ... 127] = 64,
-    [128 ... 255] = 128
+    [4] = TIMES4(8),
+    [8] = TIMES8(16),
+    [16] = TIMES16(32),
+    [32] = TIMES64(64), TIMES32(64),     /* 96 entries: 32..127 */
+    [128] = TIMES64(128), TIMES64(128)   /* 128 entries: 128..255 */
 
 };
+#undef TIMES255
+#undef TIMES64
+#undef TIMES32
+#undef TIMES16
+#undef TIMES8
+#undef TIMES4
 
-static u16 count_class_lookup16[65536];
+u16 count_class_lookup16[65536];
 
 void init_count_class16(void) {
 
@@ -345,63 +198,87 @@ void init_count_class16(void) {
 
 }
 
-#ifdef WORD_SIZE_64
+/* Import coverage processing routines. */
 
-void __attribute__((hot)) classify_counts(afl_forkserver_t *fsrv) {
+#ifdef WORD_SIZE_64
+  #include "coverage-64.h"
+#else
+  #include "coverage-32.h"
+#endif
 
-  u64 *mem = (u64 *)fsrv->trace_bits;
+/* Check if the current execution path brings anything new to the table.
+   Update virgin bits to reflect the finds. Returns 1 if the only change is
+   the hit-count for a particular tuple; 2 if there are new tuples seen.
+   Updates the map, so subsequent calls will always return 0.
 
-  u32 i = (fsrv->map_size >> 3);
+   This function is called after every exec() on a fairly large buffer, so
+   it needs to be fast. We do this in 32-bit and 64-bit flavors. */
 
-  while (i--) {
+inline u8 has_new_bits(afl_state_t *afl, u8 *virgin_map) {
 
-    /* Optimize for sparse bitmaps. */
+#ifdef WORD_SIZE_64
 
-    if (unlikely(*mem)) {
+  u64 *current = (u64 *)afl->fsrv.trace_bits;
+  u64 *virgin = (u64 *)virgin_map;
 
-      u16 *mem16 = (u16 *)mem;
+  u32 i = (afl->fsrv.map_size >> 3);
 
-      mem16[0] = count_class_lookup16[mem16[0]];
-      mem16[1] = count_class_lookup16[mem16[1]];
-      mem16[2] = count_class_lookup16[mem16[2]];
-      mem16[3] = count_class_lookup16[mem16[3]];
+#else
 
-    }
+  u32 *current = (u32 *)afl->fsrv.trace_bits;
+  u32 *virgin = (u32 *)virgin_map;
 
-    ++mem;
+  u32 i = (afl->fsrv.map_size >> 2);
 
-  }
+#endif                                                     /* ^WORD_SIZE_64 */
 
-}
+  u8 ret = 0;
+  while (i--) {
 
-#else
+    if (unlikely(*current)) discover_word(&ret, current, virgin);
 
-void __attribute__((hot)) classify_counts(afl_forkserver_t *fsrv) {
+    current++;
+    virgin++;
 
-  u32 *mem = (u32 *)fsrv->trace_bits;
+  }
 
-  u32 i = (fsrv->map_size >> 2);
+  if (unlikely(ret) && likely(virgin_map == afl->virgin_bits))
+    afl->bitmap_changed = 1;
 
-  while (i--) {
+  return ret;
 
-    /* Optimize for sparse bitmaps. */
+}
 
-    if (unlikely(*mem)) {
+/* A combination of classify_counts and has_new_bits. If 0 is returned, then the
+ * trace bits are kept as-is. Otherwise, the trace bits are overwritten with
+ * classified values.
+ *
+ * This accelerates the processing: in most cases, no interesting behavior
+ * happen, and the trace bits will be discarded soon. This function optimizes
+ * for such cases: one-pass scan on trace bits without modifying anything. Only
+ * on rare cases it fall backs to the slow path: classify_counts() first, then
+ * return has_new_bits(). */
 
-      u16 *mem16 = (u16 *)mem;
+inline u8 has_new_bits_unclassified(afl_state_t *afl, u8 *virgin_map) {
 
-      mem16[0] = count_class_lookup16[mem16[0]];
-      mem16[1] = count_class_lookup16[mem16[1]];
+  /* Handle the hot path first: no new coverage */
+  u8 *end = afl->fsrv.trace_bits + afl->fsrv.map_size;
 
-    }
+#ifdef WORD_SIZE_64
 
-    ++mem;
+  if (!skim((u64 *)virgin_map, (u64 *)afl->fsrv.trace_bits, (u64 *)end))
+    return 0;
 
-  }
+#else
 
-}
+  if (!skim((u32 *)virgin_map, (u32 *)afl->fsrv.trace_bits, (u32 *)end))
+    return 0;
 
 #endif                                                     /* ^WORD_SIZE_64 */
+  classify_counts(&afl->fsrv);
+  return has_new_bits(afl, virgin_map);
+
+}
 
 /* Compact trace bytes into a smaller bitmap. We effectively just drop the
    count information here. This is called only sporadically, for some
@@ -581,7 +458,7 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
   u8 *queue_fn = "";
   u8  new_bits = '\0';
   s32 fd;
-  u8  keeping = 0, res;
+  u8  keeping = 0, res, classified = 0;
   u64 cksum = 0;
 
   u8 fn[PATH_MAX];
@@ -605,13 +482,17 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
     /* Keep only if there are new bits in the map, add to queue for
        future fuzzing, etc. */
 
-    if (!(new_bits = has_new_bits(afl, afl->virgin_bits))) {
+    new_bits = has_new_bits_unclassified(afl, afl->virgin_bits);
+
+    if (likely(!new_bits)) {
 
       if (unlikely(afl->crash_mode)) { ++afl->total_crashes; }
       return 0;
 
     }
 
+    classified = new_bits;
+
 #ifndef SIMPLE_FILES
 
     queue_fn = alloc_printf(
@@ -715,11 +596,14 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
       if (likely(!afl->non_instrumented_mode)) {
 
-#ifdef WORD_SIZE_64
-        simplify_trace(afl, (u64 *)afl->fsrv.trace_bits);
-#else
-        simplify_trace(afl, (u32 *)afl->fsrv.trace_bits);
-#endif                                                     /* ^WORD_SIZE_64 */
+        if (!classified) {
+
+          classify_counts(&afl->fsrv);
+          classified = 1;
+
+        }
+
+        simplify_trace(afl, afl->fsrv.trace_bits);
 
         if (!has_new_bits(afl, afl->virgin_tmout)) { return keeping; }
 
@@ -764,6 +648,7 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
         u8 new_fault;
         write_to_testcase(afl, mem, len);
         new_fault = fuzz_run_target(afl, &afl->fsrv, afl->hang_tmout);
+        classify_counts(&afl->fsrv);
 
         /* A corner case that one user reported bumping into: increasing the
            timeout actually uncovers a crash. Make sure we don't discard it if
@@ -812,11 +697,14 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
       if (likely(!afl->non_instrumented_mode)) {
 
-#ifdef WORD_SIZE_64
-        simplify_trace(afl, (u64 *)afl->fsrv.trace_bits);
-#else
-        simplify_trace(afl, (u32 *)afl->fsrv.trace_bits);
-#endif                                                     /* ^WORD_SIZE_64 */
+        if (!classified) {
+
+          classify_counts(&afl->fsrv);
+          classified = 1;
+
+        }
+
+        simplify_trace(afl, afl->fsrv.trace_bits);
 
         if (!has_new_bits(afl, afl->virgin_crash)) { return keeping; }
 
diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c
index 0db3a111..ec937f29 100644
--- a/src/afl-fuzz-init.c
+++ b/src/afl-fuzz-init.c
@@ -666,7 +666,7 @@ void read_testcases(afl_state_t *afl, u8 *directory) {
 
   }
 
-  if (afl->shuffle_queue && nl_cnt > 1) {
+  if (unlikely(afl->old_seed_selection && afl->shuffle_queue && nl_cnt > 1)) {
 
     ACTF("Shuffling queue...");
     shuffle_ptrs(afl, (void **)nl, nl_cnt);
diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c
index a97ceb89..32cca579 100644
--- a/src/afl-fuzz-run.c
+++ b/src/afl-fuzz-run.c
@@ -28,6 +28,9 @@
 #include <sys/time.h>
 #include <signal.h>
 #include <limits.h>
+#if !defined NAME_MAX
+#define NAME_MAX _XOPEN_NAME_MAX
+#endif
 
 #include "cmplog.h"
 
@@ -62,8 +65,6 @@ fuzz_run_target(afl_state_t *afl, afl_forkserver_t *fsrv, u32 timeout) {
   time_spent_start = (spec.tv_sec * 1000000000) + spec.tv_nsec;
 #endif
 
-  // TODO: Don't classify for faults?
-  classify_counts(fsrv);
   return res;
 
 }
@@ -379,6 +380,7 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
 
     }
 
+    classify_counts(&afl->fsrv);
     cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
     if (q->exec_cksum != cksum) {
 
@@ -767,13 +769,14 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
       write_with_gap(afl, in_buf, q->len, remove_pos, trim_avail);
 
       fault = fuzz_run_target(afl, &afl->fsrv, afl->fsrv.exec_tmout);
-      ++afl->trim_execs;
 
       if (afl->stop_soon || fault == FSRV_RUN_ERROR) { goto abort_trimming; }
 
       /* Note that we don't keep track of crashes or hangs here; maybe TODO?
        */
 
+      ++afl->trim_execs;
+      classify_counts(&afl->fsrv);
       cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
 
       /* If the deletion had no impact on the trace, make it permanent. This
diff --git a/src/afl-fuzz-state.c b/src/afl-fuzz-state.c
index 9c51a3ef..7053572b 100644
--- a/src/afl-fuzz-state.c
+++ b/src/afl-fuzz-state.c
@@ -401,6 +401,23 @@ void read_afl_environment(afl_state_t *afl, char **envp) {
             afl->afl_env.afl_crash_exitcode =
                 (u8 *)get_afl_env(afl_environment_variables[i]);
 
+#if defined USE_COLOR && !defined ALWAYS_COLORED
+
+          } else if (!strncmp(env, "AFL_NO_COLOR",
+
+                              afl_environment_variable_len)) {
+
+            afl->afl_env.afl_statsd_tags_flavor =
+                (u8 *)get_afl_env(afl_environment_variables[i]);
+
+          } else if (!strncmp(env, "AFL_NO_COLOUR",
+
+                              afl_environment_variable_len)) {
+
+            afl->afl_env.afl_statsd_tags_flavor =
+                (u8 *)get_afl_env(afl_environment_variables[i]);
+#endif
+
           }
 
         } else {
diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c
index 321bbb35..50e2ef15 100644
--- a/src/afl-fuzz-stats.c
+++ b/src/afl-fuzz-stats.c
@@ -371,6 +371,8 @@ void show_stats(afl_state_t *afl) {
 
   if (!afl->stats_last_execs) {
 
+    if (unlikely(cur_ms == afl->start_time)) --afl->start_time;
+
     afl->stats_avg_exec =
         ((double)afl->fsrv.total_execs) * 1000 / (cur_ms - afl->start_time);
 
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index 391d4c4f..2af374f2 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -156,6 +156,13 @@ static void usage(u8 *argv0, int more_help) {
 
   if (more_help > 1) {
 
+#if defined USE_COLOR && !defined ALWAYS_COLORED
+  #define DYN_COLOR \
+    "AFL_NO_COLOR or AFL_NO_COLOUR: switch colored console output off\n"
+#else
+  #define DYN_COLOR
+#endif
+
     SAYF(
       "Environment variables used:\n"
       "LD_BIND_LAZY: do not set LD_BIND_NOW env var for target\n"
@@ -194,6 +201,9 @@ static void usage(u8 *argv0, int more_help) {
       "AFL_NO_FORKSRV: run target via execve instead of using the forkserver\n"
       "AFL_NO_SNAPSHOT: do not use the snapshot feature (if the snapshot lkm is loaded)\n"
       "AFL_NO_UI: switch status screen off\n"
+
+      DYN_COLOR
+
       "AFL_PATH: path to AFL support binaries\n"
       "AFL_PYTHON_MODULE: mutate and trim inputs with the specified Python module\n"
       "AFL_QUIET: suppress forkserver status messages\n"
@@ -298,6 +308,17 @@ int main(int argc, char **argv_orig, char **envp) {
   struct timeval  tv;
   struct timezone tz;
 
+  #if defined USE_COLOR && defined ALWAYS_COLORED
+  if (getenv("AFL_NO_COLOR") || getenv("AFL_NO_COLOUR")) {
+
+    WARNF(
+        "Setting AFL_NO_COLOR has no effect (colors are configured on at "
+        "compile time)");
+
+  }
+
+  #endif
+
   char **argv = argv_cpy_dup(argc, argv_orig);
 
   afl_state_t *afl = calloc(1, sizeof(afl_state_t));
diff --git a/src/afl-performance.c b/src/afl-performance.c
index e070a05e..89b170eb 100644
--- a/src/afl-performance.c
+++ b/src/afl-performance.c
@@ -27,45 +27,49 @@
 #include "xxhash.h"
 #undef XXH_INLINE_ALL
 
-/* we use xoshiro256** instead of rand/random because it is 10x faster and has
-   better randomness properties. */
-
-static inline uint64_t rotl(const uint64_t x, int k) {
-
-  return (x << k) | (x >> (64 - k));
-
-}
-
 void rand_set_seed(afl_state_t *afl, s64 init_seed) {
 
   afl->init_seed = init_seed;
   afl->rand_seed[0] =
       hash64((u8 *)&afl->init_seed, sizeof(afl->init_seed), HASH_CONST);
   afl->rand_seed[1] = afl->rand_seed[0] ^ 0x1234567890abcdef;
-  afl->rand_seed[2] = afl->rand_seed[0] & 0x0123456789abcdef;
-  afl->rand_seed[3] = afl->rand_seed[0] | 0x01abcde43f567908;
+  afl->rand_seed[2] = (afl->rand_seed[0] & 0x1234567890abcdef) ^
+                      (afl->rand_seed[1] | 0xfedcba9876543210);
 
 }
 
-inline uint64_t rand_next(afl_state_t *afl) {
+#define ROTL(d, lrot) ((d << (lrot)) | (d >> (8 * sizeof(d) - (lrot))))
 
-  const uint64_t result =
-      rotl(afl->rand_seed[0] + afl->rand_seed[3], 23) + afl->rand_seed[0];
+#ifdef WORD_SIZE_64
+// romuDuoJr
+inline AFL_RAND_RETURN rand_next(afl_state_t *afl) {
 
-  const uint64_t t = afl->rand_seed[1] << 17;
+  AFL_RAND_RETURN xp = afl->rand_seed[0];
+  afl->rand_seed[0] = 15241094284759029579u * afl->rand_seed[1];
+  afl->rand_seed[1] = afl->rand_seed[1] - xp;
+  afl->rand_seed[1] = ROTL(afl->rand_seed[1], 27);
+  return xp;
 
-  afl->rand_seed[2] ^= afl->rand_seed[0];
-  afl->rand_seed[3] ^= afl->rand_seed[1];
-  afl->rand_seed[1] ^= afl->rand_seed[2];
-  afl->rand_seed[0] ^= afl->rand_seed[3];
+}
 
-  afl->rand_seed[2] ^= t;
+#else
+// RomuTrio32
+inline AFL_RAND_RETURN rand_next(afl_state_t *afl) {
+
+  AFL_RAND_RETURN xp = afl->rand_seed[0], yp = afl->rand_seed[1],
+                  zp = afl->rand_seed[2];
+  afl->rand_seed[0] = 3323815723u * zp;
+  afl->rand_seed[1] = yp - xp;
+  afl->rand_seed[1] = ROTL(afl->rand_seed[1], 6);
+  afl->rand_seed[2] = zp - yp;
+  afl->rand_seed[2] = ROTL(afl->rand_seed[2], 22);
+  return xp;
 
-  afl->rand_seed[3] = rotl(afl->rand_seed[3], 45);
+}
 
-  return result;
+#endif
 
-}
+#undef ROTL
 
 /* returns a double between 0.000000000 and 1.000000000 */
 
@@ -75,80 +79,6 @@ inline double rand_next_percent(afl_state_t *afl) {
 
 }
 
-/* This is the jump function for the generator. It is equivalent
-   to 2^128 calls to rand_next(); it can be used to generate 2^128
-   non-overlapping subsequences for parallel computations. */
-
-void jump(afl_state_t *afl) {
-
-  static const uint64_t JUMP[] = {0x180ec6d33cfd0aba, 0xd5a61266f0c9392c,
-                                  0xa9582618e03fc9aa, 0x39abdc4529b1661c};
-  size_t                i, b;
-  uint64_t              s0 = 0;
-  uint64_t              s1 = 0;
-  uint64_t              s2 = 0;
-  uint64_t              s3 = 0;
-  for (i = 0; i < (sizeof(JUMP) / sizeof(*JUMP)); i++)
-    for (b = 0; b < 64; b++) {
-
-      if (JUMP[i] & UINT64_C(1) << b) {
-
-        s0 ^= afl->rand_seed[0];
-        s1 ^= afl->rand_seed[1];
-        s2 ^= afl->rand_seed[2];
-        s3 ^= afl->rand_seed[3];
-
-      }
-
-      rand_next(afl);
-
-    }
-
-  afl->rand_seed[0] = s0;
-  afl->rand_seed[1] = s1;
-  afl->rand_seed[2] = s2;
-  afl->rand_seed[3] = s3;
-
-}
-
-/* This is the long-jump function for the generator. It is equivalent to
-   2^192 calls to rand_next(); it can be used to generate 2^64 starting points,
-   from each of which jump() will generate 2^64 non-overlapping
-   subsequences for parallel distributed computations. */
-
-void long_jump(afl_state_t *afl) {
-
-  static const uint64_t LONG_JUMP[] = {0x76e15d3efefdcbbf, 0xc5004e441c522fb3,
-                                       0x77710069854ee241, 0x39109bb02acbe635};
-
-  size_t   i, b;
-  uint64_t s0 = 0;
-  uint64_t s1 = 0;
-  uint64_t s2 = 0;
-  uint64_t s3 = 0;
-  for (i = 0; i < (sizeof(LONG_JUMP) / sizeof(*LONG_JUMP)); i++)
-    for (b = 0; b < 64; b++) {
-
-      if (LONG_JUMP[i] & UINT64_C(1) << b) {
-
-        s0 ^= afl->rand_seed[0];
-        s1 ^= afl->rand_seed[1];
-        s2 ^= afl->rand_seed[2];
-        s3 ^= afl->rand_seed[3];
-
-      }
-
-      rand_next(afl);
-
-    }
-
-  afl->rand_seed[0] = s0;
-  afl->rand_seed[1] = s1;
-  afl->rand_seed[2] = s2;
-  afl->rand_seed[3] = s3;
-
-}
-
 /* we switch from afl's murmur implementation to xxh3 as it is 30% faster -
    and get 64 bit hashes instead of just 32 bit. Less collisions! :-) */
 
diff --git a/src/afl-showmap.c b/src/afl-showmap.c
index 34a4f30d..b891632a 100644
--- a/src/afl-showmap.c
+++ b/src/afl-showmap.c
@@ -98,11 +98,18 @@ static sharedmem_t *     shm_fuzz;
 /* Classify tuple counts. Instead of mapping to individual bits, as in
    afl-fuzz.c, we map to more user-friendly numbers between 1 and 8. */
 
+#define TIMES4(x) x,x,x,x
+#define TIMES8(x) TIMES4(x),TIMES4(x)
+#define TIMES16(x) TIMES8(x),TIMES8(x)
+#define TIMES32(x) TIMES16(x),TIMES16(x)
+#define TIMES64(x) TIMES32(x),TIMES32(x)
+#define TIMES96(x) TIMES64(x),TIMES32(x)
+#define TIMES128(x) TIMES64(x),TIMES64(x)
 static const u8 count_class_human[256] = {
 
     [0] = 0,          [1] = 1,        [2] = 2,         [3] = 3,
-    [4 ... 7] = 4,    [8 ... 15] = 5, [16 ... 31] = 6, [32 ... 127] = 7,
-    [128 ... 255] = 8
+    [4] = TIMES4(4),  [8] = TIMES8(5),[16] = TIMES16(6),[32] = TIMES96(7),
+    [128] = TIMES128(8)
 
 };
 
@@ -112,13 +119,20 @@ static const u8 count_class_binary[256] = {
     [1] = 1,
     [2] = 2,
     [3] = 4,
-    [4 ... 7] = 8,
-    [8 ... 15] = 16,
-    [16 ... 31] = 32,
-    [32 ... 127] = 64,
-    [128 ... 255] = 128
+    [4] = TIMES4(8),
+    [8] = TIMES8(16),
+    [16] = TIMES16(32),
+    [32] = TIMES96(64),     /* hit counts 32..127 (96 entries) */
+    [128] = TIMES128(128)   /* hit counts 128..255 (128 entries) */
 
 };
+#undef TIMES128
+#undef TIMES96
+#undef TIMES64
+#undef TIMES32
+#undef TIMES16
+#undef TIMES8
+#undef TIMES4
 
 static void classify_counts(afl_forkserver_t *fsrv) {
 
diff --git a/src/afl-tmin.c b/src/afl-tmin.c
index b9045551..6cb0d458 100644
--- a/src/afl-tmin.c
+++ b/src/afl-tmin.c
@@ -98,19 +98,29 @@ static sharedmem_t *     shm_fuzz;
 /* Classify tuple counts. This is a slow & naive version, but good enough here.
  */
 
+#define TIMES4(x) x,x,x,x
+#define TIMES8(x) TIMES4(x),TIMES4(x)
+#define TIMES16(x) TIMES8(x),TIMES8(x)
+#define TIMES32(x) TIMES16(x),TIMES16(x)
+#define TIMES64(x) TIMES32(x),TIMES32(x)
 static const u8 count_class_lookup[256] = {
 
     [0] = 0,
     [1] = 1,
     [2] = 2,
     [3] = 4,
-    [4 ... 7] = 8,
-    [8 ... 15] = 16,
-    [16 ... 31] = 32,
-    [32 ... 127] = 64,
-    [128 ... 255] = 128
+    [4] = TIMES4(8),
+    [8] = TIMES8(16),
+    [16] = TIMES16(32),
+    [32] = TIMES32(64), TIMES64(64),     /* hit counts 32..127 (96 entries) */
+    [128] = TIMES64(128), TIMES64(128)   /* hit counts 128..255 (128 entries) */
 
 };
+#undef TIMES64
+#undef TIMES32
+#undef TIMES16
+#undef TIMES8
+#undef TIMES4
 
 static sharedmem_t *deinit_shmem(afl_forkserver_t *fsrv,
                                  sharedmem_t *     shm_fuzz) {