about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/ci.yml27
-rw-r--r--.github/workflows/codeql-analysis.yml32
-rw-r--r--.travis.yml59
-rw-r--r--GNUmakefile43
-rw-r--r--GNUmakefile.gcc_plugin2
-rw-r--r--GNUmakefile.llvm4
-rw-r--r--README.md2
-rwxr-xr-xafl-cmin44
-rwxr-xr-xafl-system-config1
-rw-r--r--docs/Changelog.md17
-rw-r--r--docs/binaryonly_fuzzing.md5
-rw-r--r--docs/env_variables.md5
-rw-r--r--include/afl-fuzz.h20
-rw-r--r--include/config.h19
-rw-r--r--include/coverage-32.h112
-rw-r--r--include/coverage-64.h189
-rw-r--r--include/debug.h78
-rw-r--r--include/envs.h6
-rw-r--r--include/forkserver.h2
-rw-r--r--instrumentation/Makefile2
-rw-r--r--instrumentation/README.llvm.md21
-rw-r--r--instrumentation/afl-compiler-rt.o.c4
-rw-r--r--instrumentation/afl-gcc-pass.so.cc10
-rw-r--r--instrumentation/afl-llvm-common.cc10
-rw-r--r--src/afl-cc.c6
-rw-r--r--src/afl-common.c5
-rw-r--r--src/afl-forkserver.c4
-rw-r--r--src/afl-fuzz-bitmap.c284
-rw-r--r--src/afl-fuzz-init.c2
-rw-r--r--src/afl-fuzz-run.c6
-rw-r--r--src/afl-fuzz-state.c17
-rw-r--r--src/afl-fuzz-stats.c2
-rw-r--r--src/afl-fuzz.c21
-rw-r--r--src/afl-performance.c124
-rwxr-xr-xtest/test-basic.sh66
-rwxr-xr-xtest/test-gcc-plugin.sh36
-rwxr-xr-xtest/test-llvm.sh29
-rwxr-xr-xtest/test-pre.sh1
-rw-r--r--utils/afl_proxy/afl-proxy.c2
-rw-r--r--utils/afl_untracer/afl-untracer.c4
40 files changed, 800 insertions, 523 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1f7d23f4..31cfceaf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: C/C++ CI
+name: CI
 
 on:
   push:
@@ -8,12 +8,21 @@ on:
 
 jobs:
   build:
-
-    runs-on: ubuntu-latest
-
+    runs-on: '${{ matrix.os }}'
+    strategy:
+      matrix:
+        os: [ubuntu-20.04, ubuntu-18.04]
     steps:
-    - uses: actions/checkout@v2
-    - name: make
-      run: make
-    - name: make tests
-      run: make tests
+      - uses: actions/checkout@v2
+      - name: debug
+        run: apt-cache search plugin-dev | grep gcc- ; echo ; apt-cache search clang-format- | grep clang-format-
+      - name: install packages
+        run: sudo apt-get update && sudo apt-get install -y -m -f --install-suggests build-essential git libtool libtool-bin automake bison libglib2.0-0 clang llvm-dev libc++-dev findutils libcmocka-dev python3-dev python3-setuptools
+      - name: compiler installed
+        run: gcc -v ; echo ; clang -v
+      - name: install gcc plugin
+        run: sudo apt-get install -y -m -f --install-suggests $(readlink /usr/bin/gcc)-plugin-dev
+      - name: build afl++
+        run: make distrib ASAN_BUILD=1
+      - name: run tests
+        run: sudo -E ./afl-system-config ; export AFL_SKIP_CPUFREQ=1 ; make tests
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
new file mode 100644
index 00000000..eda8dfd0
--- /dev/null
+++ b/.github/workflows/codeql-analysis.yml
@@ -0,0 +1,32 @@
+name: "CodeQL"
+
+on:
+  push:
+    branches: [ stable, dev ]
+  pull_request:
+    branches: [ stable, dev ]
+
+jobs:
+  analyze:
+    name: Analyze
+    runs-on: ubuntu-latest
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: [ 'cpp' ]
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v2
+
+    - name: Initialize CodeQL
+      uses: github/codeql-action/init@v1
+      with:
+        languages: ${{ matrix.language }}
+
+    - name: Autobuild
+      uses: github/codeql-action/autobuild@v1
+
+    - name: Perform CodeQL Analysis
+      uses: github/codeql-action/analyze@v1
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index b8b36e6b..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-language: c
-
-sudo: required
-
-branches:
-  only:
-    - stable
-    - dev
-
-matrix:
-  include:
-  #- os: linux # again disabled because fetching packages times out very often :(
-  #  dist: focal
-  #  env: NAME="focal-amd64" MODERN="yes" GCC="9"
-  - os: linux
-    dist: bionic
-    env: NAME="bionic-amd64" MODERN="yes" GCC="7"
-  - os: linux
-    dist: xenial
-    env: NAME="xenial-amd64" MODERN="no" GCC="5" EXTRA="libtool-bin clang-6.0"
-#  - os: linux # disabled: fatal: unable to access 'https://git.qemu.org/git/capstone/': gnutls_handshake() failed: Handshake failed
-#    dist: trusty
-#    env: NAME="trusty-amd64" MODERN="no" GCC="4.8"
-  - os: linux # until travis can fix this!
-    dist: xenial
-    arch: arm64
-    env: NAME="xenial-arm64" MODERN="no" GCC="5" EXTRA="libtool-bin clang-6.0" AFL_NO_X86="1" CPU_TARGET="aarch64"
-#  - os: osx
-#    osx_image: xcode11.2
-#    env: NAME="osx" HOMEBREW_NO_ANALYTICS="1" LINK="http://releases.llvm.org/9.0.0/" NAME="clang+llvm-9.0.0-x86_64-darwin-apple"
-
-jobs:
-  allow_failures:
-    - os: osx
-    - arch: arm64
-
-env:
-  - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1
- # - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_EXIT_WHEN_DONE=1
- # TODO: test AFL_BENCH_UNTIL_CRASH once we have a target that crashes
- # - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_BENCH_JUST_ONE=1
-
-before_install:
-  # export LLVM_DIR=${TRAVIS_BUILD_DIR}/${LLVM_PACKAGE}
-  - echo Testing on $NAME
-  - if [ "$TRAVIS_OS_NAME" = "osx" ]; then wget "$LINK""$NAME".tar.xz ; export LLVM_CONFIG=`pwd`/"$NAME" ; tar xJf "$NAME".tar.xz ; fi
-  - if [ "$MODERN" = "yes" ]; then sudo apt update ; sudo apt upgrade ; sudo apt install -y git libtool libtool-bin automake bison libglib2.0-0 build-essential clang gcc-"$GCC" gcc-"$GCC"-plugin-dev libc++-"$GCC"-dev findutils libcmocka-dev python3-setuptools ; fi
-  - if [ "$MODERN" = "no" ]; then sudo apt update ; sudo apt install -y git libtool $EXTRA libpixman-1-dev automake bison libglib2.0 build-essential gcc-"$GCC" gcc-"$GCC"-plugin-dev libc++-dev findutils libcmocka-dev python3-setuptools ; fi
-
-script:
-  - gcc -v
-  - clang -v
-  - sudo -E ./afl-system-config
-  - sudo sysctl -w kernel.shmmax=10000000000
-  - if [ "$TRAVIS_OS_NAME" = "osx" ]; then export LLVM_CONFIG=`pwd`/"$NAME" ; make source-only ASAN_BUILD=1 ; fi
-  - if [ "$TRAVIS_OS_NAME" = "linux" -a "$TRAVIS_CPU_ARCH" = "amd64" ]; then make distrib ASAN_BUILD=1 ; fi
-  - if [ "$TRAVIS_CPU_ARCH" = "arm64" ] ; then export LLVM_CONFIG=llvm-config-6.0 ; make ASAN_BUILD=1 ; cd qemu_mode && sh ./build_qemu_support.sh ; cd .. ; fi
-  - make tests
-#  - travis_terminate 0
diff --git a/GNUmakefile b/GNUmakefile
index 5c82279b..71b41227 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -42,8 +42,8 @@ endif
 
 ifdef ASAN_BUILD
   $(info Compiling ASAN version of binaries)
-  override CFLAGS+=$(ASAN_CFLAGS)
-  LDFLAGS+=$(ASAN_LDFLAGS)
+  override CFLAGS += $(ASAN_CFLAGS)
+  LDFLAGS += $(ASAN_LDFLAGS)
 endif
 ifdef UBSAN_BUILD
   $(info Compiling UBSAN version of binaries)
@@ -77,30 +77,34 @@ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -fno-move-loop-invariants -
 	SPECIAL_PERFORMANCE += -fno-move-loop-invariants -fdisable-tree-cunrolli
 endif
 
+ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+  ifndef SOURCE_DATE_EPOCH
+    HAVE_MARCHNATIVE = 1
+    CFLAGS_OPT += -march=native
+  endif
+endif
+
 ifneq "$(shell uname)" "Darwin"
- ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-   ifndef SOURCE_DATE_EPOCH
- 	#CFLAGS_OPT += -march=native
- 	SPECIAL_PERFORMANCE += -march=native
-   endif
- endif
+  ifeq "$(HAVE_MARCHNATIVE)" "1"
+    SPECIAL_PERFORMANCE += -march=native
+  endif
  # OS X does not like _FORTIFY_SOURCE=2
- ifndef DEBUG
-   CFLAGS_OPT += -D_FORTIFY_SOURCE=2
- endif
+  ifndef DEBUG
+    CFLAGS_OPT += -D_FORTIFY_SOURCE=2
+  endif
 endif
 
 ifeq "$(shell uname)" "SunOS"
- CFLAGS_OPT += -Wno-format-truncation
- LDFLAGS=-lkstat -lrt
+  CFLAGS_OPT += -Wno-format-truncation
+  LDFLAGS = -lkstat -lrt
 endif
 
 ifdef STATIC
   $(info Compiling static version of binaries, disabling python though)
   # Disable python for static compilation to simplify things
-  PYTHON_OK=0
+  PYTHON_OK = 0
   PYFLAGS=
-  PYTHON_INCLUDE=/
+  PYTHON_INCLUDE = /
 
   CFLAGS_OPT += -static
   LDFLAGS += -lm -lpthread -lz -lutil
@@ -117,6 +121,7 @@ ifdef INTROSPECTION
   CFLAGS_OPT += -DINTROSPECTION=1
 endif
 
+
 ifneq "$(shell uname -m)" "x86_64"
  ifneq "$(patsubst i%86,i386,$(shell uname -m))" "i386"
   ifneq "$(shell uname -m)" "amd64"
@@ -131,7 +136,7 @@ ifdef DEBUG
   $(info Compiling DEBUG version of binaries)
   CFLAGS += -ggdb3 -O0 -Wall -Wextra -Werror
 else
-  CFLAGS     ?= -O3 -funroll-loops $(CFLAGS_OPT)
+  CFLAGS ?= -O3 -funroll-loops $(CFLAGS_OPT)
 endif
 
 override CFLAGS += -g -Wno-pointer-sign -Wno-variadic-macros -Wall -Wextra -Wpointer-arith \
@@ -512,7 +517,7 @@ code-format:
 ifndef AFL_NO_X86
 test_build: afl-cc afl-gcc afl-as afl-showmap
 	@echo "[*] Testing the CC wrapper afl-cc and its instrumentation output..."
-	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN AFL_CC; AFL_INST_RATIO=100 AFL_PATH=. ./afl-cc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-cc failed"; exit 1 )
+	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN AFL_CC; ASAN_OPTIONS=detect_leaks=0 AFL_INST_RATIO=100 AFL_PATH=. ./afl-cc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-cc failed"; exit 1 )
 	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
 	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
 	@rm -f test-instr
@@ -520,7 +525,7 @@ test_build: afl-cc afl-gcc afl-as afl-showmap
 	@echo
 	@echo "[+] All right, the instrumentation of afl-cc seems to be working!"
 	@echo "[*] Testing the CC wrapper afl-gcc and its instrumentation output..."
-	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN AFL_CC; AFL_INST_RATIO=100 AFL_PATH=. ./afl-gcc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-gcc failed"; exit 1 )
+	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN; AFL_CC=$(CC) ASAN_OPTIONS=detect_leaks=0 AFL_INST_RATIO=100 AFL_PATH=. ./afl-gcc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-gcc failed"; exit 1 )
 	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
 	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
 	@rm -f test-instr
@@ -586,7 +591,7 @@ distrib: all
 	-cd unicorn_mode && unset CFLAGS && sh ./build_unicorn_support.sh
 
 .PHONY: binary-only
-binary-only: all
+binary-only: test_shm test_python ready $(PROGS)
 	$(MAKE) -C utils/libdislocator
 	$(MAKE) -C utils/libtokencap
 	$(MAKE) -C utils/afl_network_proxy
diff --git a/GNUmakefile.gcc_plugin b/GNUmakefile.gcc_plugin
index ee211c24..aa93c688 100644
--- a/GNUmakefile.gcc_plugin
+++ b/GNUmakefile.gcc_plugin
@@ -138,7 +138,7 @@ afl-common.o: ./src/afl-common.c
 .PHONY: test_build
 test_build: $(PROGS)
 	@echo "[*] Testing the CC wrapper and instrumentation output..."
-	unset AFL_USE_ASAN AFL_USE_MSAN; AFL_QUIET=1 AFL_INST_RATIO=100 AFL_PATH=. AFL_CC=$(CC) ./afl-gcc-fast $(CFLAGS) $(CPPFLAGS) ./test-instr.c -o test-instr $(LDFLAGS)
+	unset AFL_USE_ASAN AFL_USE_MSAN; ASAN_OPTIONS=detect_leaks=0 AFL_QUIET=1 AFL_INST_RATIO=100 AFL_PATH=. AFL_CC=$(CC) ./afl-gcc-fast $(CFLAGS) $(CPPFLAGS) ./test-instr.c -o test-instr $(LDFLAGS)
 	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr </dev/null
 	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
 	@rm -f test-instr
diff --git a/GNUmakefile.llvm b/GNUmakefile.llvm
index 414cd487..3554c8bf 100644
--- a/GNUmakefile.llvm
+++ b/GNUmakefile.llvm
@@ -357,7 +357,7 @@ instrumentation/afl-common.o: ./src/afl-common.c
 	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ $(LDFLAGS)
 
 ./afl-cc: src/afl-cc.c instrumentation/afl-common.o
-	$(CC) $(CLANG_CFL) $(CFLAGS) $(CPPFLAGS) $< instrumentation/afl-common.o -o $@ -DLLVM_MINOR=$(LLVM_MINOR) -DLLVM_MAJOR=$(LLVM_MAJOR) $(LDFLAGS) -DCFLAGS_OPT=\"$(CFLAGS_OPT)\"
+	$(CC) $(CLANG_CFL) $(CFLAGS) $(CPPFLAGS) $< instrumentation/afl-common.o -o $@ -DLLVM_MINOR=$(LLVM_MINOR) -DLLVM_MAJOR=$(LLVM_MAJOR) $(LDFLAGS) -DCFLAGS_OPT=\"$(CFLAGS_OPT)\" -lm
 	@ln -sf afl-cc ./afl-c++
 	@ln -sf afl-cc ./afl-gcc
 	@ln -sf afl-cc ./afl-g++
@@ -452,7 +452,7 @@ document:
 .PHONY: test_build
 test_build: $(PROGS)
 	@echo "[*] Testing the CC wrapper and instrumentation output..."
-	unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_LLVM_LAF_ALL=1 ./afl-cc $(CFLAGS) $(CPPFLAGS) ./test-instr.c -o test-instr $(LDFLAGS)
+	unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; ASAN_OPTIONS=detect_leaks=0 AFL_QUIET=1 AFL_PATH=. AFL_LLVM_LAF_ALL=1 ./afl-cc $(CFLAGS) $(CPPFLAGS) ./test-instr.c -o test-instr $(LDFLAGS)
 	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
 	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
 	@rm -f test-instr
diff --git a/README.md b/README.md
index 68b64ce6..a0e7a7e4 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 
   Release Version: [3.00c](https://github.com/AFLplusplus/AFLplusplus/releases)
 
-  Github Version: 3.00a
+  Github Version: 3.01a
 
   Repository: [https://github.com/AFLplusplus/AFLplusplus](https://github.com/AFLplusplus/AFLplusplus)
 
diff --git a/afl-cmin b/afl-cmin
index 292d9d9d..eef2b7ef 100755
--- a/afl-cmin
+++ b/afl-cmin
@@ -366,33 +366,35 @@ BEGIN {
     cp_tool = "cp"
   }
 
-  # Make sure that we can actually get anything out of afl-showmap before we
-  # waste too much time.
+  if (!ENVIRON["AFL_SKIP_BIN_CHECK"]) {
+    # Make sure that we can actually get anything out of afl-showmap before we
+    # waste too much time.
 
-  print "[*] Testing the target binary..."
+    print "[*] Testing the target binary..."
 
-  if (!stdin_file) {
-    system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
-  } else {
-    system("cp "in_dir"/"first_file" "stdin_file)
-    system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
-  }
+    if (!stdin_file) {
+      system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
+    } else {
+      system("cp "in_dir"/"first_file" "stdin_file)
+      system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
+    }
 
-  first_count = 0
+    first_count = 0
 
-  runtest = trace_dir"/.run_test"
-  while ((getline < runtest) > 0) {
-    ++first_count
-  }
+    runtest = trace_dir"/.run_test"
+    while ((getline < runtest) > 0) {
+      ++first_count
+    }
 
-  if (first_count) {
-    print "[+] OK, "first_count" tuples recorded."
-  } else {
-    print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr"
-    if (!ENVIRON["AFL_KEEP_TRACES"]) {
-      system("rm -rf "trace_dir" 2>/dev/null")
+    if (first_count) {
+      print "[+] OK, "first_count" tuples recorded."
+    } else {
+      print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr"
+      if (!ENVIRON["AFL_KEEP_TRACES"]) {
+        system("rm -rf "trace_dir" 2>/dev/null")
+      }
+      exit 1
     }
-    exit 1
   }
 
   # Let's roll!
diff --git a/afl-system-config b/afl-system-config
index 4ca9f0a9..7031544c 100755
--- a/afl-system-config
+++ b/afl-system-config
@@ -80,3 +80,4 @@ if [ "$PLATFORM" = "Darwin" ] ; then
   DONE=1
 fi
 test -z "$DONE" && echo Error: Unknown platform: $PLATFORM
+exit 0
diff --git a/docs/Changelog.md b/docs/Changelog.md
index d77c276b..e36e4e9f 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -9,6 +9,21 @@ Want to stay in the loop on major new features? Join our mailing list by
 sending a mail to <afl-users+subscribe@googlegroups.com>.
 
 
+### Version ++3.01a (release)
+  - afl-fuzz
+    - fix crash for very, very fast targets+systems (thanks to mhlakhani
+      for reporting)
+    - switched to a faster RNG
+    - added hghwng's patch for faster trace map analysis
+  - afl-cc
+    - allow instrumenting LLVMFuzzerTestOneInput
+    - fixed endless loop for allow/blocklist lines starting with a
+      comment (thanks to Zherya for reporting)
+    - added AFL_LLVM_INSTRUMENT option NATIVE for native clang pc-guard
+      support (less performant than our own)
+  - added dummy Makefile to instrumentation/
+
+
 ### Version ++3.00c (release)
   - llvm_mode/ and gcc_plugin/ moved to instrumentation/
   - examples/ renamed to utils/
@@ -46,6 +61,8 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
     - somewhere we broke -n dumb fuzzing, fixed
     - added afl_custom_describe to the custom mutator API to allow for easy
       mutation reproduction on crashing inputs
+    - new env. var. AFL_NO_COLOR (or AFL_NO_COLOUR) to suppress colored
+      console output (when configured with USE_COLOR and not ALWAYS_COLORED)
   - instrumentation
     - We received an enhanced gcc_plugin module from AdaCore, thank you
       very much!!
diff --git a/docs/binaryonly_fuzzing.md b/docs/binaryonly_fuzzing.md
index 66734452..787d970d 100644
--- a/docs/binaryonly_fuzzing.md
+++ b/docs/binaryonly_fuzzing.md
@@ -174,7 +174,7 @@
 
   Pintool and Dynamorio are dynamic instrumentation engines, and they can be
   used for getting basic block information at runtime.
-  Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows
+  Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows,
   whereas Dynamorio is additionally available for ARM and AARCH64.
   Dynamorio is also 10x faster than Pintool.
 
@@ -182,7 +182,7 @@
   Dynamorio has a speed decrease of 98-99%
   Pintool has a speed decrease of 99.5%
 
-  Hence Dynamorio is the option to go for if everything fails, and Pintool
+  Hence Dynamorio is the option to go for if everything else fails, and Pintool
   only if Dynamorio fails too.
 
   Dynamorio solutions:
@@ -205,6 +205,7 @@
   * QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym)
   * Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore)
   * S2E: [https://github.com/S2E](https://github.com/S2E)
+  * TinyInst: [https://github.com/googleprojectzero/TinyInst](https://github.com/googleprojectzero/TinyInst) (Mac/Windows only)
   *  ... please send me any missing that are good
 
 
diff --git a/docs/env_variables.md b/docs/env_variables.md
index e203055f..c1693748 100644
--- a/docs/env_variables.md
+++ b/docs/env_variables.md
@@ -113,6 +113,8 @@ Then there are a few specific features that are only available in instrumentatio
 
     - `AFL_LLVM_INSTRUMENT` - this configures the instrumentation mode. 
       Available options:
+        PCGUARD - our own pcguard based instrumentation (default)
+        NATIVE - clang's original pcguard based instrumentation
         CLASSIC - classic AFL (map[cur_loc ^ prev_loc >> 1]++) (default)
         CFG - InsTrim instrumentation (see below)
         LTO - LTO instrumentation (see below)
@@ -381,6 +383,9 @@ checks or alter some of the more exotic semantics of the tool:
     some basic stats. This behavior is also automatically triggered when the
     output from afl-fuzz is redirected to a file or to a pipe.
 
+  - Setting `AFL_NO_COLOR` or `AFL_NO_COLOUR` will omit control sequences for
+    coloring console output when configured with USE_COLOR and not ALWAYS_COLORED.
+
   - Setting `AFL_FORCE_UI` will force painting the UI on the screen even if
     no valid terminal was detected (for virtual consoles)
 
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index 2f2d31d3..e2fb0344 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -134,6 +134,12 @@
 // Little helper to access the ptr to afl->##name_buf - for use in afl_realloc.
 #define AFL_BUF_PARAM(name) ((void **)&afl->name##_buf)
 
+#ifdef WORD_SIZE_64
+  #define AFL_RAND_RETURN u64
+#else
+  #define AFL_RAND_RETURN u32
+#endif
+
 extern s8  interesting_8[INTERESTING_8_LEN];
 extern s16 interesting_16[INTERESTING_8_LEN + INTERESTING_16_LEN];
 extern s32
@@ -580,7 +586,7 @@ typedef struct afl_state {
 
   u32 rand_cnt;                         /* Random number counter            */
 
-  u64 rand_seed[4];
+  u64 rand_seed[3];
   s64 init_seed;
 
   u64 total_cal_us,                     /* Total calibration time (us)      */
@@ -1014,12 +1020,12 @@ void write_bitmap(afl_state_t *);
 u32  count_bits(afl_state_t *, u8 *);
 u32  count_bytes(afl_state_t *, u8 *);
 u32  count_non_255_bytes(afl_state_t *, u8 *);
-#ifdef WORD_SIZE_64
-void simplify_trace(afl_state_t *, u64 *);
+void simplify_trace(afl_state_t *, u8 *);
 void classify_counts(afl_forkserver_t *);
+#ifdef WORD_SIZE_64
+void discover_word(u8 *ret, u64 *current, u64 *virgin);
 #else
-void simplify_trace(afl_state_t *, u32 *);
-void classify_counts(afl_forkserver_t *);
+void discover_word(u8 *ret, u32 *current, u32 *virgin);
 #endif
 void init_count_class16(void);
 void minimize_bits(afl_state_t *, u8 *, u8 *);
@@ -1028,6 +1034,7 @@ u8 *describe_op(afl_state_t *, u8, size_t);
 #endif
 u8 save_if_interesting(afl_state_t *, void *, u32, u8);
 u8 has_new_bits(afl_state_t *, u8 *);
+u8 has_new_bits_unclassified(afl_state_t *, u8 *);
 
 /* Extras */
 
@@ -1111,8 +1118,7 @@ u8 common_fuzz_cmplog_stuff(afl_state_t *afl, u8 *out_buf, u32 len);
 u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len,
                         u64 exec_cksum);
 
-/* xoshiro256** */
-uint64_t rand_next(afl_state_t *afl);
+AFL_RAND_RETURN rand_next(afl_state_t *afl);
 
 /* probability between 0.0 and 1.0 */
 double rand_next_percent(afl_state_t *afl);
diff --git a/include/config.h b/include/config.h
index 93249ed9..e8a49270 100644
--- a/include/config.h
+++ b/include/config.h
@@ -28,7 +28,7 @@
 /* Version string: */
 
 // c = release, d = volatile github dev, e = experimental branch
-#define VERSION "++3.00c"
+#define VERSION "++3.01a"
 
 /******************************************************
  *                                                    *
@@ -36,11 +36,28 @@
  *                                                    *
  ******************************************************/
 
+/* console output colors: There are three ways to configure its behavior
+ * 1. default: colored outputs fixed on: defined USE_COLOR && defined
+ *    ALWAYS_COLORED. The env var. AFL_NO_COLOR will have no effect
+ * 2. defined USE_COLOR && !defined ALWAYS_COLORED
+ *    -> depending on env var AFL_NO_COLOR=1 colors can be switched off
+ *    at run-time. Default is to use colors.
+ * 3. colored outputs fixed off: !defined USE_COLOR
+ *    The env var. AFL_NO_COLOR will have no effect
+ */
+
 /* Comment out to disable terminal colors (note that this makes afl-analyze
    a lot less nice): */
 
 #define USE_COLOR
 
+#ifdef USE_COLOR
+  /* Leave defined to always enable terminal colors */
+  /* Comment out to enable runtime controlled terminal colors via AFL_NO_COLOR
+   */
+  #define ALWAYS_COLORED 1
+#endif
+
 /* StatsD config
    Config can be adjusted via AFL_STATSD_HOST and AFL_STATSD_PORT environment
    variable.
diff --git a/include/coverage-32.h b/include/coverage-32.h
new file mode 100644
index 00000000..a5cc498c
--- /dev/null
+++ b/include/coverage-32.h
@@ -0,0 +1,112 @@
+#include "config.h"
+#include "types.h"
+
+u32 skim(const u32 *virgin, const u32 *current, const u32 *current_end);
+u32 classify_word(u32 word);
+
+inline u32 classify_word(u32 word) {
+
+  u16 mem16[2];
+  memcpy(mem16, &word, sizeof(mem16));
+
+  mem16[0] = count_class_lookup16[mem16[0]];
+  mem16[1] = count_class_lookup16[mem16[1]];
+
+  memcpy(&word, mem16, sizeof(mem16));
+  return word;
+
+}
+
+void simplify_trace(afl_state_t *afl, u8 *bytes) {
+
+  u32 *mem = (u32 *)bytes;
+  u32  i = (afl->fsrv.map_size >> 2);
+
+  while (i--) {
+
+    /* Optimize for sparse bitmaps. */
+
+    if (unlikely(*mem)) {
+
+      u8 *mem8 = (u8 *)mem;
+
+      mem8[0] = simplify_lookup[mem8[0]];
+      mem8[1] = simplify_lookup[mem8[1]];
+      mem8[2] = simplify_lookup[mem8[2]];
+      mem8[3] = simplify_lookup[mem8[3]];
+
+    } else
+
+      *mem = 0x01010101;
+
+    mem++;
+
+  }
+
+}
+
+inline void classify_counts(afl_forkserver_t *fsrv) {
+
+  u32 *mem = (u32 *)fsrv->trace_bits;
+  u32  i = (fsrv->map_size >> 2);
+
+  while (i--) {
+
+    /* Optimize for sparse bitmaps. */
+
+    if (unlikely(*mem)) { *mem = classify_word(*mem); }
+
+    mem++;
+
+  }
+
+}
+
+/* Updates the virgin bits, then reflects whether a new count or a new tuple is
+ * seen in ret. */
+inline void discover_word(u8 *ret, u32 *current, u32 *virgin) {
+
+  /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap
+     that have not been already cleared from the virgin map - since this will
+     almost always be the case. */
+
+  if (*current & *virgin) {
+
+    if (likely(*ret < 2)) {
+
+      u8 *cur = (u8 *)current;
+      u8 *vir = (u8 *)virgin;
+
+      /* Looks like we have not found any new bytes yet; see if any non-zero
+         bytes in current[] are pristine in virgin[]. */
+
+      if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) ||
+          (cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff))
+        *ret = 2;
+      else
+        *ret = 1;
+
+    }
+
+    *virgin &= ~*current;
+
+  }
+
+}
+
+#define PACK_SIZE 16
+inline u32 skim(const u32 *virgin, const u32 *current, const u32 *current_end) {
+
+  for (; current != current_end; virgin += 4, current += 4) {
+
+    if (current[0] && classify_word(current[0]) & virgin[0]) return 1;
+    if (current[1] && classify_word(current[1]) & virgin[1]) return 1;
+    if (current[2] && classify_word(current[2]) & virgin[2]) return 1;
+    if (current[3] && classify_word(current[3]) & virgin[3]) return 1;
+
+  }
+
+  return 0;
+
+}
+
diff --git a/include/coverage-64.h b/include/coverage-64.h
new file mode 100644
index 00000000..0ede5fa5
--- /dev/null
+++ b/include/coverage-64.h
@@ -0,0 +1,189 @@
+#include "config.h"
+#include "types.h"
+
+#if (defined(__AVX512F__) && defined(__AVX512DQ__)) || defined(__AVX2__)
+  #include <immintrin.h>
+#endif
+
+u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end);
+u64 classify_word(u64 word);
+
+inline u64 classify_word(u64 word) {
+
+  u16 mem16[4];
+  memcpy(mem16, &word, sizeof(mem16));
+
+  mem16[0] = count_class_lookup16[mem16[0]];
+  mem16[1] = count_class_lookup16[mem16[1]];
+  mem16[2] = count_class_lookup16[mem16[2]];
+  mem16[3] = count_class_lookup16[mem16[3]];
+
+  memcpy(&word, mem16, sizeof(mem16));
+  return word;
+
+}
+
+void simplify_trace(afl_state_t *afl, u8 *bytes) {
+
+  u64 *mem = (u64 *)bytes;
+  u32  i = (afl->fsrv.map_size >> 3);
+
+  while (i--) {
+
+    /* Optimize for sparse bitmaps. */
+
+    if (unlikely(*mem)) {
+
+      u8 *mem8 = (u8 *)mem;
+
+      mem8[0] = simplify_lookup[mem8[0]];
+      mem8[1] = simplify_lookup[mem8[1]];
+      mem8[2] = simplify_lookup[mem8[2]];
+      mem8[3] = simplify_lookup[mem8[3]];
+      mem8[4] = simplify_lookup[mem8[4]];
+      mem8[5] = simplify_lookup[mem8[5]];
+      mem8[6] = simplify_lookup[mem8[6]];
+      mem8[7] = simplify_lookup[mem8[7]];
+
+    } else
+
+      *mem = 0x0101010101010101ULL;
+
+    mem++;
+
+  }
+
+}
+
+inline void classify_counts(afl_forkserver_t *fsrv) {
+
+  u64 *mem = (u64 *)fsrv->trace_bits;
+  u32  i = (fsrv->map_size >> 3);
+
+  while (i--) {
+
+    /* Optimize for sparse bitmaps. */
+
+    if (unlikely(*mem)) { *mem = classify_word(*mem); }
+
+    mem++;
+
+  }
+
+}
+
+/* Updates the virgin bits, then reflects whether a new count or a new tuple is
+ * seen in ret. */
+inline void discover_word(u8 *ret, u64 *current, u64 *virgin) {
+
+  /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap
+     that have not been already cleared from the virgin map - since this will
+     almost always be the case. */
+
+  if (*current & *virgin) {
+
+    if (likely(*ret < 2)) {
+
+      u8 *cur = (u8 *)current;
+      u8 *vir = (u8 *)virgin;
+
+      /* Looks like we have not found any new bytes yet; see if any non-zero
+         bytes in current[] are pristine in virgin[]. */
+
+      if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) ||
+          (cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff) ||
+          (cur[4] && vir[4] == 0xff) || (cur[5] && vir[5] == 0xff) ||
+          (cur[6] && vir[6] == 0xff) || (cur[7] && vir[7] == 0xff))
+        *ret = 2;
+      else
+        *ret = 1;
+
+    }
+
+    *virgin &= ~*current;
+
+  }
+
+}
+
+#if defined(__AVX512F__) && defined(__AVX512DQ__)
+  #define PACK_SIZE 64
+inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
+
+  for (; current != current_end; virgin += 8, current += 8) {
+
+    __m512i  value = *(__m512i *)current;
+    __mmask8 mask = _mm512_testn_epi64_mask(value, value);
+
+    /* All bytes are zero. */
+    if (mask == 0xff) continue;
+
+      /* Look for nonzero bytes and check for new bits. */
+  #define UNROLL(x) \
+    if (!(mask & (1 << x)) && classify_word(current[x]) & virgin[x]) return 1
+    UNROLL(0);
+    UNROLL(1);
+    UNROLL(2);
+    UNROLL(3);
+    UNROLL(4);
+    UNROLL(5);
+    UNROLL(6);
+    UNROLL(7);
+  #undef UNROLL
+
+  }
+
+  return 0;
+
+}
+
+#endif
+
+#if !defined(PACK_SIZE) && defined(__AVX2__)
+  #define PACK_SIZE 32
+inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
+
+  __m256i zeroes = _mm256_setzero_si256();
+
+  for (; current != current_end; virgin += 4, current += 4) {
+
+    __m256i value = *(__m256i *)current;
+    __m256i cmp = _mm256_cmpeq_epi64(value, zeroes);
+    u32     mask = _mm256_movemask_epi8(cmp);
+
+    /* All bytes are zero. */
+    if (mask == (u32)-1) continue;
+
+    /* Look for nonzero bytes and check for new bits. */
+    if (!(mask & 0xff) && classify_word(current[0]) & virgin[0]) return 1;
+    if (!(mask & 0xff00) && classify_word(current[1]) & virgin[1]) return 1;
+    if (!(mask & 0xff0000) && classify_word(current[2]) & virgin[2]) return 1;
+    if (!(mask & 0xff000000) && classify_word(current[3]) & virgin[3]) return 1;
+
+  }
+
+  return 0;
+
+}
+
+#endif
+
+#if !defined(PACK_SIZE)
+  #define PACK_SIZE 32
+inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
+
+  for (; current != current_end; virgin += 4, current += 4) {
+
+    if (current[0] && classify_word(current[0]) & virgin[0]) return 1;
+    if (current[1] && classify_word(current[1]) & virgin[1]) return 1;
+    if (current[2] && classify_word(current[2]) & virgin[2]) return 1;
+    if (current[3] && classify_word(current[3]) & virgin[3]) return 1;
+
+  }
+
+  return 0;
+
+}
+
+#endif
+
diff --git a/include/debug.h b/include/debug.h
index 5512023c..7f4a6be1 100644
--- a/include/debug.h
+++ b/include/debug.h
@@ -168,12 +168,84 @@
  * Debug & error macros *
  ************************/
 
-/* Just print stuff to the appropriate stream. */
+#if defined USE_COLOR && !defined ALWAYS_COLORED
+  #include <unistd.h>
+  #pragma GCC diagnostic ignored "-Wformat-security"
+/* Strip ANSI color sequences (ESC up to and including the next 'm') from x
+   unless colored output is wanted. Returns x itself when filtering is off;
+   otherwise returns a static buffer that is overwritten by the next call.
+   Text that does not fit into that buffer is truncated instead of being
+   written past its end. */
+static inline const char *colorfilter(const char *x) {
+
+  static int once = 1;
+  static int disabled = 0;          /* non-zero: filtering is switched off */
+
+  if (once) {
+
+    /* when there is no tty -> we always want filtering
+     * when AFL_NO_UI is set filtering depends on AFL_NO_COLOR
+     * otherwise we want always colors
+     */
+    disabled =
+        isatty(2) && (!getenv("AFL_NO_UI") ||
+                      (!getenv("AFL_NO_COLOR") && !getenv("AFL_NO_COLOUR")));
+    once = 0;
+
+  }
+
+  if (likely(disabled)) return x;
+
+  static char monochromestring[4096];
+  char *      d = monochromestring;
+  /* last writable position; one byte stays reserved for the terminator */
+  char *const d_end = monochromestring + sizeof(monochromestring) - 1;
+  int         in_seq = 0;
+
+  /* stop copying once the buffer is full so long input cannot overflow it */
+  while (*x && d < d_end) {
+
+    if (in_seq && *x == 'm') {
+
+      in_seq = 0;
+
+    } else {
 
+      if (!in_seq && *x == '\x1b') { in_seq = 1; }
+      if (!in_seq) { *d++ = *x; }
+
+    }
+
+    ++x;
+
+  }
+
+  *d = '\0';
+  return monochromestring;
+
+}
+
+#else
+  #define colorfilter(x) x                        /* no filtering necessary */
+#endif
+
+/* macro magic to transform the first parameter to SAYF
+ * through colorfilter which strips coloring.
+ *
+ * GET_MACRO expands to its 41st argument. SAYF passes its own arguments
+ * followed by 39 x SAYF_N and one SAYF_1, so the selected name is SAYF_1 for
+ * exactly one argument and SAYF_N for 2 to 40 arguments. More than 40
+ * arguments are not supported by this scheme. */
+#define GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, \
+                  _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26,  \
+                  _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38,  \
+                  _39, _40, NAME, ...)                                         \
+  NAME
+
+/* Public entry point: dispatches to SAYF_1 or SAYF_N (see above) and then
+ * applies the chosen macro to the original argument list. */
+#define SAYF(...)                                                           \
+  GET_MACRO(__VA_ARGS__, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N,    \
+            SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, \
+            SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, \
+            SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, \
+            SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, \
+            SAYF_N, SAYF_1)                                                 \
+  (__VA_ARGS__)
+
+/* SAYF_1: format string only. SAYF_N: format string plus further arguments.
+ * In both cases only the first argument is routed through colorfilter(). */
+#define SAYF_1(x) MY_SAYF(colorfilter(x))
+#define SAYF_N(x, ...) MY_SAYF(colorfilter(x), __VA_ARGS__)
+
+/* Just print stuff to the appropriate stream. */
 #ifdef MESSAGES_TO_STDOUT
-  #define SAYF(x...) printf(x)
+  #define MY_SAYF(x...) printf(x)
 #else
-  #define SAYF(x...) fprintf(stderr, x)
+  #define MY_SAYF(x...) fprintf(stderr, x)
 #endif                                               /* ^MESSAGES_TO_STDOUT */
 
 /* Show a prefixed warning. */
diff --git a/include/envs.h b/include/envs.h
index c0f41ca5..e4e49c4d 100644
--- a/include/envs.h
+++ b/include/envs.h
@@ -78,8 +78,8 @@ static char *afl_environment_variables[] = {
     "AFL_LLVM_CTX",
     "AFL_LLVM_DICT2FILE",
     "AFL_LLVM_DOCUMENT_IDS",
-    "AFL_LLVM_INSTRUMENT",
     "AFL_LLVM_INSTRIM_LOOPHEAD",
+    "AFL_LLVM_INSTRUMENT",
     "AFL_LLVM_LTO_AUTODICTIONARY",
     "AFL_LLVM_AUTODICTIONARY",
     "AFL_LLVM_SKIPSINGLEBLOCK",
@@ -103,6 +103,10 @@ static char *afl_environment_variables[] = {
     "AFL_NO_ARITH",
     "AFL_NO_AUTODICT",
     "AFL_NO_BUILTIN",
+#if defined USE_COLOR && !defined ALWAYS_COLORED
+    "AFL_NO_COLOR",
+    "AFL_NO_COLOUR",
+#endif
     "AFL_NO_CPU_RED",
     "AFL_NO_FORKSRV",
     "AFL_NO_UI",
diff --git a/include/forkserver.h b/include/forkserver.h
index 5d5c728f..8e029266 100644
--- a/include/forkserver.h
+++ b/include/forkserver.h
@@ -64,7 +64,7 @@ typedef struct afl_forkserver {
 
   FILE *plot_file;                      /* Gnuplot output file              */
 
-  /* Note: lat_run_timed_out is u32 to send it to the child as 4 byte array */
+  /* Note: last_run_timed_out is u32 to send it to the child as 4 byte array */
   u32 last_run_timed_out;               /* Traced process timed out?        */
 
   u8 last_kill_signal;                  /* Signal that killed the child     */
diff --git a/instrumentation/Makefile b/instrumentation/Makefile
new file mode 100644
index 00000000..6cdd1a07
--- /dev/null
+++ b/instrumentation/Makefile
@@ -0,0 +1,2 @@
+all:
+	@echo "no need to do make in the instrumentation/ directory :) - it is all done in the main one"
diff --git a/instrumentation/README.llvm.md b/instrumentation/README.llvm.md
index 07636970..2705ce0d 100644
--- a/instrumentation/README.llvm.md
+++ b/instrumentation/README.llvm.md
@@ -168,26 +168,7 @@ This is the most powerful and effective fuzzing you can do.
 Please see [README.persistent_mode.md](README.persistent_mode.md) for a
 full explanation.
 
-## 7) Bonus feature: 'trace-pc-guard' mode
-
-LLVM is shipping with a built-in execution tracing feature
-that provides AFL with the necessary tracing data without the need to
-post-process the assembly or install any compiler plugins. See:
-
-  http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards
-
-If you have not an outdated compiler and want to give it a try, build
-targets this way:
-
-```
-AFL_LLVM_INSTRUMENT=PCGUARD  make
-```
-
-Note that this is currently the default if you use LLVM >= 7, as it is the best
-mode. Recommended is LLVM >= 9.
-If you have llvm 11+ and compiled afl-clang-lto - this is the only better mode.
-
-## 8) Bonus feature: 'dict2file' pass
+## 7) Bonus feature: 'dict2file' pass
 
 Just specify `AFL_LLVM_DICT2FILE=/absolute/path/file.txt` and during compilation
 all constant string compare parameters will be written to this file to be
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
index b1df26db..cddde87c 100644
--- a/instrumentation/afl-compiler-rt.o.c
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -236,8 +236,8 @@ static void __afl_map_shm(void) {
 
   if (__afl_final_loc) {
 
-    if (__afl_final_loc % 8)
-      __afl_final_loc = (((__afl_final_loc + 7) >> 3) << 3);
+    if (__afl_final_loc % 32)
+      __afl_final_loc = (((__afl_final_loc + 31) >> 5) << 5);
     __afl_map_size = __afl_final_loc;
 
     if (__afl_final_loc > MAP_SIZE) {
diff --git a/instrumentation/afl-gcc-pass.so.cc b/instrumentation/afl-gcc-pass.so.cc
index e116e7d1..25437609 100644
--- a/instrumentation/afl-gcc-pass.so.cc
+++ b/instrumentation/afl-gcc-pass.so.cc
@@ -516,7 +516,9 @@ struct afl_pass : gimple_opt_pass {
         "__cmplog",
         "__sancov",
         "msan.",
-        "LLVMFuzzer",
+        "LLVMFuzzerM",
+        "LLVMFuzzerC",
+        "LLVMFuzzerI",
         "__decide_deferred",
         "maybe_duplicate_stderr",
         "discard_output",
@@ -620,10 +622,11 @@ struct afl_pass : gimple_opt_pass {
             allowListFiles.push_back(line);
           else
             allowListFunctions.push_back(line);
-          getline(fileStream, line);
 
         }
 
+        getline(fileStream, line);
+
       }
 
       if (debug)
@@ -694,10 +697,11 @@ struct afl_pass : gimple_opt_pass {
             denyListFiles.push_back(line);
           else
             denyListFunctions.push_back(line);
-          getline(fileStream, line);
 
         }
 
+        getline(fileStream, line);
+
       }
 
       if (debug)
diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc
index 21c4d204..a27c4069 100644
--- a/instrumentation/afl-llvm-common.cc
+++ b/instrumentation/afl-llvm-common.cc
@@ -70,7 +70,9 @@ bool isIgnoreFunction(const llvm::Function *F) {
       "__cmplog",
       "__sancov",
       "msan.",
-      "LLVMFuzzer",
+      "LLVMFuzzerM",
+      "LLVMFuzzerC",
+      "LLVMFuzzerI",
       "__decide_deferred",
       "maybe_duplicate_stderr",
       "discard_output",
@@ -166,10 +168,11 @@ void initInstrumentList() {
           allowListFiles.push_back(line);
         else
           allowListFunctions.push_back(line);
-        getline(fileStream, line);
 
       }
 
+      getline(fileStream, line);
+
     }
 
     if (debug)
@@ -240,10 +243,11 @@ void initInstrumentList() {
           denyListFiles.push_back(line);
         else
           denyListFunctions.push_back(line);
-        getline(fileStream, line);
 
       }
 
+      getline(fileStream, line);
+
     }
 
     if (debug)
diff --git a/src/afl-cc.c b/src/afl-cc.c
index 2aeb2178..3b8092a9 100644
--- a/src/afl-cc.c
+++ b/src/afl-cc.c
@@ -1346,6 +1346,10 @@ int main(int argc, char **argv, char **envp) {
         "Sub-Modes: (set via env AFL_LLVM_INSTRUMENT, afl-cc selects the best "
         "available)\n"
         "  PCGUARD: Dominator tree instrumentation (best!) (README.llvm.md)\n"
+#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
+        "  NATIVE:  use llvm's native PCGUARD instrumentation (less "
+        "performant)\n"
+#endif
         "  CLASSIC: decision target instrumentation (README.llvm.md)\n"
         "  CTX:     CLASSIC + callee context (instrumentation/README.ctx.md)\n"
         "  NGRAM-x: CLASSIC + previous path "
@@ -1432,7 +1436,7 @@ int main(int argc, char **argv, char **envp) {
             "  AFL_LLVM_LAF_SPLIT_FLOATS: cascaded comparisons on floats\n"
             "  AFL_LLVM_LAF_TRANSFORM_COMPARES: cascade comparisons for string "
             "functions\n"
-            "  AFL_LLVM_INSTRUMENT_ALLOW/AFL_LLVM_INSTRUMENT_DENY: enable "
+            "  AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST: enable "
             "instrument allow/\n"
             "    deny listing (selective instrumentation)\n");
 
diff --git a/src/afl-common.c b/src/afl-common.c
index 4df22394..6dc8abe0 100644
--- a/src/afl-common.c
+++ b/src/afl-common.c
@@ -26,6 +26,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <strings.h>
+#include <math.h>
 
 #include "debug.h"
 #include "alloc-inl.h"
@@ -786,6 +787,10 @@ u8 *u_stringify_float(u8 *buf, double val) {
 
     sprintf(buf, "%0.01f", val);
 
+  } else if (unlikely(isnan(val) || isinf(val))) {
+
+    /* NaN and +/-Inf must not reach the (u64) cast in the branch below, so
+       print a fixed placeholder for them. Finite values fall through and
+       are printed as integers. */
+    strcpy(buf, "999.9");
+
   } else {
 
     return u_stringify_int(buf, (u64)val);
diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c
index 3afb94be..90fa55e9 100644
--- a/src/afl-forkserver.c
+++ b/src/afl-forkserver.c
@@ -641,11 +641,11 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
 
         if (!fsrv->map_size) { fsrv->map_size = MAP_SIZE; }
 
-        if (unlikely(tmp_map_size % 8)) {
+        if (unlikely(tmp_map_size % 32)) {
 
           // should not happen
           WARNF("Target reported non-aligned map size of %u", tmp_map_size);
-          tmp_map_size = (((tmp_map_size + 8) >> 3) << 3);
+          tmp_map_size = (((tmp_map_size + 31) >> 5) << 5);
 
         }
 
diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c
index f1ca7400..738ba986 100644
--- a/src/afl-fuzz-bitmap.c
+++ b/src/afl-fuzz-bitmap.c
@@ -49,101 +49,6 @@ void write_bitmap(afl_state_t *afl) {
 
 }
 
-/* Check if the current execution path brings anything new to the table.
-   Update virgin bits to reflect the finds. Returns 1 if the only change is
-   the hit-count for a particular tuple; 2 if there are new tuples seen.
-   Updates the map, so subsequent calls will always return 0.
-
-   This function is called after every exec() on a fairly large buffer, so
-   it needs to be fast. We do this in 32-bit and 64-bit flavors. */
-
-u8 __attribute__((hot)) has_new_bits(afl_state_t *afl, u8 *virgin_map) {
-
-#ifdef WORD_SIZE_64
-
-  u64 *current = (u64 *)afl->fsrv.trace_bits;
-  u64 *virgin = (u64 *)virgin_map;
-
-  u32 i = (afl->fsrv.map_size >> 3);
-
-#else
-
-  u32 *current = (u32 *)afl->fsrv.trace_bits;
-  u32 *virgin = (u32 *)virgin_map;
-
-  u32 i = (afl->fsrv.map_size >> 2);
-
-#endif                                                     /* ^WORD_SIZE_64 */
-  // the map size must be a minimum of 8 bytes.
-  // for variable/dynamic map sizes this is ensured in the forkserver
-
-  u8 ret = 0;
-
-  while (i--) {
-
-    /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap
-       that have not been already cleared from the virgin map - since this will
-       almost always be the case. */
-
-    // the (*current) is unnecessary but speeds up the overall comparison
-    if (unlikely(*current) && unlikely(*current & *virgin)) {
-
-      if (likely(ret < 2)) {
-
-        u8 *cur = (u8 *)current;
-        u8 *vir = (u8 *)virgin;
-
-        /* Looks like we have not found any new bytes yet; see if any non-zero
-           bytes in current[] are pristine in virgin[]. */
-
-#ifdef WORD_SIZE_64
-
-        if (*virgin == 0xffffffffffffffff || (cur[0] && vir[0] == 0xff) ||
-            (cur[1] && vir[1] == 0xff) || (cur[2] && vir[2] == 0xff) ||
-            (cur[3] && vir[3] == 0xff) || (cur[4] && vir[4] == 0xff) ||
-            (cur[5] && vir[5] == 0xff) || (cur[6] && vir[6] == 0xff) ||
-            (cur[7] && vir[7] == 0xff)) {
-
-          ret = 2;
-
-        } else {
-
-          ret = 1;
-
-        }
-
-#else
-
-        if (*virgin == 0xffffffff || (cur[0] && vir[0] == 0xff) ||
-            (cur[1] && vir[1] == 0xff) || (cur[2] && vir[2] == 0xff) ||
-            (cur[3] && vir[3] == 0xff))
-          ret = 2;
-        else
-          ret = 1;
-
-#endif                                                     /* ^WORD_SIZE_64 */
-
-      }
-
-      *virgin &= ~*current;
-
-    }
-
-    ++current;
-    ++virgin;
-
-  }
-
-  if (unlikely(ret) && likely(virgin_map == afl->virgin_bits)) {
-
-    afl->bitmap_changed = 1;
-
-  }
-
-  return ret;
-
-}
-
 /* Count the number of bits set in the provided bitmap. Used for the status
    screen several times every second, does not have to be fast. */
 
@@ -242,77 +147,11 @@ const u8 simplify_lookup[256] = {
 
 };
 
-#ifdef WORD_SIZE_64
-
-void simplify_trace(afl_state_t *afl, u64 *mem) {
-
-  u32 i = (afl->fsrv.map_size >> 3);
-
-  while (i--) {
-
-    /* Optimize for sparse bitmaps. */
-
-    if (unlikely(*mem)) {
-
-      u8 *mem8 = (u8 *)mem;
-
-      mem8[0] = simplify_lookup[mem8[0]];
-      mem8[1] = simplify_lookup[mem8[1]];
-      mem8[2] = simplify_lookup[mem8[2]];
-      mem8[3] = simplify_lookup[mem8[3]];
-      mem8[4] = simplify_lookup[mem8[4]];
-      mem8[5] = simplify_lookup[mem8[5]];
-      mem8[6] = simplify_lookup[mem8[6]];
-      mem8[7] = simplify_lookup[mem8[7]];
-
-    } else {
-
-      *mem = 0x0101010101010101ULL;
-
-    }
-
-    ++mem;
-
-  }
-
-}
-
-#else
-
-void simplify_trace(afl_state_t *afl, u32 *mem) {
-
-  u32 i = (afl->fsrv.map_size >> 2);
-
-  while (i--) {
-
-    /* Optimize for sparse bitmaps. */
-
-    if (unlikely(*mem)) {
-
-      u8 *mem8 = (u8 *)mem;
-
-      mem8[0] = simplify_lookup[mem8[0]];
-      mem8[1] = simplify_lookup[mem8[1]];
-      mem8[2] = simplify_lookup[mem8[2]];
-      mem8[3] = simplify_lookup[mem8[3]];
-
-    } else
-
-      *mem = 0x01010101;
-
-    ++mem;
-
-  }
-
-}
-
-#endif                                                     /* ^WORD_SIZE_64 */
-
 /* Destructively classify execution counts in a trace. This is used as a
    preprocessing step for any newly acquired traces. Called on every exec,
    must be fast. */
 
-static const u8 count_class_lookup8[256] = {
+const u8 count_class_lookup8[256] = {
 
     [0] = 0,
     [1] = 1,
@@ -326,7 +165,7 @@ static const u8 count_class_lookup8[256] = {
 
 };
 
-static u16 count_class_lookup16[65536];
+u16 count_class_lookup16[65536];
 
 void init_count_class16(void) {
 
@@ -345,63 +184,87 @@ void init_count_class16(void) {
 
 }
 
-#ifdef WORD_SIZE_64
+/* Import coverage processing routines. */
 
-void __attribute__((hot)) classify_counts(afl_forkserver_t *fsrv) {
+#ifdef WORD_SIZE_64
+  #include "coverage-64.h"
+#else
+  #include "coverage-32.h"
+#endif
 
-  u64 *mem = (u64 *)fsrv->trace_bits;
+/* Check if the current execution path brings anything new to the table.
+   Update virgin bits to reflect the finds. Returns 1 if the only change is
+   the hit-count for a particular tuple; 2 if there are new tuples seen.
+   Updates the map, so subsequent calls will always return 0.
 
-  u32 i = (fsrv->map_size >> 3);
+   This function is called after every exec() on a fairly large buffer, so
+   it needs to be fast. We do this in 32-bit and 64-bit flavors. */
 
-  while (i--) {
+inline u8 has_new_bits(afl_state_t *afl, u8 *virgin_map) {
 
-    /* Optimize for sparse bitmaps. */
+#ifdef WORD_SIZE_64
 
-    if (unlikely(*mem)) {
+  u64 *current = (u64 *)afl->fsrv.trace_bits;
+  u64 *virgin = (u64 *)virgin_map;
 
-      u16 *mem16 = (u16 *)mem;
+  u32 i = (afl->fsrv.map_size >> 3);
 
-      mem16[0] = count_class_lookup16[mem16[0]];
-      mem16[1] = count_class_lookup16[mem16[1]];
-      mem16[2] = count_class_lookup16[mem16[2]];
-      mem16[3] = count_class_lookup16[mem16[3]];
+#else
 
-    }
+  u32 *current = (u32 *)afl->fsrv.trace_bits;
+  u32 *virgin = (u32 *)virgin_map;
 
-    ++mem;
+  u32 i = (afl->fsrv.map_size >> 2);
 
-  }
+#endif                                                     /* ^WORD_SIZE_64 */
 
-}
+  u8 ret = 0;
+  while (i--) {
 
-#else
+    if (unlikely(*current)) discover_word(&ret, current, virgin);
 
-void __attribute__((hot)) classify_counts(afl_forkserver_t *fsrv) {
+    current++;
+    virgin++;
 
-  u32 *mem = (u32 *)fsrv->trace_bits;
+  }
 
-  u32 i = (fsrv->map_size >> 2);
+  if (unlikely(ret) && likely(virgin_map == afl->virgin_bits))
+    afl->bitmap_changed = 1;
 
-  while (i--) {
+  return ret;
 
-    /* Optimize for sparse bitmaps. */
+}
 
-    if (unlikely(*mem)) {
+/* A combination of classify_counts and has_new_bits. If 0 is returned, then the
+ * trace bits are kept as-is. Otherwise, the trace bits are overwritten with
+ * classified values.
+ *
+ * This accelerates the processing: in most cases, no interesting behavior
+ * happens, and the trace bits will be discarded soon. This function optimizes
+ * for such cases: a one-pass scan of the trace bits without modifying anything.
+ * Only in rare cases does it fall back to the slow path: classify_counts()
+ * first, then return has_new_bits(). */
 
-      u16 *mem16 = (u16 *)mem;
+inline u8 has_new_bits_unclassified(afl_state_t *afl, u8 *virgin_map) {
 
-      mem16[0] = count_class_lookup16[mem16[0]];
-      mem16[1] = count_class_lookup16[mem16[1]];
+  /* Handle the hot path first: no new coverage */
+  u8 *end = afl->fsrv.trace_bits + afl->fsrv.map_size;
 
-    }
+#ifdef WORD_SIZE_64
 
-    ++mem;
+  if (!skim((u64 *)virgin_map, (u64 *)afl->fsrv.trace_bits, (u64 *)end))
+    return 0;
 
-  }
+#else
 
-}
+  if (!skim((u32 *)virgin_map, (u32 *)afl->fsrv.trace_bits, (u32 *)end))
+    return 0;
 
 #endif                                                     /* ^WORD_SIZE_64 */
+  classify_counts(&afl->fsrv);
+  return has_new_bits(afl, virgin_map);
+
+}
 
 /* Compact trace bytes into a smaller bitmap. We effectively just drop the
    count information here. This is called only sporadically, for some
@@ -581,7 +444,7 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
   u8 *queue_fn = "";
   u8  new_bits = '\0';
   s32 fd;
-  u8  keeping = 0, res;
+  u8  keeping = 0, res, classified = 0;
   u64 cksum = 0;
 
   u8 fn[PATH_MAX];
@@ -605,13 +468,17 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
     /* Keep only if there are new bits in the map, add to queue for
        future fuzzing, etc. */
 
-    if (!(new_bits = has_new_bits(afl, afl->virgin_bits))) {
+    new_bits = has_new_bits_unclassified(afl, afl->virgin_bits);
+
+    if (likely(!new_bits)) {
 
       if (unlikely(afl->crash_mode)) { ++afl->total_crashes; }
       return 0;
 
     }
 
+    classified = new_bits;
+
 #ifndef SIMPLE_FILES
 
     queue_fn = alloc_printf(
@@ -715,11 +582,14 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
       if (likely(!afl->non_instrumented_mode)) {
 
-#ifdef WORD_SIZE_64
-        simplify_trace(afl, (u64 *)afl->fsrv.trace_bits);
-#else
-        simplify_trace(afl, (u32 *)afl->fsrv.trace_bits);
-#endif                                                     /* ^WORD_SIZE_64 */
+        if (!classified) {
+
+          classify_counts(&afl->fsrv);
+          classified = 1;
+
+        }
+
+        simplify_trace(afl, afl->fsrv.trace_bits);
 
         if (!has_new_bits(afl, afl->virgin_tmout)) { return keeping; }
 
@@ -764,6 +634,7 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
         u8 new_fault;
         write_to_testcase(afl, mem, len);
         new_fault = fuzz_run_target(afl, &afl->fsrv, afl->hang_tmout);
+        classify_counts(&afl->fsrv);
 
         /* A corner case that one user reported bumping into: increasing the
            timeout actually uncovers a crash. Make sure we don't discard it if
@@ -812,11 +683,14 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
       if (likely(!afl->non_instrumented_mode)) {
 
-#ifdef WORD_SIZE_64
-        simplify_trace(afl, (u64 *)afl->fsrv.trace_bits);
-#else
-        simplify_trace(afl, (u32 *)afl->fsrv.trace_bits);
-#endif                                                     /* ^WORD_SIZE_64 */
+        if (!classified) {
+
+          classify_counts(&afl->fsrv);
+          classified = 1;
+
+        }
+
+        simplify_trace(afl, afl->fsrv.trace_bits);
 
         if (!has_new_bits(afl, afl->virgin_crash)) { return keeping; }
 
diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c
index 0db3a111..ec937f29 100644
--- a/src/afl-fuzz-init.c
+++ b/src/afl-fuzz-init.c
@@ -666,7 +666,7 @@ void read_testcases(afl_state_t *afl, u8 *directory) {
 
   }
 
-  if (afl->shuffle_queue && nl_cnt > 1) {
+  if (unlikely(afl->old_seed_selection && afl->shuffle_queue && nl_cnt > 1)) {
 
     ACTF("Shuffling queue...");
     shuffle_ptrs(afl, (void **)nl, nl_cnt);
diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c
index a97ceb89..60086bd6 100644
--- a/src/afl-fuzz-run.c
+++ b/src/afl-fuzz-run.c
@@ -62,8 +62,6 @@ fuzz_run_target(afl_state_t *afl, afl_forkserver_t *fsrv, u32 timeout) {
   time_spent_start = (spec.tv_sec * 1000000000) + spec.tv_nsec;
 #endif
 
-  // TODO: Don't classify for faults?
-  classify_counts(fsrv);
   return res;
 
 }
@@ -379,6 +377,7 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
 
     }
 
+    classify_counts(&afl->fsrv);
     cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
     if (q->exec_cksum != cksum) {
 
@@ -767,13 +766,14 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
       write_with_gap(afl, in_buf, q->len, remove_pos, trim_avail);
 
       fault = fuzz_run_target(afl, &afl->fsrv, afl->fsrv.exec_tmout);
-      ++afl->trim_execs;
 
       if (afl->stop_soon || fault == FSRV_RUN_ERROR) { goto abort_trimming; }
 
       /* Note that we don't keep track of crashes or hangs here; maybe TODO?
        */
 
+      ++afl->trim_execs;
+      classify_counts(&afl->fsrv);
       cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
 
       /* If the deletion had no impact on the trace, make it permanent. This
diff --git a/src/afl-fuzz-state.c b/src/afl-fuzz-state.c
index 9c51a3ef..7053572b 100644
--- a/src/afl-fuzz-state.c
+++ b/src/afl-fuzz-state.c
@@ -401,6 +401,23 @@ void read_afl_environment(afl_state_t *afl, char **envp) {
             afl->afl_env.afl_crash_exitcode =
                 (u8 *)get_afl_env(afl_environment_variables[i]);
 
+#if defined USE_COLOR && !defined ALWAYS_COLORED
+
+          } else if (!strncmp(env, "AFL_NO_COLOR",
+
+                              afl_environment_variable_len)) {
+
+            /* colorfilter() reads this variable directly via getenv(), so
+               there is nothing to store here. In particular it must not be
+               written to afl_statsd_tags_flavor, which belongs to the StatsD
+               configuration. The lookup itself is kept as before. */
+            (void)get_afl_env(afl_environment_variables[i]);
+
+          } else if (!strncmp(env, "AFL_NO_COLOUR",
+
+                              afl_environment_variable_len)) {
+
+            /* Same as AFL_NO_COLOR above: look it up, store nothing. */
+            (void)get_afl_env(afl_environment_variables[i]);
+#endif
+
           }
 
         } else {
diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c
index 321bbb35..50e2ef15 100644
--- a/src/afl-fuzz-stats.c
+++ b/src/afl-fuzz-stats.c
@@ -371,6 +371,8 @@ void show_stats(afl_state_t *afl) {
 
   if (!afl->stats_last_execs) {
 
+    if (unlikely(cur_ms == afl->start_time)) --afl->start_time;
+
     afl->stats_avg_exec =
         ((double)afl->fsrv.total_execs) * 1000 / (cur_ms - afl->start_time);
 
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index 391d4c4f..2af374f2 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -156,6 +156,13 @@ static void usage(u8 *argv0, int more_help) {
 
   if (more_help > 1) {
 
+#if defined USE_COLOR && !defined ALWAYS_COLORED
+  #define DYN_COLOR \
+    "AFL_NO_COLOR or AFL_NO_COLOUR: switch colored console output off\n"
+#else
+  #define DYN_COLOR
+#endif
+
     SAYF(
       "Environment variables used:\n"
       "LD_BIND_LAZY: do not set LD_BIND_NOW env var for target\n"
@@ -194,6 +201,9 @@ static void usage(u8 *argv0, int more_help) {
       "AFL_NO_FORKSRV: run target via execve instead of using the forkserver\n"
       "AFL_NO_SNAPSHOT: do not use the snapshot feature (if the snapshot lkm is loaded)\n"
       "AFL_NO_UI: switch status screen off\n"
+
+      DYN_COLOR
+
       "AFL_PATH: path to AFL support binaries\n"
       "AFL_PYTHON_MODULE: mutate and trim inputs with the specified Python module\n"
       "AFL_QUIET: suppress forkserver status messages\n"
@@ -298,6 +308,17 @@ int main(int argc, char **argv_orig, char **envp) {
   struct timeval  tv;
   struct timezone tz;
 
+  #if defined USE_COLOR && defined ALWAYS_COLORED
+  if (getenv("AFL_NO_COLOR") || getenv("AFL_NO_COLOUR")) {
+
+    WARNF(
+        "Setting AFL_NO_COLOR has no effect (colors are configured on at "
+        "compile time)");
+
+  }
+
+  #endif
+
   char **argv = argv_cpy_dup(argc, argv_orig);
 
   afl_state_t *afl = calloc(1, sizeof(afl_state_t));
diff --git a/src/afl-performance.c b/src/afl-performance.c
index e070a05e..89b170eb 100644
--- a/src/afl-performance.c
+++ b/src/afl-performance.c
@@ -27,45 +27,49 @@
 #include "xxhash.h"
 #undef XXH_INLINE_ALL
 
-/* we use xoshiro256** instead of rand/random because it is 10x faster and has
-   better randomness properties. */
-
-static inline uint64_t rotl(const uint64_t x, int k) {
-
-  return (x << k) | (x >> (64 - k));
-
-}
-
 void rand_set_seed(afl_state_t *afl, s64 init_seed) {
 
   afl->init_seed = init_seed;
   afl->rand_seed[0] =
       hash64((u8 *)&afl->init_seed, sizeof(afl->init_seed), HASH_CONST);
   afl->rand_seed[1] = afl->rand_seed[0] ^ 0x1234567890abcdef;
-  afl->rand_seed[2] = afl->rand_seed[0] & 0x0123456789abcdef;
-  afl->rand_seed[3] = afl->rand_seed[0] | 0x01abcde43f567908;
+  afl->rand_seed[2] = (afl->rand_seed[0] & 0x1234567890abcdef) ^
+                      (afl->rand_seed[1] | 0xfedcba9876543210);
 
 }
 
-inline uint64_t rand_next(afl_state_t *afl) {
+#define ROTL(d, lrot) ((d << (lrot)) | (d >> (8 * sizeof(d) - (lrot))))
 
-  const uint64_t result =
-      rotl(afl->rand_seed[0] + afl->rand_seed[3], 23) + afl->rand_seed[0];
+#ifdef WORD_SIZE_64
+// romuDuoJr
+inline AFL_RAND_RETURN rand_next(afl_state_t *afl) {
 
-  const uint64_t t = afl->rand_seed[1] << 17;
+  AFL_RAND_RETURN xp = afl->rand_seed[0];
+  afl->rand_seed[0] = 15241094284759029579u * afl->rand_seed[1];
+  afl->rand_seed[1] = afl->rand_seed[1] - xp;
+  afl->rand_seed[1] = ROTL(afl->rand_seed[1], 27);
+  return xp;
 
-  afl->rand_seed[2] ^= afl->rand_seed[0];
-  afl->rand_seed[3] ^= afl->rand_seed[1];
-  afl->rand_seed[1] ^= afl->rand_seed[2];
-  afl->rand_seed[0] ^= afl->rand_seed[3];
+}
 
-  afl->rand_seed[2] ^= t;
+#else
+// RomuTrio32
+inline AFL_RAND_RETURN rand_next(afl_state_t *afl) {
+
+  AFL_RAND_RETURN xp = afl->rand_seed[0], yp = afl->rand_seed[1],
+                  zp = afl->rand_seed[2];
+  afl->rand_seed[0] = 3323815723u * zp;
+  afl->rand_seed[1] = yp - xp;
+  afl->rand_seed[1] = ROTL(afl->rand_seed[1], 6);
+  afl->rand_seed[2] = zp - yp;
+  afl->rand_seed[2] = ROTL(afl->rand_seed[2], 22);
+  return xp;
 
-  afl->rand_seed[3] = rotl(afl->rand_seed[3], 45);
+}
 
-  return result;
+#endif
 
-}
+#undef ROTL
 
 /* returns a double between 0.000000000 and 1.000000000 */
 
@@ -75,80 +79,6 @@ inline double rand_next_percent(afl_state_t *afl) {
 
 }
 
-/* This is the jump function for the generator. It is equivalent
-   to 2^128 calls to rand_next(); it can be used to generate 2^128
-   non-overlapping subsequences for parallel computations. */
-
-void jump(afl_state_t *afl) {
-
-  static const uint64_t JUMP[] = {0x180ec6d33cfd0aba, 0xd5a61266f0c9392c,
-                                  0xa9582618e03fc9aa, 0x39abdc4529b1661c};
-  size_t                i, b;
-  uint64_t              s0 = 0;
-  uint64_t              s1 = 0;
-  uint64_t              s2 = 0;
-  uint64_t              s3 = 0;
-  for (i = 0; i < (sizeof(JUMP) / sizeof(*JUMP)); i++)
-    for (b = 0; b < 64; b++) {
-
-      if (JUMP[i] & UINT64_C(1) << b) {
-
-        s0 ^= afl->rand_seed[0];
-        s1 ^= afl->rand_seed[1];
-        s2 ^= afl->rand_seed[2];
-        s3 ^= afl->rand_seed[3];
-
-      }
-
-      rand_next(afl);
-
-    }
-
-  afl->rand_seed[0] = s0;
-  afl->rand_seed[1] = s1;
-  afl->rand_seed[2] = s2;
-  afl->rand_seed[3] = s3;
-
-}
-
-/* This is the long-jump function for the generator. It is equivalent to
-   2^192 calls to rand_next(); it can be used to generate 2^64 starting points,
-   from each of which jump() will generate 2^64 non-overlapping
-   subsequences for parallel distributed computations. */
-
-void long_jump(afl_state_t *afl) {
-
-  static const uint64_t LONG_JUMP[] = {0x76e15d3efefdcbbf, 0xc5004e441c522fb3,
-                                       0x77710069854ee241, 0x39109bb02acbe635};
-
-  size_t   i, b;
-  uint64_t s0 = 0;
-  uint64_t s1 = 0;
-  uint64_t s2 = 0;
-  uint64_t s3 = 0;
-  for (i = 0; i < (sizeof(LONG_JUMP) / sizeof(*LONG_JUMP)); i++)
-    for (b = 0; b < 64; b++) {
-
-      if (LONG_JUMP[i] & UINT64_C(1) << b) {
-
-        s0 ^= afl->rand_seed[0];
-        s1 ^= afl->rand_seed[1];
-        s2 ^= afl->rand_seed[2];
-        s3 ^= afl->rand_seed[3];
-
-      }
-
-      rand_next(afl);
-
-    }
-
-  afl->rand_seed[0] = s0;
-  afl->rand_seed[1] = s1;
-  afl->rand_seed[2] = s2;
-  afl->rand_seed[3] = s3;
-
-}
-
 /* we switch from afl's murmur implementation to xxh3 as it is 30% faster -
    and get 64 bit hashes instead of just 32 bit. Less collisions! :-) */
 
diff --git a/test/test-basic.sh b/test/test-basic.sh
index 24aa30a4..2ddf14af 100755
--- a/test/test-basic.sh
+++ b/test/test-basic.sh
@@ -25,13 +25,16 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
       CODE=1
     }
     rm -f test-instr.plain.0 test-instr.plain.1
+    SKIP=
     TUPLES=`echo 1|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
-    test "$TUPLES" -gt 4 -a "$TUPLES" -lt 11 && {
+    test "$TUPLES" -gt 2 -a "$TUPLES" -lt 12 && {
       $ECHO "$GREEN[+] ${AFL_GCC} run reported $TUPLES instrumented locations which is fine"
     } || {
       $ECHO "$RED[!] ${AFL_GCC} instrumentation produces weird numbers: $TUPLES"
       CODE=1
     }
+    test "$TUPLES" -lt 4 && SKIP=1
+    true  # this is needed because of the test above
   } || {
     $ECHO "$RED[!] ${AFL_GCC} failed"
     echo CUT------------------------------------------------------------------CUT
@@ -65,18 +68,20 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
   }) || {
     mkdir -p in
     echo 0 > in/in
-    $ECHO "$GREY[*] running afl-fuzz for ${AFL_GCC}, this will take approx 10 seconds"
-    {
-      ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
-    } >>errors 2>&1
-    test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
-      $ECHO "$GREEN[+] afl-fuzz is working correctly with ${AFL_GCC}"
-    } || {
-      echo CUT------------------------------------------------------------------CUT
-      cat errors
-      echo CUT------------------------------------------------------------------CUT
-      $ECHO "$RED[!] afl-fuzz is not working correctly with ${AFL_GCC}"
-      CODE=1
+    test -z "$SKIP" && {
+      $ECHO "$GREY[*] running afl-fuzz for ${AFL_GCC}, this will take approx 10 seconds"
+      {
+        ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
+      } >>errors 2>&1
+      test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
+        $ECHO "$GREEN[+] afl-fuzz is working correctly with ${AFL_GCC}"
+      } || {
+        echo CUT------------------------------------------------------------------CUT
+        cat errors
+        echo CUT------------------------------------------------------------------CUT
+        $ECHO "$RED[!] afl-fuzz is not working correctly with ${AFL_GCC}"
+        CODE=1
+      }
     }
     echo 000000000000000000000000 > in/in2
     echo 111 > in/in3
@@ -121,6 +126,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
  }
  if [ ${AFL_GCC} = "afl-gcc" ] ; then AFL_GCC=afl-clang ; else AFL_GCC=afl-gcc ; fi
  $ECHO "$BLUE[*] Testing: ${AFL_GCC}, afl-showmap, afl-fuzz, afl-cmin and afl-tmin"
+ SKIP=
  test -e ../${AFL_GCC} -a -e ../afl-showmap -a -e ../afl-fuzz && {
   ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1
   AFL_HARDEN=1 ../${AFL_GCC} -o test-compcov.harden test-compcov.c > /dev/null 2>&1
@@ -141,12 +147,14 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
     }
     rm -f test-instr.plain.0 test-instr.plain.1
     TUPLES=`echo 1|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
-    test "$TUPLES" -gt 4 -a "$TUPLES" -lt 11 && {
+    test "$TUPLES" -gt 2 -a "$TUPLES" -lt 12 && {
       $ECHO "$GREEN[+] ${AFL_GCC} run reported $TUPLES instrumented locations which is fine"
     } || {
       $ECHO "$RED[!] ${AFL_GCC} instrumentation produces weird numbers: $TUPLES"
       CODE=1
     }
+    test "$TUPLES" -lt 4 && SKIP=1
+    true  # keep this block's exit status 0: a false SKIP test above would otherwise trigger the "|| {" failure branch
   } || {
     $ECHO "$RED[!] ${AFL_GCC} failed"
     echo CUT------------------------------------------------------------------CUT
@@ -180,18 +188,20 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
   }) || {
     mkdir -p in
     echo 0 > in/in
-    $ECHO "$GREY[*] running afl-fuzz for ${AFL_GCC}, this will take approx 10 seconds"
-    {
-      ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
-    } >>errors 2>&1
-    test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
-      $ECHO "$GREEN[+] afl-fuzz is working correctly with ${AFL_GCC}"
-    } || {
-      echo CUT------------------------------------------------------------------CUT
-      cat errors
-      echo CUT------------------------------------------------------------------CUT
-      $ECHO "$RED[!] afl-fuzz is not working correctly with ${AFL_GCC}"
-      CODE=1
+    test -z "$SKIP" && {
+      $ECHO "$GREY[*] running afl-fuzz for ${AFL_GCC}, this will take approx 10 seconds"
+      {
+        ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
+      } >>errors 2>&1
+      test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
+        $ECHO "$GREEN[+] afl-fuzz is working correctly with ${AFL_GCC}"
+      } || {
+        echo CUT------------------------------------------------------------------CUT
+        cat errors
+        echo CUT------------------------------------------------------------------CUT
+        $ECHO "$RED[!] afl-fuzz is not working correctly with ${AFL_GCC}"
+        CODE=1
+      }
     }
     echo 000000000000000000000000 > in/in2
     echo AAA > in/in3
@@ -220,9 +230,9 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
       case "$CNT" in
         *2) $ECHO "$GREEN[+] afl-cmin.bash correctly minimized the number of testcases" ;;
         1)  {
-            test -s in2/* && $ECHO "$YELLOW[?] afl-cmin did minimize to one testcase. This can be a bug or due compiler optimization."
+            test -s in2/* && $ECHO "$YELLOW[?] afl-cmin.bash did minimize to one testcase. This can be a bug or due compiler optimization."
               test -s in2/* || {
-  		$ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases ($CNT)"
+  		$ECHO "$RED[!] afl-cmin.bash did not correctly minimize the number of testcases ($CNT)"
           	CODE=1
               }
             }
diff --git a/test/test-gcc-plugin.sh b/test/test-gcc-plugin.sh
index 71d86364..2b09e753 100755
--- a/test/test-gcc-plugin.sh
+++ b/test/test-gcc-plugin.sh
@@ -19,13 +19,15 @@ test -e ../afl-gcc-fast -a -e ../afl-compiler-rt.o && {
       } || {
         $ECHO "$GREEN[+] gcc_plugin instrumentation present and working correctly"
         TUPLES=`echo 0|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain.gccpi 2>&1 | grep Captur | awk '{print$3}'`
-        test "$TUPLES" -gt 3 -a "$TUPLES" -lt 7 && {
+        test "$TUPLES" -gt 3 -a "$TUPLES" -lt 9 && {
           $ECHO "$GREEN[+] gcc_plugin run reported $TUPLES instrumented locations which is fine"
         } || {
           $ECHO "$RED[!] gcc_plugin instrumentation produces a weird numbers: $TUPLES"
           $ECHO "$YELLOW[-] this is a known issue in gcc, not afl++. It is not flagged as an error because travis builds would all fail otherwise :-("
           #CODE=1
         }
+        test "$TUPLES" -lt 4 && SKIP=1
+        true
       }
     } || {
       $ECHO "$RED[!] gcc_plugin instrumentation failed"
@@ -60,22 +62,24 @@ test -e ../afl-gcc-fast -a -e ../afl-compiler-rt.o && {
     CODE=1
     true
   }) || {
-    mkdir -p in
-    echo 0 > in/in
-    $ECHO "$GREY[*] running afl-fuzz for gcc_plugin, this will take approx 10 seconds"
-    {
-      ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain.gccpi >>errors 2>&1
-    } >>errors 2>&1
-    test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
-      $ECHO "$GREEN[+] afl-fuzz is working correctly with gcc_plugin"
-    } || {
-      echo CUT------------------------------------------------------------------CUT
-      cat errors
-      echo CUT------------------------------------------------------------------CUT
-      $ECHO "$RED[!] afl-fuzz is not working correctly with gcc_plugin"
-      CODE=1
+    test -z "$SKIP" && {
+      mkdir -p in
+      echo 0 > in/in
+      $ECHO "$GREY[*] running afl-fuzz for gcc_plugin, this will take approx 10 seconds"
+      {
+        ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain.gccpi >>errors 2>&1
+      } >>errors 2>&1
+      test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
+        $ECHO "$GREEN[+] afl-fuzz is working correctly with gcc_plugin"
+      } || {
+        echo CUT------------------------------------------------------------------CUT
+        cat errors
+        echo CUT------------------------------------------------------------------CUT
+        $ECHO "$RED[!] afl-fuzz is not working correctly with gcc_plugin"
+        CODE=1
+      }
+      rm -rf in out errors
     }
-    rm -rf in out errors
   }
   rm -f test-instr.plain.gccpi
 
diff --git a/test/test-llvm.sh b/test/test-llvm.sh
index 4fcaf367..09ade0c3 100755
--- a/test/test-llvm.sh
+++ b/test/test-llvm.sh
@@ -31,6 +31,8 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
           $ECHO "$RED[!] llvm_mode instrumentation produces weird numbers: $TUPLES"
           CODE=1
         }
+        test "$TUPLES" -lt 4 && SKIP=1
+        true
       }
     } || {
       $ECHO "$RED[!] llvm_mode instrumentation failed"
@@ -66,18 +68,20 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
   }) || {
     mkdir -p in
     echo 0 > in/in
-    $ECHO "$GREY[*] running afl-fuzz for llvm_mode, this will take approx 10 seconds"
-    {
-      ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
-    } >>errors 2>&1
-    test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
-      $ECHO "$GREEN[+] afl-fuzz is working correctly with llvm_mode"
-    } || {
-      echo CUT------------------------------------------------------------------CUT
-      cat errors
-      echo CUT------------------------------------------------------------------CUT
-      $ECHO "$RED[!] afl-fuzz is not working correctly with llvm_mode"
-      CODE=1
+    test -z "$SKIP" && {
+      $ECHO "$GREY[*] running afl-fuzz for llvm_mode, this will take approx 10 seconds"
+      {
+        ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
+      } >>errors 2>&1
+      test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
+        $ECHO "$GREEN[+] afl-fuzz is working correctly with llvm_mode"
+      } || {
+        echo CUT------------------------------------------------------------------CUT
+        cat errors
+        echo CUT------------------------------------------------------------------CUT
+        $ECHO "$RED[!] afl-fuzz is not working correctly with llvm_mode"
+        CODE=1
+      }
     }
     test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc" || {
       echo 000000000000000000000000 > in/in2
@@ -133,6 +137,7 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
       }
       rm -f test-instr.instrim test.out
     } || {
+      cat test.out
       $ECHO "$RED[!] llvm_mode InsTrim compilation failed"
       CODE=1
     }
diff --git a/test/test-pre.sh b/test/test-pre.sh
index 4c708a68..85ac320b 100755
--- a/test/test-pre.sh
+++ b/test/test-pre.sh
@@ -90,6 +90,7 @@ unset AFL_CUSTOM_MUTATOR_LIBRARY
 unset AFL_PYTHON_MODULE
 unset AFL_PRELOAD
 unset LD_PRELOAD
+unset SKIP
 
 rm -rf in in2 out
 
diff --git a/utils/afl_proxy/afl-proxy.c b/utils/afl_proxy/afl-proxy.c
index f2dfeac1..352e78e4 100644
--- a/utils/afl_proxy/afl-proxy.c
+++ b/utils/afl_proxy/afl-proxy.c
@@ -213,7 +213,7 @@ int main(int argc, char *argv[]) {
   u32 len;
 
   /* here you specify the map size you need that you are reporting to
-     afl-fuzz. */
+     afl-fuzz.  Any value is fine as long as it is a multiple of 8. */
   __afl_map_size = MAP_SIZE;  // default is 65536
 
   /* then we initialize the shared memory map and start the forkserver */
diff --git a/utils/afl_untracer/afl-untracer.c b/utils/afl_untracer/afl-untracer.c
index cb6f948c..695f8dd1 100644
--- a/utils/afl_untracer/afl-untracer.c
+++ b/utils/afl_untracer/afl-untracer.c
@@ -568,7 +568,7 @@ void setup_trap_instrumentation(void) {
     lib_addr[offset] = 0xcc;  // replace instruction with debug trap
     if (debug)
       fprintf(stderr,
-              "Patch entry: %p[%x] = %p = %02x -> SHADOW(%p) #%d -> %08x\n",
+              "Patch entry: %p[%lx] = %p = %02x -> SHADOW(%p) #%d -> %08x\n",
               lib_addr, offset, lib_addr + offset, orig_byte, shadow,
               bitmap_index, *shadow);
 
@@ -582,7 +582,7 @@ void setup_trap_instrumentation(void) {
     *patch_bytes = 0xd4200000;  // replace instruction with debug trap
     if (debug)
       fprintf(stderr,
-              "Patch entry: %p[%x] = %p = %02x -> SHADOW(%p) #%d -> %016x\n",
+              "Patch entry: %p[%lx] = %p = %02x -> SHADOW(%p) #%d -> %016x\n",
               lib_addr, offset, lib_addr + offset, orig_bytes, shadow,
               bitmap_index, *shadow);