about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/ci.yml27
-rw-r--r--.github/workflows/codeql-analysis.yml32
-rw-r--r--.gitignore1
-rw-r--r--.travis.yml59
-rw-r--r--GNUmakefile61
-rw-r--r--GNUmakefile.gcc_plugin2
-rw-r--r--GNUmakefile.llvm4
-rw-r--r--README.md2
-rwxr-xr-xafl-cmin44
-rwxr-xr-xafl-system-config1
-rw-r--r--docs/Changelog.md18
-rw-r--r--docs/binaryonly_fuzzing.md5
-rw-r--r--docs/env_variables.md7
-rw-r--r--include/afl-fuzz.h20
-rw-r--r--include/alloc-inl.h18
-rw-r--r--include/config.h19
-rw-r--r--include/coverage-32.h112
-rw-r--r--include/coverage-64.h189
-rw-r--r--include/debug.h78
-rw-r--r--include/envs.h6
-rw-r--r--include/forkserver.h2
-rw-r--r--instrumentation/Makefile2
-rw-r--r--instrumentation/README.llvm.md21
-rw-r--r--instrumentation/afl-compiler-rt.o.c4
-rw-r--r--instrumentation/afl-gcc-pass.so.cc10
-rw-r--r--instrumentation/afl-llvm-common.cc10
-rwxr-xr-xqemu_mode/build_qemu_support.sh143
-rw-r--r--src/afl-analyze.c20
-rw-r--r--src/afl-cc.c145
-rw-r--r--src/afl-common.c5
-rw-r--r--src/afl-forkserver.c4
-rw-r--r--src/afl-fuzz-bitmap.c330
-rw-r--r--src/afl-fuzz-init.c2
-rw-r--r--src/afl-fuzz-run.c9
-rw-r--r--src/afl-fuzz-state.c17
-rw-r--r--src/afl-fuzz-stats.c2
-rw-r--r--src/afl-fuzz.c21
-rw-r--r--src/afl-performance.c124
-rw-r--r--src/afl-showmap.c28
-rw-r--r--src/afl-tmin.c20
-rwxr-xr-xtest/test-basic.sh66
-rwxr-xr-xtest/test-gcc-plugin.sh36
-rwxr-xr-xtest/test-llvm.sh29
-rwxr-xr-xtest/test-pre.sh1
-rw-r--r--utils/afl_proxy/afl-proxy.c2
-rw-r--r--utils/afl_untracer/afl-untracer.c4
46 files changed, 1154 insertions, 608 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1f7d23f4..31cfceaf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: C/C++ CI
+name: CI
 
 on:
   push:
@@ -8,12 +8,21 @@ on:
 
 jobs:
   build:
-
-    runs-on: ubuntu-latest
-
+    runs-on: '${{ matrix.os }}'
+    strategy:
+      matrix:
+        os: [ubuntu-20.04, ubuntu-18.04]
     steps:
-    - uses: actions/checkout@v2
-    - name: make
-      run: make
-    - name: make tests
-      run: make tests
+      - uses: actions/checkout@v2
+      - name: debug
+        run: apt-cache search plugin-dev | grep gcc- ; echo ; apt-cache search clang-format- | grep clang-format-
+      - name: install packages
+        run: sudo apt-get install -y -m -f --install-suggests build-essential git libtool libtool-bin automake bison libglib2.0-0 clang llvm-dev libc++-dev findutils libcmocka-dev python3-dev python3-setuptools
+      - name: compiler installed
+        run: gcc -v ; echo ; clang -v
+      - name: install gcc plugin
+        run: sudo apt-get install -y -m -f --install-suggests $(readlink /usr/bin/gcc)-plugin-dev
+      - name: build afl++
+        run: make distrib ASAN_BUILD=1
+      - name: run tests
+        run: sudo -E ./afl-system-config ; export AFL_SKIP_CPUFREQ=1 ; make tests
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
new file mode 100644
index 00000000..eda8dfd0
--- /dev/null
+++ b/.github/workflows/codeql-analysis.yml
@@ -0,0 +1,32 @@
+name: "CodeQL"
+
+on:
+  push:
+    branches: [ stable, dev ]
+  pull_request:
+    branches: [ stable, dev ]
+
+jobs:
+  analyze:
+    name: Analyze
+    runs-on: ubuntu-latest
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: [ 'cpp' ]
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v2
+
+    - name: Initialize CodeQL
+      uses: github/codeql-action/init@v1
+      with:
+        languages: ${{ matrix.language }}
+
+    - name: Autobuild
+      uses: github/codeql-action/autobuild@v1
+
+    - name: Perform CodeQL Analysis
+      uses: github/codeql-action/analyze@v1
diff --git a/.gitignore b/.gitignore
index 82a81605..fa820833 100644
--- a/.gitignore
+++ b/.gitignore
@@ -82,3 +82,4 @@ examples/aflpp_driver/libAFLQemuDriver.a
 libAFLDriver.a
 libAFLQemuDriver.a
 test/.afl_performance
+gmon.out
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index b8b36e6b..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-language: c
-
-sudo: required
-
-branches:
-  only:
-    - stable
-    - dev
-
-matrix:
-  include:
-  #- os: linux # again disabled because fetching packages times out very often :(
-  #  dist: focal
-  #  env: NAME="focal-amd64" MODERN="yes" GCC="9"
-  - os: linux
-    dist: bionic
-    env: NAME="bionic-amd64" MODERN="yes" GCC="7"
-  - os: linux
-    dist: xenial
-    env: NAME="xenial-amd64" MODERN="no" GCC="5" EXTRA="libtool-bin clang-6.0"
-#  - os: linux # disabled: fatal: unable to access 'https://git.qemu.org/git/capstone/': gnutls_handshake() failed: Handshake failed
-#    dist: trusty
-#    env: NAME="trusty-amd64" MODERN="no" GCC="4.8"
-  - os: linux # until travis can fix this!
-    dist: xenial
-    arch: arm64
-    env: NAME="xenial-arm64" MODERN="no" GCC="5" EXTRA="libtool-bin clang-6.0" AFL_NO_X86="1" CPU_TARGET="aarch64"
-#  - os: osx
-#    osx_image: xcode11.2
-#    env: NAME="osx" HOMEBREW_NO_ANALYTICS="1" LINK="http://releases.llvm.org/9.0.0/" NAME="clang+llvm-9.0.0-x86_64-darwin-apple"
-
-jobs:
-  allow_failures:
-    - os: osx
-    - arch: arm64
-
-env:
-  - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1
- # - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_EXIT_WHEN_DONE=1
- # TODO: test AFL_BENCH_UNTIL_CRASH once we have a target that crashes
- # - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_BENCH_JUST_ONE=1
-
-before_install:
-  # export LLVM_DIR=${TRAVIS_BUILD_DIR}/${LLVM_PACKAGE}
-  - echo Testing on $NAME
-  - if [ "$TRAVIS_OS_NAME" = "osx" ]; then wget "$LINK""$NAME".tar.xz ; export LLVM_CONFIG=`pwd`/"$NAME" ; tar xJf "$NAME".tar.xz ; fi
-  - if [ "$MODERN" = "yes" ]; then sudo apt update ; sudo apt upgrade ; sudo apt install -y git libtool libtool-bin automake bison libglib2.0-0 build-essential clang gcc-"$GCC" gcc-"$GCC"-plugin-dev libc++-"$GCC"-dev findutils libcmocka-dev python3-setuptools ; fi
-  - if [ "$MODERN" = "no" ]; then sudo apt update ; sudo apt install -y git libtool $EXTRA libpixman-1-dev automake bison libglib2.0 build-essential gcc-"$GCC" gcc-"$GCC"-plugin-dev libc++-dev findutils libcmocka-dev python3-setuptools ; fi
-
-script:
-  - gcc -v
-  - clang -v
-  - sudo -E ./afl-system-config
-  - sudo sysctl -w kernel.shmmax=10000000000
-  - if [ "$TRAVIS_OS_NAME" = "osx" ]; then export LLVM_CONFIG=`pwd`/"$NAME" ; make source-only ASAN_BUILD=1 ; fi
-  - if [ "$TRAVIS_OS_NAME" = "linux" -a "$TRAVIS_CPU_ARCH" = "amd64" ]; then make distrib ASAN_BUILD=1 ; fi
-  - if [ "$TRAVIS_CPU_ARCH" = "arm64" ] ; then export LLVM_CONFIG=llvm-config-6.0 ; make ASAN_BUILD=1 ; cd qemu_mode && sh ./build_qemu_support.sh ; cd .. ; fi
-  - make tests
-#  - travis_terminate 0
diff --git a/GNUmakefile b/GNUmakefile
index 5c82279b..a1af1fd5 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -42,8 +42,8 @@ endif
 
 ifdef ASAN_BUILD
   $(info Compiling ASAN version of binaries)
-  override CFLAGS+=$(ASAN_CFLAGS)
-  LDFLAGS+=$(ASAN_LDFLAGS)
+  override CFLAGS += $(ASAN_CFLAGS)
+  LDFLAGS += $(ASAN_LDFLAGS)
 endif
 ifdef UBSAN_BUILD
   $(info Compiling UBSAN version of binaries)
@@ -77,30 +77,34 @@ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -fno-move-loop-invariants -
 	SPECIAL_PERFORMANCE += -fno-move-loop-invariants -fdisable-tree-cunrolli
 endif
 
+ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+  ifndef SOURCE_DATE_EPOCH
+    HAVE_MARCHNATIVE = 1
+    CFLAGS_OPT += -march=native
+  endif
+endif
+
 ifneq "$(shell uname)" "Darwin"
- ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-   ifndef SOURCE_DATE_EPOCH
- 	#CFLAGS_OPT += -march=native
- 	SPECIAL_PERFORMANCE += -march=native
-   endif
- endif
+  ifeq "$(HAVE_MARCHNATIVE)" "1"
+    SPECIAL_PERFORMANCE += -march=native
+  endif
  # OS X does not like _FORTIFY_SOURCE=2
- ifndef DEBUG
-   CFLAGS_OPT += -D_FORTIFY_SOURCE=2
- endif
+  ifndef DEBUG
+    CFLAGS_OPT += -D_FORTIFY_SOURCE=2
+  endif
 endif
 
 ifeq "$(shell uname)" "SunOS"
- CFLAGS_OPT += -Wno-format-truncation
- LDFLAGS=-lkstat -lrt
+  CFLAGS_OPT += -Wno-format-truncation
+  LDFLAGS = -lkstat -lrt
 endif
 
 ifdef STATIC
   $(info Compiling static version of binaries, disabling python though)
   # Disable python for static compilation to simplify things
-  PYTHON_OK=0
+  PYTHON_OK = 0
   PYFLAGS=
-  PYTHON_INCLUDE=/
+  PYTHON_INCLUDE = /
 
   CFLAGS_OPT += -static
   LDFLAGS += -lm -lpthread -lz -lutil
@@ -117,6 +121,7 @@ ifdef INTROSPECTION
   CFLAGS_OPT += -DINTROSPECTION=1
 endif
 
+
 ifneq "$(shell uname -m)" "x86_64"
  ifneq "$(patsubst i%86,i386,$(shell uname -m))" "i386"
   ifneq "$(shell uname -m)" "amd64"
@@ -131,7 +136,7 @@ ifdef DEBUG
   $(info Compiling DEBUG version of binaries)
   CFLAGS += -ggdb3 -O0 -Wall -Wextra -Werror
 else
-  CFLAGS     ?= -O3 -funroll-loops $(CFLAGS_OPT)
+  CFLAGS ?= -O3 -funroll-loops $(CFLAGS_OPT)
 endif
 
 override CFLAGS += -g -Wno-pointer-sign -Wno-variadic-macros -Wall -Wextra -Wpointer-arith \
@@ -512,23 +517,23 @@ code-format:
 ifndef AFL_NO_X86
 test_build: afl-cc afl-gcc afl-as afl-showmap
 	@echo "[*] Testing the CC wrapper afl-cc and its instrumentation output..."
-	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN AFL_CC; AFL_INST_RATIO=100 AFL_PATH=. ./afl-cc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-cc failed"; exit 1 )
+	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN AFL_CC; ASAN_OPTIONS=detect_leaks=0 AFL_INST_RATIO=100 AFL_PATH=. ./afl-cc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-cc failed"; exit 1 )
 	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
 	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
 	@rm -f test-instr
 	@cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation of afl-cc does not seem to be behaving correctly!"; echo; echo "Please post to https://github.com/AFLplusplus/AFLplusplus/issues to troubleshoot the issue."; echo; exit 1; fi
 	@echo
 	@echo "[+] All right, the instrumentation of afl-cc seems to be working!"
-	@echo "[*] Testing the CC wrapper afl-gcc and its instrumentation output..."
-	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN AFL_CC; AFL_INST_RATIO=100 AFL_PATH=. ./afl-gcc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-gcc failed"; exit 1 )
-	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
-	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
-	@rm -f test-instr
-	@cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation of afl-gcc does not seem to be behaving correctly!"; \
-		gcc -v 2>&1 | grep -q -- --with-as= && ( echo; echo "Gcc is configured not to use an external assembler with the -B option."; echo "See docs/INSTALL.md section 5 how to build a -B enabled gcc." ) || \
-		( echo; echo "Please post to https://github.com/AFLplusplus/AFLplusplus/issues to troubleshoot the issue." ); echo; exit 0; fi
-	@echo
-	@echo "[+] All right, the instrumentation of afl-gcc seems to be working!"
+#	@echo "[*] Testing the CC wrapper afl-gcc and its instrumentation output..."
+#	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN; AFL_CC=$(CC) ASAN_OPTIONS=detect_leaks=0 AFL_INST_RATIO=100 AFL_PATH=. ./afl-gcc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-gcc failed"; exit 1 )
+#	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
+#	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
+#	@rm -f test-instr
+#	@cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation of afl-gcc does not seem to be behaving correctly!"; \
+#		gcc -v 2>&1 | grep -q -- --with-as= && ( echo; echo "Gcc is configured not to use an external assembler with the -B option."; echo "See docs/INSTALL.md section 5 how to build a -B enabled gcc." ) || \
+#		( echo; echo "Please post to https://github.com/AFLplusplus/AFLplusplus/issues to troubleshoot the issue." ); echo; exit 0; fi
+#	@echo
+#	@echo "[+] All right, the instrumentation of afl-gcc seems to be working!"
 else
 test_build: afl-cc afl-as afl-showmap
 	@echo "[!] Note: skipping build tests (you may need to use LLVM or QEMU mode)."
@@ -586,7 +591,7 @@ distrib: all
 	-cd unicorn_mode && unset CFLAGS && sh ./build_unicorn_support.sh
 
 .PHONY: binary-only
-binary-only: all
+binary-only: test_shm test_python ready $(PROGS)
 	$(MAKE) -C utils/libdislocator
 	$(MAKE) -C utils/libtokencap
 	$(MAKE) -C utils/afl_network_proxy
diff --git a/GNUmakefile.gcc_plugin b/GNUmakefile.gcc_plugin
index ee211c24..aa93c688 100644
--- a/GNUmakefile.gcc_plugin
+++ b/GNUmakefile.gcc_plugin
@@ -138,7 +138,7 @@ afl-common.o: ./src/afl-common.c
 .PHONY: test_build
 test_build: $(PROGS)
 	@echo "[*] Testing the CC wrapper and instrumentation output..."
-	unset AFL_USE_ASAN AFL_USE_MSAN; AFL_QUIET=1 AFL_INST_RATIO=100 AFL_PATH=. AFL_CC=$(CC) ./afl-gcc-fast $(CFLAGS) $(CPPFLAGS) ./test-instr.c -o test-instr $(LDFLAGS)
+	unset AFL_USE_ASAN AFL_USE_MSAN; ASAN_OPTIONS=detect_leaks=0 AFL_QUIET=1 AFL_INST_RATIO=100 AFL_PATH=. AFL_CC=$(CC) ./afl-gcc-fast $(CFLAGS) $(CPPFLAGS) ./test-instr.c -o test-instr $(LDFLAGS)
 	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr </dev/null
 	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
 	@rm -f test-instr
diff --git a/GNUmakefile.llvm b/GNUmakefile.llvm
index 414cd487..3554c8bf 100644
--- a/GNUmakefile.llvm
+++ b/GNUmakefile.llvm
@@ -357,7 +357,7 @@ instrumentation/afl-common.o: ./src/afl-common.c
 	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ $(LDFLAGS)
 
 ./afl-cc: src/afl-cc.c instrumentation/afl-common.o
-	$(CC) $(CLANG_CFL) $(CFLAGS) $(CPPFLAGS) $< instrumentation/afl-common.o -o $@ -DLLVM_MINOR=$(LLVM_MINOR) -DLLVM_MAJOR=$(LLVM_MAJOR) $(LDFLAGS) -DCFLAGS_OPT=\"$(CFLAGS_OPT)\"
+	$(CC) $(CLANG_CFL) $(CFLAGS) $(CPPFLAGS) $< instrumentation/afl-common.o -o $@ -DLLVM_MINOR=$(LLVM_MINOR) -DLLVM_MAJOR=$(LLVM_MAJOR) $(LDFLAGS) -DCFLAGS_OPT=\"$(CFLAGS_OPT)\" -lm
 	@ln -sf afl-cc ./afl-c++
 	@ln -sf afl-cc ./afl-gcc
 	@ln -sf afl-cc ./afl-g++
@@ -452,7 +452,7 @@ document:
 .PHONY: test_build
 test_build: $(PROGS)
 	@echo "[*] Testing the CC wrapper and instrumentation output..."
-	unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_LLVM_LAF_ALL=1 ./afl-cc $(CFLAGS) $(CPPFLAGS) ./test-instr.c -o test-instr $(LDFLAGS)
+	unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; ASAN_OPTIONS=detect_leaks=0 AFL_QUIET=1 AFL_PATH=. AFL_LLVM_LAF_ALL=1 ./afl-cc $(CFLAGS) $(CPPFLAGS) ./test-instr.c -o test-instr $(LDFLAGS)
 	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
 	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
 	@rm -f test-instr
diff --git a/README.md b/README.md
index 68b64ce6..a0e7a7e4 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 
   Release Version: [3.00c](https://github.com/AFLplusplus/AFLplusplus/releases)
 
-  Github Version: 3.00a
+  Github Version: 3.01a
 
   Repository: [https://github.com/AFLplusplus/AFLplusplus](https://github.com/AFLplusplus/AFLplusplus)
 
diff --git a/afl-cmin b/afl-cmin
index 292d9d9d..eef2b7ef 100755
--- a/afl-cmin
+++ b/afl-cmin
@@ -366,33 +366,35 @@ BEGIN {
     cp_tool = "cp"
   }
 
-  # Make sure that we can actually get anything out of afl-showmap before we
-  # waste too much time.
+  if (!ENVIRON["AFL_SKIP_BIN_CHECK"]) {
+    # Make sure that we can actually get anything out of afl-showmap before we
+    # waste too much time.
 
-  print "[*] Testing the target binary..."
+    print "[*] Testing the target binary..."
 
-  if (!stdin_file) {
-    system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
-  } else {
-    system("cp "in_dir"/"first_file" "stdin_file)
-    system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
-  }
+    if (!stdin_file) {
+      system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
+    } else {
+      system("cp "in_dir"/"first_file" "stdin_file)
+      system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
+    }
 
-  first_count = 0
+    first_count = 0
 
-  runtest = trace_dir"/.run_test"
-  while ((getline < runtest) > 0) {
-    ++first_count
-  }
+    runtest = trace_dir"/.run_test"
+    while ((getline < runtest) > 0) {
+      ++first_count
+    }
 
-  if (first_count) {
-    print "[+] OK, "first_count" tuples recorded."
-  } else {
-    print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr"
-    if (!ENVIRON["AFL_KEEP_TRACES"]) {
-      system("rm -rf "trace_dir" 2>/dev/null")
+    if (first_count) {
+      print "[+] OK, "first_count" tuples recorded."
+    } else {
+      print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr"
+      if (!ENVIRON["AFL_KEEP_TRACES"]) {
+        system("rm -rf "trace_dir" 2>/dev/null")
+      }
+      exit 1
     }
-    exit 1
   }
 
   # Let's roll!
diff --git a/afl-system-config b/afl-system-config
index 4ca9f0a9..7031544c 100755
--- a/afl-system-config
+++ b/afl-system-config
@@ -80,3 +80,4 @@ if [ "$PLATFORM" = "Darwin" ] ; then
   DONE=1
 fi
 test -z "$DONE" && echo Error: Unknown platform: $PLATFORM
+exit 0
diff --git a/docs/Changelog.md b/docs/Changelog.md
index d77c276b..cf9bfbe1 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -9,6 +9,22 @@ Want to stay in the loop on major new features? Join our mailing list by
 sending a mail to <afl-users+subscribe@googlegroups.com>.
 
 
+### Version ++3.01a (release)
+  - afl-fuzz
+    - fix crash for very, very fast targets+systems (thanks to mhlakhani
+      for reporting)
+    - switched to a faster RNG
+    - added hghwng's patch for faster trace map analysis
+  - afl-cc
+    - allow instrumenting LLVMFuzzerTestOneInput
+    - fixed endless loop for allow/blocklist lines starting with a
+      comment (thanks to Zherya for reporting)
+    - added AFL_LLVM_INSTRUMENT option NATIVE for native clang pc-guard
+      support (less performant than our own), GCC for old afl-gcc and
+      CLANG for old afl-clang
+  - added dummy Makefile to instrumentation/
+
+
 ### Version ++3.00c (release)
   - llvm_mode/ and gcc_plugin/ moved to instrumentation/
   - examples/ renamed to utils/
@@ -46,6 +62,8 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
     - somewhere we broke -n dumb fuzzing, fixed
     - added afl_custom_describe to the custom mutator API to allow for easy
       mutation reproduction on crashing inputs
+    - new env. var. AFL_NO_COLOR (or AFL_NO_COLOUR) to suppress colored
+      console output (when configured with USE_COLOR and not ALWAYS_COLORED)
   - instrumentation
     - We received an enhanced gcc_plugin module from AdaCore, thank you
       very much!!
diff --git a/docs/binaryonly_fuzzing.md b/docs/binaryonly_fuzzing.md
index 66734452..787d970d 100644
--- a/docs/binaryonly_fuzzing.md
+++ b/docs/binaryonly_fuzzing.md
@@ -174,7 +174,7 @@
 
   Pintool and Dynamorio are dynamic instrumentation engines, and they can be
   used for getting basic block information at runtime.
-  Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows
+  Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows,
   whereas Dynamorio is additionally available for ARM and AARCH64.
   Dynamorio is also 10x faster than Pintool.
 
@@ -182,7 +182,7 @@
   Dynamorio has a speed decrease of 98-99%
   Pintool has a speed decrease of 99.5%
 
-  Hence Dynamorio is the option to go for if everything fails, and Pintool
+  Hence Dynamorio is the option to go for if everything else fails, and Pintool
   only if Dynamorio fails too.
 
   Dynamorio solutions:
@@ -205,6 +205,7 @@
   * QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym)
   * Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore)
   * S2E: [https://github.com/S2E](https://github.com/S2E)
+  * Tinyinst [https://github.com/googleprojectzero/TinyInst](https://github.com/googleprojectzero/TinyInst) (Mac/Windows only)
   *  ... please send me any missing that are good
 
 
diff --git a/docs/env_variables.md b/docs/env_variables.md
index e203055f..e6b9381b 100644
--- a/docs/env_variables.md
+++ b/docs/env_variables.md
@@ -113,11 +113,15 @@ Then there are a few specific features that are only available in instrumentatio
 
     - `AFL_LLVM_INSTRUMENT` - this configures the instrumentation mode. 
       Available options:
+        PCGUARD - our own pcgard based instrumentation (default)
+        NATIVE - clang's original pcguard based instrumentation
         CLASSIC - classic AFL (map[cur_loc ^ prev_loc >> 1]++) (default)
         CFG - InsTrim instrumentation (see below)
         LTO - LTO instrumentation (see below)
         CTX - context sensitive instrumentation (see below)
         NGRAM-x - deeper previous location coverage (from NGRAM-2 up to NGRAM-16)
+        GCC - outdated gcc instrumentation
+        CLANG - outdated clang instrumentation
       In CLASSIC (default) and CFG/INSTRIM you can also specify CTX and/or
       NGRAM, seperate the options with a comma "," then, e.g.:
         `AFL_LLVM_INSTRUMENT=CFG,CTX,NGRAM-4`
@@ -381,6 +385,9 @@ checks or alter some of the more exotic semantics of the tool:
     some basic stats. This behavior is also automatically triggered when the
     output from afl-fuzz is redirected to a file or to a pipe.
 
+  - Setting `AFL_NO_COLOR` or `AFL_NO_COLOUR` will omit control sequences for
+    coloring console output when configured with USE_COLOR and not ALWAYS_COLORED.
+
   - Setting `AFL_FORCE_UI` will force painting the UI on the screen even if
     no valid terminal was detected (for virtual consoles)
 
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index 2f2d31d3..e2fb0344 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -134,6 +134,12 @@
 // Little helper to access the ptr to afl->##name_buf - for use in afl_realloc.
 #define AFL_BUF_PARAM(name) ((void **)&afl->name##_buf)
 
+#ifdef WORD_SIZE_64
+  #define AFL_RAND_RETURN u64
+#else
+  #define AFL_RAND_RETURN u32
+#endif
+
 extern s8  interesting_8[INTERESTING_8_LEN];
 extern s16 interesting_16[INTERESTING_8_LEN + INTERESTING_16_LEN];
 extern s32
@@ -580,7 +586,7 @@ typedef struct afl_state {
 
   u32 rand_cnt;                         /* Random number counter            */
 
-  u64 rand_seed[4];
+  u64 rand_seed[3];
   s64 init_seed;
 
   u64 total_cal_us,                     /* Total calibration time (us)      */
@@ -1014,12 +1020,12 @@ void write_bitmap(afl_state_t *);
 u32  count_bits(afl_state_t *, u8 *);
 u32  count_bytes(afl_state_t *, u8 *);
 u32  count_non_255_bytes(afl_state_t *, u8 *);
-#ifdef WORD_SIZE_64
-void simplify_trace(afl_state_t *, u64 *);
+void simplify_trace(afl_state_t *, u8 *);
 void classify_counts(afl_forkserver_t *);
+#ifdef WORD_SIZE_64
+void discover_word(u8 *ret, u64 *current, u64 *virgin);
 #else
-void simplify_trace(afl_state_t *, u32 *);
-void classify_counts(afl_forkserver_t *);
+void discover_word(u8 *ret, u32 *current, u32 *virgin);
 #endif
 void init_count_class16(void);
 void minimize_bits(afl_state_t *, u8 *, u8 *);
@@ -1028,6 +1034,7 @@ u8 *describe_op(afl_state_t *, u8, size_t);
 #endif
 u8 save_if_interesting(afl_state_t *, void *, u32, u8);
 u8 has_new_bits(afl_state_t *, u8 *);
+u8 has_new_bits_unclassified(afl_state_t *, u8 *);
 
 /* Extras */
 
@@ -1111,8 +1118,7 @@ u8 common_fuzz_cmplog_stuff(afl_state_t *afl, u8 *out_buf, u32 len);
 u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len,
                         u64 exec_cksum);
 
-/* xoshiro256** */
-uint64_t rand_next(afl_state_t *afl);
+AFL_RAND_RETURN rand_next(afl_state_t *afl);
 
 /* probability between 0.0 and 1.0 */
 double rand_next_percent(afl_state_t *afl);
diff --git a/include/alloc-inl.h b/include/alloc-inl.h
index 3044b7a0..8a91d196 100644
--- a/include/alloc-inl.h
+++ b/include/alloc-inl.h
@@ -271,7 +271,7 @@ static inline void *DFL_ck_alloc_nozero(u32 size) {
   ret = malloc(size + ALLOC_OFF_TOTAL);
   ALLOC_CHECK_RESULT(ret, size);
 
-  ret += ALLOC_OFF_HEAD;
+  ret = (char *)ret + ALLOC_OFF_HEAD;
 
   ALLOC_C1(ret) = ALLOC_MAGIC_C1;
   ALLOC_S(ret) = size;
@@ -311,7 +311,7 @@ static inline void DFL_ck_free(void *mem) {
 
   ALLOC_C1(mem) = ALLOC_MAGIC_F;
 
-  free(mem - ALLOC_OFF_HEAD);
+  free((char *)mem - ALLOC_OFF_HEAD);
 
 }
 
@@ -340,7 +340,7 @@ static inline void *DFL_ck_realloc(void *orig, u32 size) {
   #endif                                                    /* !DEBUG_BUILD */
 
     old_size = ALLOC_S(orig);
-    orig -= ALLOC_OFF_HEAD;
+    orig = (char *)orig - ALLOC_OFF_HEAD;
 
     ALLOC_CHECK_SIZE(old_size);
 
@@ -363,10 +363,10 @@ static inline void *DFL_ck_realloc(void *orig, u32 size) {
 
   if (orig) {
 
-    memcpy(ret + ALLOC_OFF_HEAD, orig + ALLOC_OFF_HEAD, MIN(size, old_size));
-    memset(orig + ALLOC_OFF_HEAD, 0xFF, old_size);
+    memcpy((char *)ret + ALLOC_OFF_HEAD, (char *)orig + ALLOC_OFF_HEAD, MIN(size, old_size));
+    memset((char *)orig + ALLOC_OFF_HEAD, 0xFF, old_size);
 
-    ALLOC_C1(orig + ALLOC_OFF_HEAD) = ALLOC_MAGIC_F;
+    ALLOC_C1((char *)orig + ALLOC_OFF_HEAD) = ALLOC_MAGIC_F;
 
     free(orig);
 
@@ -374,13 +374,13 @@ static inline void *DFL_ck_realloc(void *orig, u32 size) {
 
   #endif                                                   /* ^!DEBUG_BUILD */
 
-  ret += ALLOC_OFF_HEAD;
+  ret = (char *)ret + ALLOC_OFF_HEAD;
 
   ALLOC_C1(ret) = ALLOC_MAGIC_C1;
   ALLOC_S(ret) = size;
   ALLOC_C2(ret) = ALLOC_MAGIC_C2;
 
-  if (size > old_size) memset(ret + old_size, 0, size - old_size);
+  if (size > old_size) memset((char *)ret + old_size, 0, size - old_size);
 
   return ret;
 
@@ -401,7 +401,7 @@ static inline u8 *DFL_ck_strdup(u8 *str) {
   ret = malloc(size + ALLOC_OFF_TOTAL);
   ALLOC_CHECK_RESULT(ret, size);
 
-  ret += ALLOC_OFF_HEAD;
+  ret = (char *)ret + ALLOC_OFF_HEAD;
 
   ALLOC_C1(ret) = ALLOC_MAGIC_C1;
   ALLOC_S(ret) = size;
diff --git a/include/config.h b/include/config.h
index 93249ed9..e8a49270 100644
--- a/include/config.h
+++ b/include/config.h
@@ -28,7 +28,7 @@
 /* Version string: */
 
 // c = release, d = volatile github dev, e = experimental branch
-#define VERSION "++3.00c"
+#define VERSION "++3.01a"
 
 /******************************************************
  *                                                    *
@@ -36,11 +36,28 @@
  *                                                    *
  ******************************************************/
 
+/* console output colors: There are three ways to configure its behavior
+ * 1. default: colored outputs fixed on: defined USE_COLOR && defined
+ * ALWAYS_COLORED The env var. AFL_NO_COLOR will have no effect
+ * 2. defined USE_COLOR && !defined ALWAYS_COLORED
+ *    -> depending on env var AFL_NO_COLOR=1 colors can be switched off
+ *    at run-time. Default is to use colors.
+ * 3. colored outputs fixed off: !defined USE_COLOR
+ *    The env var. AFL_NO_COLOR will have no effect
+ */
+
 /* Comment out to disable terminal colors (note that this makes afl-analyze
    a lot less nice): */
 
 #define USE_COLOR
 
+#ifdef USE_COLOR
+  /* Comment in to always enable terminal colors */
+  /* Comment out to enable runtime controlled terminal colors via AFL_NO_COLOR
+   */
+  #define ALWAYS_COLORED 1
+#endif
+
 /* StatsD config
    Config can be adjusted via AFL_STATSD_HOST and AFL_STATSD_PORT environment
    variable.
diff --git a/include/coverage-32.h b/include/coverage-32.h
new file mode 100644
index 00000000..a5cc498c
--- /dev/null
+++ b/include/coverage-32.h
@@ -0,0 +1,112 @@
+#include "config.h"
+#include "types.h"
+
+u32 skim(const u32 *virgin, const u32 *current, const u32 *current_end);
+u32 classify_word(u32 word);
+
+inline u32 classify_word(u32 word) {
+
+  u16 mem16[2];
+  memcpy(mem16, &word, sizeof(mem16));
+
+  mem16[0] = count_class_lookup16[mem16[0]];
+  mem16[1] = count_class_lookup16[mem16[1]];
+
+  memcpy(&word, mem16, sizeof(mem16));
+  return word;
+
+}
+
+void simplify_trace(afl_state_t *afl, u8 *bytes) {
+
+  u32 *mem = (u32 *)bytes;
+  u32  i = (afl->fsrv.map_size >> 2);
+
+  while (i--) {
+
+    /* Optimize for sparse bitmaps. */
+
+    if (unlikely(*mem)) {
+
+      u8 *mem8 = (u8 *)mem;
+
+      mem8[0] = simplify_lookup[mem8[0]];
+      mem8[1] = simplify_lookup[mem8[1]];
+      mem8[2] = simplify_lookup[mem8[2]];
+      mem8[3] = simplify_lookup[mem8[3]];
+
+    } else
+
+      *mem = 0x01010101;
+
+    mem++;
+
+  }
+
+}
+
+inline void classify_counts(afl_forkserver_t *fsrv) {
+
+  u32 *mem = (u32 *)fsrv->trace_bits;
+  u32  i = (fsrv->map_size >> 2);
+
+  while (i--) {
+
+    /* Optimize for sparse bitmaps. */
+
+    if (unlikely(*mem)) { *mem = classify_word(*mem); }
+
+    mem++;
+
+  }
+
+}
+
+/* Updates the virgin bits, then reflects whether a new count or a new tuple is
+ * seen in ret. */
+inline void discover_word(u8 *ret, u32 *current, u32 *virgin) {
+
+  /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap
+     that have not been already cleared from the virgin map - since this will
+     almost always be the case. */
+
+  if (*current & *virgin) {
+
+    if (likely(*ret < 2)) {
+
+      u8 *cur = (u8 *)current;
+      u8 *vir = (u8 *)virgin;
+
+      /* Looks like we have not found any new bytes yet; see if any non-zero
+         bytes in current[] are pristine in virgin[]. */
+
+      if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) ||
+          (cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff))
+        *ret = 2;
+      else
+        *ret = 1;
+
+    }
+
+    *virgin &= ~*current;
+
+  }
+
+}
+
+#define PACK_SIZE 16
+inline u32 skim(const u32 *virgin, const u32 *current, const u32 *current_end) {
+
+  for (; current != current_end; virgin += 4, current += 4) {
+
+    if (current[0] && classify_word(current[0]) & virgin[0]) return 1;
+    if (current[1] && classify_word(current[1]) & virgin[1]) return 1;
+    if (current[2] && classify_word(current[2]) & virgin[2]) return 1;
+    if (current[3] && classify_word(current[3]) & virgin[3]) return 1;
+
+  }
+
+  return 0;
+
+}
+
diff --git a/include/coverage-64.h b/include/coverage-64.h
new file mode 100644
index 00000000..0ede5fa5
--- /dev/null
+++ b/include/coverage-64.h
@@ -0,0 +1,189 @@
+#include "config.h"
+#include "types.h"
+
+#if (defined(__AVX512F__) && defined(__AVX512DQ__)) || defined(__AVX2__)
+  #include <immintrin.h>
+#endif
+
+u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end);
+u64 classify_word(u64 word);
+
+inline u64 classify_word(u64 word) {
+
+  u16 mem16[4];
+  memcpy(mem16, &word, sizeof(mem16));
+
+  mem16[0] = count_class_lookup16[mem16[0]];
+  mem16[1] = count_class_lookup16[mem16[1]];
+  mem16[2] = count_class_lookup16[mem16[2]];
+  mem16[3] = count_class_lookup16[mem16[3]];
+
+  memcpy(&word, mem16, sizeof(mem16));
+  return word;
+
+}
+
+void simplify_trace(afl_state_t *afl, u8 *bytes) {
+
+  u64 *mem = (u64 *)bytes;
+  u32  i = (afl->fsrv.map_size >> 3);
+
+  while (i--) {
+
+    /* Optimize for sparse bitmaps. */
+
+    if (unlikely(*mem)) {
+
+      u8 *mem8 = (u8 *)mem;
+
+      mem8[0] = simplify_lookup[mem8[0]];
+      mem8[1] = simplify_lookup[mem8[1]];
+      mem8[2] = simplify_lookup[mem8[2]];
+      mem8[3] = simplify_lookup[mem8[3]];
+      mem8[4] = simplify_lookup[mem8[4]];
+      mem8[5] = simplify_lookup[mem8[5]];
+      mem8[6] = simplify_lookup[mem8[6]];
+      mem8[7] = simplify_lookup[mem8[7]];
+
+    } else
+
+      *mem = 0x0101010101010101ULL;
+
+    mem++;
+
+  }
+
+}
+
+inline void classify_counts(afl_forkserver_t *fsrv) {
+
+  u64 *mem = (u64 *)fsrv->trace_bits;
+  u32  i = (fsrv->map_size >> 3);
+
+  while (i--) {
+
+    /* Optimize for sparse bitmaps. */
+
+    if (unlikely(*mem)) { *mem = classify_word(*mem); }
+
+    mem++;
+
+  }
+
+}
+
+/* Updates the virgin bits, then reflects whether a new count or a new tuple is
+ * seen in ret. */
+inline void discover_word(u8 *ret, u64 *current, u64 *virgin) {
+
+  /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap
+     that have not been already cleared from the virgin map - since this will
+     almost always be the case. */
+
+  if (*current & *virgin) {
+
+    if (likely(*ret < 2)) {
+
+      u8 *cur = (u8 *)current;
+      u8 *vir = (u8 *)virgin;
+
+      /* Looks like we have not found any new bytes yet; see if any non-zero
+         bytes in current[] are pristine in virgin[]. */
+
+      if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) ||
+          (cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff) ||
+          (cur[4] && vir[4] == 0xff) || (cur[5] && vir[5] == 0xff) ||
+          (cur[6] && vir[6] == 0xff) || (cur[7] && vir[7] == 0xff))
+        *ret = 2;
+      else
+        *ret = 1;
+
+    }
+
+    *virgin &= ~*current;
+
+  }
+
+}
+
+#if defined(__AVX512F__) && defined(__AVX512DQ__)
+  #define PACK_SIZE 64
+inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
+
+  for (; current != current_end; virgin += 8, current += 8) {
+
+    __m512i  value = *(__m512i *)current;
+    __mmask8 mask = _mm512_testn_epi64_mask(value, value);
+
+    /* All bytes are zero. */
+    if (mask == 0xff) continue;
+
+      /* Look for nonzero bytes and check for new bits. */
+  #define UNROLL(x) \
+    if (!(mask & (1 << x)) && classify_word(current[x]) & virgin[x]) return 1
+    UNROLL(0);
+    UNROLL(1);
+    UNROLL(2);
+    UNROLL(3);
+    UNROLL(4);
+    UNROLL(5);
+    UNROLL(6);
+    UNROLL(7);
+  #undef UNROLL
+
+  }
+
+  return 0;
+
+}
+
+#endif
+
+#if !defined(PACK_SIZE) && defined(__AVX2__)
+  #define PACK_SIZE 32
+inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
+
+  __m256i zeroes = _mm256_setzero_si256();
+
+  for (; current != current_end; virgin += 4, current += 4) {
+
+    __m256i value = *(__m256i *)current;
+    __m256i cmp = _mm256_cmpeq_epi64(value, zeroes);
+    u32     mask = _mm256_movemask_epi8(cmp);
+
+    /* All bytes are zero. */
+    if (mask == (u32)-1) continue;
+
+    /* Look for nonzero bytes and check for new bits. */
+    if (!(mask & 0xff) && classify_word(current[0]) & virgin[0]) return 1;
+    if (!(mask & 0xff00) && classify_word(current[1]) & virgin[1]) return 1;
+    if (!(mask & 0xff0000) && classify_word(current[2]) & virgin[2]) return 1;
+    if (!(mask & 0xff000000) && classify_word(current[3]) & virgin[3]) return 1;
+
+  }
+
+  return 0;
+
+}
+
+#endif
+
+#if !defined(PACK_SIZE)
+  #define PACK_SIZE 32
+inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
+
+  for (; current != current_end; virgin += 4, current += 4) {
+
+    if (current[0] && classify_word(current[0]) & virgin[0]) return 1;
+    if (current[1] && classify_word(current[1]) & virgin[1]) return 1;
+    if (current[2] && classify_word(current[2]) & virgin[2]) return 1;
+    if (current[3] && classify_word(current[3]) & virgin[3]) return 1;
+
+  }
+
+  return 0;
+
+}
+
+#endif
+
diff --git a/include/debug.h b/include/debug.h
index 5512023c..7f4a6be1 100644
--- a/include/debug.h
+++ b/include/debug.h
@@ -168,12 +168,84 @@
  * Debug & error macros *
  ************************/
 
-/* Just print stuff to the appropriate stream. */
+#if defined USE_COLOR && !defined ALWAYS_COLORED
+  #include <unistd.h>
+  #pragma GCC diagnostic ignored "-Wformat-security"
+static inline const char *colorfilter(const char *x) {
+
+  static int once = 1;
+  static int disabled = 0;
+
+  if (once) {
+
+    /* when there is no tty -> we always want filtering
+     * when AFL_NO_UI is set filtering depends on AFL_NO_COLOR
+     * otherwise we want always colors
+     */
+    disabled =
+        isatty(2) && (!getenv("AFL_NO_UI") ||
+                      (!getenv("AFL_NO_COLOR") && !getenv("AFL_NO_COLOUR")));
+    once = 0;
+
+  }
+
+  if (likely(disabled)) return x;
+
+  static char monochromestring[4096];
+  char *      d = monochromestring;
+  int         in_seq = 0;
+
+  while (*x) {
+
+    if (in_seq && *x == 'm') {
+
+      in_seq = 0;
+
+    } else {
 
+      if (!in_seq && *x == '\x1b') { in_seq = 1; }
+      if (!in_seq) { *d++ = *x; }
+
+    }
+
+    ++x;
+
+  }
+
+  *d = '\0';
+  return monochromestring;
+
+}
+
+#else
+  #define colorfilter(x) x                        /* no filtering necessary */
+#endif
+
+/* macro magic to transform the first parameter to SAYF
+ * through colorfilter which strips coloring */
+#define GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, \
+                  _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26,  \
+                  _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38,  \
+                  _39, _40, NAME, ...)                                         \
+  NAME
+
+#define SAYF(...)                                                           \
+  GET_MACRO(__VA_ARGS__, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N,    \
+            SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, \
+            SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, \
+            SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, \
+            SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, SAYF_N, \
+            SAYF_N, SAYF_1)                                                 \
+  (__VA_ARGS__)
+
+#define SAYF_1(x) MY_SAYF(colorfilter(x))
+#define SAYF_N(x, ...) MY_SAYF(colorfilter(x), __VA_ARGS__)
+
+/* Just print stuff to the appropriate stream. */
 #ifdef MESSAGES_TO_STDOUT
-  #define SAYF(x...) printf(x)
+  #define MY_SAYF(x...) printf(x)
 #else
-  #define SAYF(x...) fprintf(stderr, x)
+  #define MY_SAYF(x...) fprintf(stderr, x)
 #endif                                               /* ^MESSAGES_TO_STDOUT */
 
 /* Show a prefixed warning. */
diff --git a/include/envs.h b/include/envs.h
index c0f41ca5..e4e49c4d 100644
--- a/include/envs.h
+++ b/include/envs.h
@@ -78,8 +78,8 @@ static char *afl_environment_variables[] = {
     "AFL_LLVM_CTX",
     "AFL_LLVM_DICT2FILE",
     "AFL_LLVM_DOCUMENT_IDS",
-    "AFL_LLVM_INSTRUMENT",
     "AFL_LLVM_INSTRIM_LOOPHEAD",
+    "AFL_LLVM_INSTRUMENT",
     "AFL_LLVM_LTO_AUTODICTIONARY",
     "AFL_LLVM_AUTODICTIONARY",
     "AFL_LLVM_SKIPSINGLEBLOCK",
@@ -103,6 +103,10 @@ static char *afl_environment_variables[] = {
     "AFL_NO_ARITH",
     "AFL_NO_AUTODICT",
     "AFL_NO_BUILTIN",
+#if defined USE_COLOR && !defined ALWAYS_COLORED
+    "AFL_NO_COLOR",
+    "AFL_NO_COLOUR",
+#endif
     "AFL_NO_CPU_RED",
     "AFL_NO_FORKSRV",
     "AFL_NO_UI",
diff --git a/include/forkserver.h b/include/forkserver.h
index 5d5c728f..8e029266 100644
--- a/include/forkserver.h
+++ b/include/forkserver.h
@@ -64,7 +64,7 @@ typedef struct afl_forkserver {
 
   FILE *plot_file;                      /* Gnuplot output file              */
 
-  /* Note: lat_run_timed_out is u32 to send it to the child as 4 byte array */
+  /* Note: last_run_timed_out is u32 to send it to the child as 4 byte array */
   u32 last_run_timed_out;               /* Traced process timed out?        */
 
   u8 last_kill_signal;                  /* Signal that killed the child     */
diff --git a/instrumentation/Makefile b/instrumentation/Makefile
new file mode 100644
index 00000000..6cdd1a07
--- /dev/null
+++ b/instrumentation/Makefile
@@ -0,0 +1,2 @@
+all:
+	@echo "no need to do make in the instrumentation/ directory :) - it is all done in the main one"
diff --git a/instrumentation/README.llvm.md b/instrumentation/README.llvm.md
index 07636970..2705ce0d 100644
--- a/instrumentation/README.llvm.md
+++ b/instrumentation/README.llvm.md
@@ -168,26 +168,7 @@ This is the most powerful and effective fuzzing you can do.
 Please see [README.persistent_mode.md](README.persistent_mode.md) for a
 full explanation.
 
-## 7) Bonus feature: 'trace-pc-guard' mode
-
-LLVM is shipping with a built-in execution tracing feature
-that provides AFL with the necessary tracing data without the need to
-post-process the assembly or install any compiler plugins. See:
-
-  http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards
-
-If you have not an outdated compiler and want to give it a try, build
-targets this way:
-
-```
-AFL_LLVM_INSTRUMENT=PCGUARD  make
-```
-
-Note that this is currently the default if you use LLVM >= 7, as it is the best
-mode. Recommended is LLVM >= 9.
-If you have llvm 11+ and compiled afl-clang-lto - this is the only better mode.
-
-## 8) Bonus feature: 'dict2file' pass
+## 7) Bonus feature: 'dict2file' pass
 
 Just specify `AFL_LLVM_DICT2FILE=/absolute/path/file.txt` and during compilation
 all constant string compare parameters will be written to this file to be
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
index b1df26db..cddde87c 100644
--- a/instrumentation/afl-compiler-rt.o.c
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -236,8 +236,8 @@ static void __afl_map_shm(void) {
 
   if (__afl_final_loc) {
 
-    if (__afl_final_loc % 8)
-      __afl_final_loc = (((__afl_final_loc + 7) >> 3) << 3);
+    if (__afl_final_loc % 32)
+      __afl_final_loc = (((__afl_final_loc + 31) >> 5) << 5);
     __afl_map_size = __afl_final_loc;
 
     if (__afl_final_loc > MAP_SIZE) {
diff --git a/instrumentation/afl-gcc-pass.so.cc b/instrumentation/afl-gcc-pass.so.cc
index e116e7d1..25437609 100644
--- a/instrumentation/afl-gcc-pass.so.cc
+++ b/instrumentation/afl-gcc-pass.so.cc
@@ -516,7 +516,9 @@ struct afl_pass : gimple_opt_pass {
         "__cmplog",
         "__sancov",
         "msan.",
-        "LLVMFuzzer",
+        "LLVMFuzzerM",
+        "LLVMFuzzerC",
+        "LLVMFuzzerI",
         "__decide_deferred",
         "maybe_duplicate_stderr",
         "discard_output",
@@ -620,10 +622,11 @@ struct afl_pass : gimple_opt_pass {
             allowListFiles.push_back(line);
           else
             allowListFunctions.push_back(line);
-          getline(fileStream, line);
 
         }
 
+        getline(fileStream, line);
+
       }
 
       if (debug)
@@ -694,10 +697,11 @@ struct afl_pass : gimple_opt_pass {
             denyListFiles.push_back(line);
           else
             denyListFunctions.push_back(line);
-          getline(fileStream, line);
 
         }
 
+        getline(fileStream, line);
+
       }
 
       if (debug)
diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc
index 21c4d204..a27c4069 100644
--- a/instrumentation/afl-llvm-common.cc
+++ b/instrumentation/afl-llvm-common.cc
@@ -70,7 +70,9 @@ bool isIgnoreFunction(const llvm::Function *F) {
       "__cmplog",
       "__sancov",
       "msan.",
-      "LLVMFuzzer",
+      "LLVMFuzzerM",
+      "LLVMFuzzerC",
+      "LLVMFuzzerI",
       "__decide_deferred",
       "maybe_duplicate_stderr",
       "discard_output",
@@ -166,10 +168,11 @@ void initInstrumentList() {
           allowListFiles.push_back(line);
         else
           allowListFunctions.push_back(line);
-        getline(fileStream, line);
 
       }
 
+      getline(fileStream, line);
+
     }
 
     if (debug)
@@ -240,10 +243,11 @@ void initInstrumentList() {
           denyListFiles.push_back(line);
         else
           denyListFunctions.push_back(line);
-        getline(fileStream, line);
 
       }
 
+      getline(fileStream, line);
+
     }
 
     if (debug)
diff --git a/qemu_mode/build_qemu_support.sh b/qemu_mode/build_qemu_support.sh
index c24cc995..608db9e4 100755
--- a/qemu_mode/build_qemu_support.sh
+++ b/qemu_mode/build_qemu_support.sh
@@ -165,34 +165,141 @@ fi
 
 echo "Building for CPU target $CPU_TARGET"
 
+# --enable-pie seems to give a couple of exec's a second performance
+# improvement, much to my surprise. Not sure how universal this is..
+QEMU_CONF_FLAGS=" \
+  --audio-drv-list= \
+  --disable-blobs \
+  --disable-bochs \
+  --disable-brlapi \
+  --disable-bsd-user \
+  --disable-bzip2 \
+  --disable-cap-ng \
+  --disable-cloop \
+  --disable-curl \
+  --disable-curses \
+  --disable-dmg \
+  --disable-fdt \
+  --disable-gcrypt \
+  --disable-glusterfs \
+  --disable-gnutls \
+  --disable-gtk \
+  --disable-guest-agent \
+  --disable-iconv \
+  --disable-libiscsi \
+  --disable-libnfs \
+  --disable-libssh \
+  --disable-libusb \
+  --disable-linux-aio \
+  --disable-live-block-migration \
+  --disable-lzo \
+  --disable-nettle \
+  --disable-numa \
+  --disable-opengl \
+  --disable-parallels \
+  --disable-plugins \
+  --disable-qcow1 \
+  --disable-qed \
+  --disable-rbd \
+  --disable-rdma \
+  --disable-replication \
+  --disable-sdl \
+  --disable-seccomp \
+  --disable-sheepdog \
+  --disable-smartcard \
+  --disable-snappy \
+  --disable-spice \
+  --disable-system \
+  --disable-tools \
+  --disable-tpm \
+  --disable-usb-redir \
+  --disable-vde \
+  --disable-vdi \
+  --disable-vhost-crypto \
+  --disable-vhost-kernel \
+  --disable-vhost-net \
+  --disable-vhost-scsi \
+  --disable-vhost-user \
+  --disable-vhost-vdpa \
+  --disable-vhost-vsock \
+  --disable-virglrenderer \
+  --disable-virtfs \
+  --disable-vnc \
+  --disable-vnc-jpeg \
+  --disable-vnc-png \
+  --disable-vnc-sasl \
+  --disable-vte \
+  --disable-vvfat \
+  --disable-xen \
+  --disable-xen-pci-passthrough \
+  --disable-xfsctl \
+  --enable-pie \
+  --python=${PYTHONBIN} \
+  --target-list="${CPU_TARGET}-linux-user" \
+  --without-default-devices \
+  "
+
+if [ -n "${CROSS_PREFIX}" ]; then
+
+  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} --cross-prefix=${CROSS_PREFIX}"
+
+fi
+
 if [ "$STATIC" = "1" ]; then
 
   echo Building STATIC binary
-  ./configure --extra-cflags="-O3 -ggdb -DAFL_QEMU_STATIC_BUILD=1" \
-     --disable-bsd-user --disable-guest-agent --disable-strip --disable-werror \
-	  --disable-gcrypt --disable-debug-info --disable-debug-tcg --disable-tcg-interpreter \
-	  --enable-attr --disable-brlapi --disable-linux-aio --disable-bzip2 --disable-bluez --disable-cap-ng \
-	  --disable-curl --disable-fdt --disable-glusterfs --disable-gnutls --disable-nettle --disable-gtk \
-	  --disable-rdma --disable-libiscsi --disable-vnc-jpeg --disable-lzo --disable-curses \
-	  --disable-libnfs --disable-numa --disable-opengl --disable-vnc-png --disable-rbd --disable-vnc-sasl \
-	  --disable-sdl --disable-seccomp --disable-smartcard --disable-snappy --disable-spice --disable-libssh2 \
-	  --disable-libusb --disable-usb-redir --disable-vde --disable-vhost-net --disable-virglrenderer \
-	  --disable-virtfs --disable-vnc --disable-vte --disable-xen --disable-xen-pci-passthrough --disable-xfsctl \
-	  --enable-linux-user --disable-system --disable-blobs --disable-tools \
-	  --target-list="${CPU_TARGET}-linux-user" --static --disable-pie --cross-prefix=$CROSS_PREFIX --python="$PYTHONBIN" \
-	  || exit 1
+
+  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+    --static \
+    --extra-cflags=-DAFL_QEMU_STATIC_BUILD=1 \
+    "
+fi
+
+if [ "$DEBUG" = "1" ]; then
+
+  echo Building DEBUG binary
+
+  # --enable-gcov might go here but incurs a mesonbuild error on meson
+  # versions prior to 0.56:
+  # https://github.com/qemu/meson/commit/903d5dd8a7dc1d6f8bef79e66d6ebc07c
+  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+    --disable-strip \
+    --enable-debug \
+    --enable-debug-info \
+    --enable-debug-mutex \
+    --enable-debug-stack-usage \
+    --enable-debug-tcg \
+    --enable-qom-cast-debug \
+    --enable-werror \
+    "
 
 else
 
-  # --enable-pie seems to give a couple of exec's a second performance
-  # improvement, much to my surprise. Not sure how universal this is..
+  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+    --disable-debug-info \
+    --disable-debug-mutex \
+    --disable-debug-tcg \
+    --disable-qom-cast-debug \
+    --disable-stack-protector \
+    --disable-werror \
+    "
+
+fi
+
+if [ "$PROFILING" = "1" ]; then
+
+  echo Building PROFILED binary
 
-  ./configure --disable-system \
-    --enable-linux-user --disable-gtk --disable-sdl --disable-vnc --disable-werror \
-    --target-list="${CPU_TARGET}-linux-user" --enable-pie $CROSS_PREFIX --python="$PYTHONBIN" || exit 1
+  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+    --enable-gprof \
+    --enable-profiler \
+    "
 
 fi
 
+# shellcheck disable=SC2086
+./configure ${QEMU_CONF_FLAGS} || exit 1
+
 echo "[+] Configuration complete."
 
 echo "[*] Attempting to build QEMU (fingers crossed!)..."
diff --git a/src/afl-analyze.c b/src/afl-analyze.c
index 2780deff..a6825ef6 100644
--- a/src/afl-analyze.c
+++ b/src/afl-analyze.c
@@ -103,19 +103,29 @@ static u32 map_size = MAP_SIZE;
 /* Classify tuple counts. This is a slow & naive version, but good enough here.
  */
 
+#define TIMES4(x) x,x,x,x
+#define TIMES8(x) TIMES4(x),TIMES4(x)
+#define TIMES16(x) TIMES8(x),TIMES8(x)
+#define TIMES32(x) TIMES16(x),TIMES16(x)
+#define TIMES64(x) TIMES32(x),TIMES32(x)
 static u8 count_class_lookup[256] = {
 
     [0] = 0,
     [1] = 1,
     [2] = 2,
     [3] = 4,
-    [4 ... 7] = 8,
-    [8 ... 15] = 16,
-    [16 ... 31] = 32,
-    [32 ... 127] = 64,
-    [128 ... 255] = 128
+    [4] = TIMES4(8),
+    [8] = TIMES8(16),
+    [16] = TIMES16(32),
+    [32] = TIMES32(64),
+    [128] = TIMES64(128)
 
 };
+#undef TIMES64
+#undef TIMES32
+#undef TIMES16
+#undef TIMES8
+#undef TIMES4
 
 static void classify_counts(u8 *mem) {
 
diff --git a/src/afl-cc.c b/src/afl-cc.c
index 2aeb2178..66f4860f 100644
--- a/src/afl-cc.c
+++ b/src/afl-cc.c
@@ -62,7 +62,7 @@ u8          use_stdin;                                             /* dummy */
 
 enum {
 
-  INSTURMENT_DEFAULT = 0,
+  INSTRUMENT_DEFAULT = 0,
   INSTRUMENT_CLASSIC = 1,
   INSTRUMENT_AFL = 1,
   INSTRUMENT_PCGUARD = 2,
@@ -70,6 +70,8 @@ enum {
   INSTRUMENT_CFG = 3,
   INSTRUMENT_LTO = 4,
   INSTRUMENT_LLVMNATIVE = 5,
+  INSTRUMENT_GCC = 6,
+  INSTRUMENT_CLANG = 7,
   INSTRUMENT_OPT_CTX = 8,
   INSTRUMENT_OPT_NGRAM = 16
 
@@ -77,9 +79,24 @@ enum {
 
 char instrument_mode_string[18][18] = {
 
-    "DEFAULT", "CLASSIC", "PCGUARD", "CFG", "LTO", "", "PCGUARD-NATIVE",
-    "",        "CTX",     "",        "",    "",    "", "",
-    "",        "",        "NGRAM",   ""
+    "DEFAULT",
+    "CLASSIC",
+    "PCGUARD",
+    "CFG",
+    "LTO",
+    "PCGUARD-NATIVE",
+    "GCC",
+    "CLANG",
+    "CTX",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "NGRAM",
+    ""
 
 };
 
@@ -89,14 +106,15 @@ enum {
   LTO = 1,
   LLVM = 2,
   GCC_PLUGIN = 3,
-  GCC = 4
+  GCC = 4,
+  CLANG = 5
 
 };
 
-char compiler_mode_string[6][12] = {
+char compiler_mode_string[7][12] = {
 
     "AUTOSELECT", "LLVM-LTO", "LLVM", "GCC_PLUGIN",
-    "GCC",        ""
+    "GCC",        "CLANG",    ""
 
 };
 
@@ -324,6 +342,10 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
           alt_cxx = clang_mode ? "clang++" : "g++";
 
+        } else if (compiler_mode == CLANG) {
+
+          alt_cxx = "clang++";
+
         } else {
 
           alt_cxx = "g++";
@@ -357,6 +379,10 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
           alt_cc = clang_mode ? "clang" : "gcc";
 
+        } else if (compiler_mode == CLANG) {
+
+          alt_cc = "clang";
+
         } else {
 
           alt_cc = "gcc";
@@ -380,12 +406,16 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   }
 
-  if (compiler_mode == GCC) {
+  if (compiler_mode == GCC || compiler_mode == CLANG) {
 
     cc_params[cc_par_cnt++] = "-B";
     cc_params[cc_par_cnt++] = obj_path;
 
-    if (clang_mode) { cc_params[cc_par_cnt++] = "-no-integrated-as"; }
+    if (clang_mode || compiler_mode == CLANG) {
+
+      cc_params[cc_par_cnt++] = "-no-integrated-as";
+
+    }
 
   }
 
@@ -996,12 +1026,16 @@ int main(int argc, char **argv, char **envp) {
 
   } else if (strncmp(callname, "afl-gcc", 7) == 0 ||
 
-             strncmp(callname, "afl-g++", 7) == 0 ||
-
-             strncmp(callname, "afl-clang", 9) == 0) {
+             strncmp(callname, "afl-g++", 7) == 0) {
 
     compiler_mode = GCC;
 
+  } else if (strncmp(callname, "afl-clang", 9) == 0 &&
+
+             strstr(callname, "fast") == NULL) {
+
+    compiler_mode = CLANG;
+
   }
 
   if ((ptr = getenv("AFL_CC_COMPILER"))) {
@@ -1042,9 +1076,11 @@ int main(int argc, char **argv, char **envp) {
 
   }
 
-  if (strncmp(callname, "afl-clang", 9) == 0) {
+  if (strncmp(callname, "afl-clang", 9) == 0 &&
+      strstr(callname, "fast") == NULL) {
 
     clang_mode = 1;
+    compiler_mode = CLANG;
 
     if (strncmp(callname, "afl-clang++", 11) == 0) { plusplus_mode = 1; }
 
@@ -1072,6 +1108,34 @@ int main(int argc, char **argv, char **envp) {
 
         compiler_mode = LLVM;
 
+      } else if (strncasecmp(ptr, "PCGUARD", 7) == 0 ||
+
+                 strncasecmp(ptr, "PC-GUARD", 8) == 0) {
+
+        compiler_mode = LLVM;
+        instrument_mode = INSTRUMENT_PCGUARD;
+
+      } else if (strcasecmp(ptr, "INSTRIM") == 0 ||
+
+                 strcasecmp(ptr, "CFG") == 0) {
+
+        compiler_mode = LLVM;
+        instrument_mode = INSTRUMENT_CFG;
+
+      } else if (strcasecmp(ptr, "AFL") == 0 ||
+
+                 strcasecmp(ptr, "CLASSIC") == 0) {
+
+        compiler_mode = LLVM;
+        instrument_mode = INSTRUMENT_CLASSIC;
+
+      } else if (strcasecmp(ptr, "LLVMNATIVE") == 0 ||
+
+                 strcasecmp(ptr, "LLVM-NATIVE") == 0) {
+
+        compiler_mode = LLVM;
+        instrument_mode = INSTRUMENT_LLVMNATIVE;
+
       } else if (strncasecmp(ptr, "GCC_P", 5) == 0 ||
 
                  strncasecmp(ptr, "GCC-P", 5) == 0 ||
@@ -1083,6 +1147,10 @@ int main(int argc, char **argv, char **envp) {
 
         compiler_mode = GCC;
 
+      } else if (strncasecmp(ptr, "CLANG", 5) == 0) {
+
+        compiler_mode = CLANG;
+
       } else
 
         FATAL("Unknown --afl-... compiler mode: %s\n", argv[i]);
@@ -1212,6 +1280,28 @@ int main(int argc, char **argv, char **envp) {
 
       }
 
+      if (strcasecmp(ptr, "gcc") == 0) {
+
+        if (!instrument_mode || instrument_mode == INSTRUMENT_GCC)
+          instrument_mode = INSTRUMENT_GCC;
+        else if (instrument_mode != INSTRUMENT_GCC)
+          FATAL("main instrumentation mode already set with %s",
+                instrument_mode_string[instrument_mode]);
+        compiler_mode = GCC;
+
+      }
+
+      if (strcasecmp(ptr, "clang") == 0) {
+
+        if (!instrument_mode || instrument_mode == INSTRUMENT_CLANG)
+          instrument_mode = INSTRUMENT_CLANG;
+        else if (instrument_mode != INSTRUMENT_CLANG)
+          FATAL("main instrumentation mode already set with %s",
+                instrument_mode_string[instrument_mode]);
+        compiler_mode = CLANG;
+
+      }
+
       if (strncasecmp(ptr, "ctx", strlen("ctx")) == 0) {
 
         instrument_opt_mode |= INSTRUMENT_OPT_CTX;
@@ -1270,6 +1360,22 @@ int main(int argc, char **argv, char **envp) {
 
   }
 
+  if (compiler_mode == GCC) {
+
+    if (clang_mode) {
+
+      instrument_mode = CLANG;
+
+    } else {
+
+      instrument_mode = GCC;
+
+    }
+
+  }
+
+  if (compiler_mode == CLANG) { instrument_mode = CLANG; }
+
   if (argc < 2 || strncmp(argv[1], "-h", 2) == 0) {
 
     printf("afl-cc" VERSION
@@ -1316,7 +1422,7 @@ int main(int argc, char **argv, char **envp) {
         "  [GCC_PLUGIN] gcc plugin: %s%s\n"
         "      CLASSIC              DEFAULT    no  yes     yes  no     no  no  "
         "   yes\n"
-        "  [GCC] simple gcc:        %s%s\n"
+        "  [GCC/CLANG] simple gcc/clang: %s%s\n"
         "      CLASSIC              DEFAULT    no  no      no   no     no  no  "
         "   no\n\n",
         have_lto ? "AVAILABLE" : "unavailable!",
@@ -1328,7 +1434,7 @@ int main(int argc, char **argv, char **envp) {
         have_gcc_plugin ? "AVAILABLE" : "unavailable!",
         compiler_mode == GCC_PLUGIN ? " [SELECTED]" : "",
         have_gcc ? "AVAILABLE" : "unavailable!",
-        compiler_mode == GCC ? " [SELECTED]" : "");
+        (compiler_mode == GCC || compiler_mode == CLANG) ? " [SELECTED]" : "");
 
     SAYF(
         "Modes:\n"
@@ -1346,6 +1452,10 @@ int main(int argc, char **argv, char **envp) {
         "Sub-Modes: (set via env AFL_LLVM_INSTRUMENT, afl-cc selects the best "
         "available)\n"
         "  PCGUARD: Dominator tree instrumentation (best!) (README.llvm.md)\n"
+#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
+        "  NATIVE:  use llvm's native PCGUARD instrumentation (less "
+        "performant)\n"
+#endif
         "  CLASSIC: decision target instrumentation (README.llvm.md)\n"
         "  CTX:     CLASSIC + callee context (instrumentation/README.ctx.md)\n"
         "  NGRAM-x: CLASSIC + previous path "
@@ -1432,7 +1542,7 @@ int main(int argc, char **argv, char **envp) {
             "  AFL_LLVM_LAF_SPLIT_FLOATS: cascaded comparisons on floats\n"
             "  AFL_LLVM_LAF_TRANSFORM_COMPARES: cascade comparisons for string "
             "functions\n"
-            "  AFL_LLVM_INSTRUMENT_ALLOW/AFL_LLVM_INSTRUMENT_DENY: enable "
+            "  AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST: enable "
             "instrument allow/\n"
             "    deny listing (selective instrumentation)\n");
 
@@ -1441,7 +1551,8 @@ int main(int argc, char **argv, char **envp) {
             "  AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen "
             "mutator)\n"
             "  AFL_LLVM_INSTRUMENT: set instrumentation mode:\n"
-            "    CLASSIC, INSTRIM, PCGUARD, LTO, CTX, NGRAM-2 ... NGRAM-16\n"
+            "    CLASSIC, INSTRIM, PCGUARD, LTO, GCC, CLANG, CTX, NGRAM-2 ... "
+            "NGRAM-16\n"
             " You can also use the old environment variables instead:\n"
             "  AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation\n"
             "  AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n"
diff --git a/src/afl-common.c b/src/afl-common.c
index 4df22394..6dc8abe0 100644
--- a/src/afl-common.c
+++ b/src/afl-common.c
@@ -26,6 +26,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <strings.h>
+#include <math.h>
 
 #include "debug.h"
 #include "alloc-inl.h"
@@ -786,6 +787,10 @@ u8 *u_stringify_float(u8 *buf, double val) {
 
     sprintf(buf, "%0.01f", val);
 
+  } else if (unlikely(isnan(val) || isfinite(val))) {
+
+    strcpy(buf, "999.9");
+
   } else {
 
     return u_stringify_int(buf, (u64)val);
diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c
index 3afb94be..90fa55e9 100644
--- a/src/afl-forkserver.c
+++ b/src/afl-forkserver.c
@@ -641,11 +641,11 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
 
         if (!fsrv->map_size) { fsrv->map_size = MAP_SIZE; }
 
-        if (unlikely(tmp_map_size % 8)) {
+        if (unlikely(tmp_map_size % 32)) {
 
           // should not happen
           WARNF("Target reported non-aligned map size of %u", tmp_map_size);
-          tmp_map_size = (((tmp_map_size + 8) >> 3) << 3);
+          tmp_map_size = (((tmp_map_size + 31) >> 5) << 5);
 
         }
 
diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c
index f1ca7400..1cb9b15f 100644
--- a/src/afl-fuzz-bitmap.c
+++ b/src/afl-fuzz-bitmap.c
@@ -25,6 +25,9 @@
 
 #include "afl-fuzz.h"
 #include <limits.h>
+#if !defined NAME_MAX
+#define NAME_MAX _XOPEN_NAME_MAX
+#endif
 
 /* Write bitmap to file. The bitmap is useful mostly for the secret
    -B option, to focus a separate fuzzing session on a particular
@@ -49,101 +52,6 @@ void write_bitmap(afl_state_t *afl) {
 
 }
 
-/* Check if the current execution path brings anything new to the table.
-   Update virgin bits to reflect the finds. Returns 1 if the only change is
-   the hit-count for a particular tuple; 2 if there are new tuples seen.
-   Updates the map, so subsequent calls will always return 0.
-
-   This function is called after every exec() on a fairly large buffer, so
-   it needs to be fast. We do this in 32-bit and 64-bit flavors. */
-
-u8 __attribute__((hot)) has_new_bits(afl_state_t *afl, u8 *virgin_map) {
-
-#ifdef WORD_SIZE_64
-
-  u64 *current = (u64 *)afl->fsrv.trace_bits;
-  u64 *virgin = (u64 *)virgin_map;
-
-  u32 i = (afl->fsrv.map_size >> 3);
-
-#else
-
-  u32 *current = (u32 *)afl->fsrv.trace_bits;
-  u32 *virgin = (u32 *)virgin_map;
-
-  u32 i = (afl->fsrv.map_size >> 2);
-
-#endif                                                     /* ^WORD_SIZE_64 */
-  // the map size must be a minimum of 8 bytes.
-  // for variable/dynamic map sizes this is ensured in the forkserver
-
-  u8 ret = 0;
-
-  while (i--) {
-
-    /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap
-       that have not been already cleared from the virgin map - since this will
-       almost always be the case. */
-
-    // the (*current) is unnecessary but speeds up the overall comparison
-    if (unlikely(*current) && unlikely(*current & *virgin)) {
-
-      if (likely(ret < 2)) {
-
-        u8 *cur = (u8 *)current;
-        u8 *vir = (u8 *)virgin;
-
-        /* Looks like we have not found any new bytes yet; see if any non-zero
-           bytes in current[] are pristine in virgin[]. */
-
-#ifdef WORD_SIZE_64
-
-        if (*virgin == 0xffffffffffffffff || (cur[0] && vir[0] == 0xff) ||
-            (cur[1] && vir[1] == 0xff) || (cur[2] && vir[2] == 0xff) ||
-            (cur[3] && vir[3] == 0xff) || (cur[4] && vir[4] == 0xff) ||
-            (cur[5] && vir[5] == 0xff) || (cur[6] && vir[6] == 0xff) ||
-            (cur[7] && vir[7] == 0xff)) {
-
-          ret = 2;
-
-        } else {
-
-          ret = 1;
-
-        }
-
-#else
-
-        if (*virgin == 0xffffffff || (cur[0] && vir[0] == 0xff) ||
-            (cur[1] && vir[1] == 0xff) || (cur[2] && vir[2] == 0xff) ||
-            (cur[3] && vir[3] == 0xff))
-          ret = 2;
-        else
-          ret = 1;
-
-#endif                                                     /* ^WORD_SIZE_64 */
-
-      }
-
-      *virgin &= ~*current;
-
-    }
-
-    ++current;
-    ++virgin;
-
-  }
-
-  if (unlikely(ret) && likely(virgin_map == afl->virgin_bits)) {
-
-    afl->bitmap_changed = 1;
-
-  }
-
-  return ret;
-
-}
-
 /* Count the number of bits set in the provided bitmap. Used for the status
    screen several times every second, does not have to be fast. */
 
@@ -192,10 +100,10 @@ u32 count_bytes(afl_state_t *afl, u8 *mem) {
     u32 v = *(ptr++);
 
     if (!v) { continue; }
-    if (v & 0x000000ff) { ++ret; }
-    if (v & 0x0000ff00) { ++ret; }
-    if (v & 0x00ff0000) { ++ret; }
-    if (v & 0xff000000) { ++ret; }
+    if (v & 0x000000ffU) { ++ret; }
+    if (v & 0x0000ff00U) { ++ret; }
+    if (v & 0x00ff0000U) { ++ret; }
+    if (v & 0xff000000U) { ++ret; }
 
   }
 
@@ -219,11 +127,11 @@ u32 count_non_255_bytes(afl_state_t *afl, u8 *mem) {
     /* This is called on the virgin bitmap, so optimize for the most likely
        case. */
 
-    if (v == 0xffffffff) { continue; }
-    if ((v & 0x000000ff) != 0x000000ff) { ++ret; }
-    if ((v & 0x0000ff00) != 0x0000ff00) { ++ret; }
-    if ((v & 0x00ff0000) != 0x00ff0000) { ++ret; }
-    if ((v & 0xff000000) != 0xff000000) { ++ret; }
+    if (v == 0xffffffffU) { continue; }
+    if ((v & 0x000000ffU) != 0x000000ffU) { ++ret; }
+    if ((v & 0x0000ff00U) != 0x0000ff00U) { ++ret; }
+    if ((v & 0x00ff0000U) != 0x00ff0000U) { ++ret; }
+    if ((v & 0xff000000U) != 0xff000000U) { ++ret; }
 
   }
 
@@ -235,98 +143,43 @@ u32 count_non_255_bytes(afl_state_t *afl, u8 *mem) {
    and replacing it with 0x80 or 0x01 depending on whether the tuple
    is hit or not. Called on every new crash or timeout, should be
    reasonably fast. */
-
+#define TIMES4(x) x,x,x,x
+#define TIMES8(x) TIMES4(x),TIMES4(x)
+#define TIMES16(x) TIMES8(x),TIMES8(x)
+#define TIMES32(x) TIMES16(x),TIMES16(x)
+#define TIMES64(x) TIMES32(x),TIMES32(x)
+#define TIMES255(x) TIMES64(x),TIMES64(x),TIMES64(x),TIMES32(x),TIMES16(x),TIMES8(x),TIMES4(x),x,x,x
 const u8 simplify_lookup[256] = {
 
-    [0] = 1, [1 ... 255] = 128
+    [0] = 1, [1] = TIMES255(128)
 
 };
 
-#ifdef WORD_SIZE_64
-
-void simplify_trace(afl_state_t *afl, u64 *mem) {
-
-  u32 i = (afl->fsrv.map_size >> 3);
-
-  while (i--) {
-
-    /* Optimize for sparse bitmaps. */
-
-    if (unlikely(*mem)) {
-
-      u8 *mem8 = (u8 *)mem;
-
-      mem8[0] = simplify_lookup[mem8[0]];
-      mem8[1] = simplify_lookup[mem8[1]];
-      mem8[2] = simplify_lookup[mem8[2]];
-      mem8[3] = simplify_lookup[mem8[3]];
-      mem8[4] = simplify_lookup[mem8[4]];
-      mem8[5] = simplify_lookup[mem8[5]];
-      mem8[6] = simplify_lookup[mem8[6]];
-      mem8[7] = simplify_lookup[mem8[7]];
-
-    } else {
-
-      *mem = 0x0101010101010101ULL;
-
-    }
-
-    ++mem;
-
-  }
-
-}
-
-#else
-
-void simplify_trace(afl_state_t *afl, u32 *mem) {
-
-  u32 i = (afl->fsrv.map_size >> 2);
-
-  while (i--) {
-
-    /* Optimize for sparse bitmaps. */
-
-    if (unlikely(*mem)) {
-
-      u8 *mem8 = (u8 *)mem;
-
-      mem8[0] = simplify_lookup[mem8[0]];
-      mem8[1] = simplify_lookup[mem8[1]];
-      mem8[2] = simplify_lookup[mem8[2]];
-      mem8[3] = simplify_lookup[mem8[3]];
-
-    } else
-
-      *mem = 0x01010101;
-
-    ++mem;
-
-  }
-
-}
-
-#endif                                                     /* ^WORD_SIZE_64 */
-
 /* Destructively classify execution counts in a trace. This is used as a
    preprocessing step for any newly acquired traces. Called on every exec,
    must be fast. */
 
-static const u8 count_class_lookup8[256] = {
+const u8 count_class_lookup8[256] = {
 
     [0] = 0,
     [1] = 1,
     [2] = 2,
     [3] = 4,
-    [4 ... 7] = 8,
-    [8 ... 15] = 16,
-    [16 ... 31] = 32,
-    [32 ... 127] = 64,
-    [128 ... 255] = 128
+    [4] = TIMES4(8),
+    [8] = TIMES8(16),
+    [16] = TIMES16(32),
+    [32] = TIMES32(64),
+    [128] = TIMES64(128)
 
 };
+#undef TIMES255
+#undef TIMES64
+#undef TIMES32
+#undef TIMES16
+#undef TIMES8
+#undef TIMES4
 
-static u16 count_class_lookup16[65536];
+u16 count_class_lookup16[65536];
 
 void init_count_class16(void) {
 
@@ -345,63 +198,87 @@ void init_count_class16(void) {
 
 }
 
-#ifdef WORD_SIZE_64
+/* Import coverage processing routines. */
 
-void __attribute__((hot)) classify_counts(afl_forkserver_t *fsrv) {
+#ifdef WORD_SIZE_64
+  #include "coverage-64.h"
+#else
+  #include "coverage-32.h"
+#endif
 
-  u64 *mem = (u64 *)fsrv->trace_bits;
+/* Check if the current execution path brings anything new to the table.
+   Update virgin bits to reflect the finds. Returns 1 if the only change is
+   the hit-count for a particular tuple; 2 if there are new tuples seen.
+   Updates the map, so subsequent calls will always return 0.
 
-  u32 i = (fsrv->map_size >> 3);
+   This function is called after every exec() on a fairly large buffer, so
+   it needs to be fast. We do this in 32-bit and 64-bit flavors. */
 
-  while (i--) {
+inline u8 has_new_bits(afl_state_t *afl, u8 *virgin_map) {
 
-    /* Optimize for sparse bitmaps. */
+#ifdef WORD_SIZE_64
 
-    if (unlikely(*mem)) {
+  u64 *current = (u64 *)afl->fsrv.trace_bits;
+  u64 *virgin = (u64 *)virgin_map;
 
-      u16 *mem16 = (u16 *)mem;
+  u32 i = (afl->fsrv.map_size >> 3);
 
-      mem16[0] = count_class_lookup16[mem16[0]];
-      mem16[1] = count_class_lookup16[mem16[1]];
-      mem16[2] = count_class_lookup16[mem16[2]];
-      mem16[3] = count_class_lookup16[mem16[3]];
+#else
 
-    }
+  u32 *current = (u32 *)afl->fsrv.trace_bits;
+  u32 *virgin = (u32 *)virgin_map;
 
-    ++mem;
+  u32 i = (afl->fsrv.map_size >> 2);
 
-  }
+#endif                                                     /* ^WORD_SIZE_64 */
 
-}
+  u8 ret = 0;
+  while (i--) {
 
-#else
+    if (unlikely(*current)) discover_word(&ret, current, virgin);
 
-void __attribute__((hot)) classify_counts(afl_forkserver_t *fsrv) {
+    current++;
+    virgin++;
 
-  u32 *mem = (u32 *)fsrv->trace_bits;
+  }
 
-  u32 i = (fsrv->map_size >> 2);
+  if (unlikely(ret) && likely(virgin_map == afl->virgin_bits))
+    afl->bitmap_changed = 1;
 
-  while (i--) {
+  return ret;
 
-    /* Optimize for sparse bitmaps. */
+}
 
-    if (unlikely(*mem)) {
+/* A combination of classify_counts and has_new_bits. If 0 is returned, then the
+ * trace bits are kept as-is. Otherwise, the trace bits are overwritten with
+ * classified values.
+ *
+ * This accelerates the processing: in most cases, no interesting behavior
+ * happen, and the trace bits will be discarded soon. This function optimizes
+ * for such cases: one-pass scan on trace bits without modifying anything. Only
+ * on rare cases it fall backs to the slow path: classify_counts() first, then
+ * return has_new_bits(). */
 
-      u16 *mem16 = (u16 *)mem;
+inline u8 has_new_bits_unclassified(afl_state_t *afl, u8 *virgin_map) {
 
-      mem16[0] = count_class_lookup16[mem16[0]];
-      mem16[1] = count_class_lookup16[mem16[1]];
+  /* Handle the hot path first: no new coverage */
+  u8 *end = afl->fsrv.trace_bits + afl->fsrv.map_size;
 
-    }
+#ifdef WORD_SIZE_64
 
-    ++mem;
+  if (!skim((u64 *)virgin_map, (u64 *)afl->fsrv.trace_bits, (u64 *)end))
+    return 0;
 
-  }
+#else
 
-}
+  if (!skim((u32 *)virgin_map, (u32 *)afl->fsrv.trace_bits, (u32 *)end))
+    return 0;
 
 #endif                                                     /* ^WORD_SIZE_64 */
+  classify_counts(&afl->fsrv);
+  return has_new_bits(afl, virgin_map);
+
+}
 
 /* Compact trace bytes into a smaller bitmap. We effectively just drop the
    count information here. This is called only sporadically, for some
@@ -581,7 +458,7 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
   u8 *queue_fn = "";
   u8  new_bits = '\0';
   s32 fd;
-  u8  keeping = 0, res;
+  u8  keeping = 0, res, classified = 0;
   u64 cksum = 0;
 
   u8 fn[PATH_MAX];
@@ -605,13 +482,17 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
     /* Keep only if there are new bits in the map, add to queue for
        future fuzzing, etc. */
 
-    if (!(new_bits = has_new_bits(afl, afl->virgin_bits))) {
+    new_bits = has_new_bits_unclassified(afl, afl->virgin_bits);
+
+    if (likely(!new_bits)) {
 
       if (unlikely(afl->crash_mode)) { ++afl->total_crashes; }
       return 0;
 
     }
 
+    classified = new_bits;
+
 #ifndef SIMPLE_FILES
 
     queue_fn = alloc_printf(
@@ -715,11 +596,14 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
       if (likely(!afl->non_instrumented_mode)) {
 
-#ifdef WORD_SIZE_64
-        simplify_trace(afl, (u64 *)afl->fsrv.trace_bits);
-#else
-        simplify_trace(afl, (u32 *)afl->fsrv.trace_bits);
-#endif                                                     /* ^WORD_SIZE_64 */
+        if (!classified) {
+
+          classify_counts(&afl->fsrv);
+          classified = 1;
+
+        }
+
+        simplify_trace(afl, afl->fsrv.trace_bits);
 
         if (!has_new_bits(afl, afl->virgin_tmout)) { return keeping; }
 
@@ -764,6 +648,7 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
         u8 new_fault;
         write_to_testcase(afl, mem, len);
         new_fault = fuzz_run_target(afl, &afl->fsrv, afl->hang_tmout);
+        classify_counts(&afl->fsrv);
 
         /* A corner case that one user reported bumping into: increasing the
            timeout actually uncovers a crash. Make sure we don't discard it if
@@ -812,11 +697,14 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
       if (likely(!afl->non_instrumented_mode)) {
 
-#ifdef WORD_SIZE_64
-        simplify_trace(afl, (u64 *)afl->fsrv.trace_bits);
-#else
-        simplify_trace(afl, (u32 *)afl->fsrv.trace_bits);
-#endif                                                     /* ^WORD_SIZE_64 */
+        if (!classified) {
+
+          classify_counts(&afl->fsrv);
+          classified = 1;
+
+        }
+
+        simplify_trace(afl, afl->fsrv.trace_bits);
 
         if (!has_new_bits(afl, afl->virgin_crash)) { return keeping; }
 
diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c
index 0db3a111..ec937f29 100644
--- a/src/afl-fuzz-init.c
+++ b/src/afl-fuzz-init.c
@@ -666,7 +666,7 @@ void read_testcases(afl_state_t *afl, u8 *directory) {
 
   }
 
-  if (afl->shuffle_queue && nl_cnt > 1) {
+  if (unlikely(afl->old_seed_selection && afl->shuffle_queue && nl_cnt > 1)) {
 
     ACTF("Shuffling queue...");
     shuffle_ptrs(afl, (void **)nl, nl_cnt);
diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c
index a97ceb89..32cca579 100644
--- a/src/afl-fuzz-run.c
+++ b/src/afl-fuzz-run.c
@@ -28,6 +28,9 @@
 #include <sys/time.h>
 #include <signal.h>
 #include <limits.h>
+#if !defined NAME_MAX
+#define NAME_MAX _XOPEN_NAME_MAX
+#endif
 
 #include "cmplog.h"
 
@@ -62,8 +65,6 @@ fuzz_run_target(afl_state_t *afl, afl_forkserver_t *fsrv, u32 timeout) {
   time_spent_start = (spec.tv_sec * 1000000000) + spec.tv_nsec;
 #endif
 
-  // TODO: Don't classify for faults?
-  classify_counts(fsrv);
   return res;
 
 }
@@ -379,6 +380,7 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
 
     }
 
+    classify_counts(&afl->fsrv);
     cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
     if (q->exec_cksum != cksum) {
 
@@ -767,13 +769,14 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
       write_with_gap(afl, in_buf, q->len, remove_pos, trim_avail);
 
       fault = fuzz_run_target(afl, &afl->fsrv, afl->fsrv.exec_tmout);
-      ++afl->trim_execs;
 
       if (afl->stop_soon || fault == FSRV_RUN_ERROR) { goto abort_trimming; }
 
       /* Note that we don't keep track of crashes or hangs here; maybe TODO?
        */
 
+      ++afl->trim_execs;
+      classify_counts(&afl->fsrv);
       cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
 
       /* If the deletion had no impact on the trace, make it permanent. This
diff --git a/src/afl-fuzz-state.c b/src/afl-fuzz-state.c
index 9c51a3ef..7053572b 100644
--- a/src/afl-fuzz-state.c
+++ b/src/afl-fuzz-state.c
@@ -401,6 +401,23 @@ void read_afl_environment(afl_state_t *afl, char **envp) {
             afl->afl_env.afl_crash_exitcode =
                 (u8 *)get_afl_env(afl_environment_variables[i]);
 
+#if defined USE_COLOR && !defined ALWAYS_COLORED
+
+          } else if (!strncmp(env, "AFL_NO_COLOR",
+
+                              afl_environment_variable_len)) {
+
+            afl->afl_env.afl_statsd_tags_flavor =
+                (u8 *)get_afl_env(afl_environment_variables[i]);
+
+          } else if (!strncmp(env, "AFL_NO_COLOUR",
+
+                              afl_environment_variable_len)) {
+
+            afl->afl_env.afl_statsd_tags_flavor =
+                (u8 *)get_afl_env(afl_environment_variables[i]);
+#endif
+
           }
 
         } else {
diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c
index 321bbb35..50e2ef15 100644
--- a/src/afl-fuzz-stats.c
+++ b/src/afl-fuzz-stats.c
@@ -371,6 +371,8 @@ void show_stats(afl_state_t *afl) {
 
   if (!afl->stats_last_execs) {
 
+    if (unlikely(cur_ms == afl->start_time)) --afl->start_time;
+
     afl->stats_avg_exec =
         ((double)afl->fsrv.total_execs) * 1000 / (cur_ms - afl->start_time);
 
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index 391d4c4f..2af374f2 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -156,6 +156,13 @@ static void usage(u8 *argv0, int more_help) {
 
   if (more_help > 1) {
 
+#if defined USE_COLOR && !defined ALWAYS_COLORED
+  #define DYN_COLOR \
+    "AFL_NO_COLOR or AFL_NO_COLOUR: switch colored console output off\n"
+#else
+  #define DYN_COLOR
+#endif
+
     SAYF(
       "Environment variables used:\n"
       "LD_BIND_LAZY: do not set LD_BIND_NOW env var for target\n"
@@ -194,6 +201,9 @@ static void usage(u8 *argv0, int more_help) {
       "AFL_NO_FORKSRV: run target via execve instead of using the forkserver\n"
       "AFL_NO_SNAPSHOT: do not use the snapshot feature (if the snapshot lkm is loaded)\n"
       "AFL_NO_UI: switch status screen off\n"
+
+      DYN_COLOR
+
       "AFL_PATH: path to AFL support binaries\n"
       "AFL_PYTHON_MODULE: mutate and trim inputs with the specified Python module\n"
       "AFL_QUIET: suppress forkserver status messages\n"
@@ -298,6 +308,17 @@ int main(int argc, char **argv_orig, char **envp) {
   struct timeval  tv;
   struct timezone tz;
 
+  #if defined USE_COLOR && defined ALWAYS_COLORED
+  if (getenv("AFL_NO_COLOR") || getenv("AFL_NO_COLOUR")) {
+
+    WARNF(
+        "Setting AFL_NO_COLOR has no effect (colors are configured on at "
+        "compile time)");
+
+  }
+
+  #endif
+
   char **argv = argv_cpy_dup(argc, argv_orig);
 
   afl_state_t *afl = calloc(1, sizeof(afl_state_t));
diff --git a/src/afl-performance.c b/src/afl-performance.c
index e070a05e..89b170eb 100644
--- a/src/afl-performance.c
+++ b/src/afl-performance.c
@@ -27,45 +27,49 @@
 #include "xxhash.h"
 #undef XXH_INLINE_ALL
 
-/* we use xoshiro256** instead of rand/random because it is 10x faster and has
-   better randomness properties. */
-
-static inline uint64_t rotl(const uint64_t x, int k) {
-
-  return (x << k) | (x >> (64 - k));
-
-}
-
 void rand_set_seed(afl_state_t *afl, s64 init_seed) {
 
   afl->init_seed = init_seed;
   afl->rand_seed[0] =
       hash64((u8 *)&afl->init_seed, sizeof(afl->init_seed), HASH_CONST);
   afl->rand_seed[1] = afl->rand_seed[0] ^ 0x1234567890abcdef;
-  afl->rand_seed[2] = afl->rand_seed[0] & 0x0123456789abcdef;
-  afl->rand_seed[3] = afl->rand_seed[0] | 0x01abcde43f567908;
+  afl->rand_seed[2] = (afl->rand_seed[0] & 0x1234567890abcdef) ^
+                      (afl->rand_seed[1] | 0xfedcba9876543210);
 
 }
 
-inline uint64_t rand_next(afl_state_t *afl) {
+#define ROTL(d, lrot) ((d << (lrot)) | (d >> (8 * sizeof(d) - (lrot))))
 
-  const uint64_t result =
-      rotl(afl->rand_seed[0] + afl->rand_seed[3], 23) + afl->rand_seed[0];
+#ifdef WORD_SIZE_64
+// romuDuoJr
+inline AFL_RAND_RETURN rand_next(afl_state_t *afl) {
 
-  const uint64_t t = afl->rand_seed[1] << 17;
+  AFL_RAND_RETURN xp = afl->rand_seed[0];
+  afl->rand_seed[0] = 15241094284759029579u * afl->rand_seed[1];
+  afl->rand_seed[1] = afl->rand_seed[1] - xp;
+  afl->rand_seed[1] = ROTL(afl->rand_seed[1], 27);
+  return xp;
 
-  afl->rand_seed[2] ^= afl->rand_seed[0];
-  afl->rand_seed[3] ^= afl->rand_seed[1];
-  afl->rand_seed[1] ^= afl->rand_seed[2];
-  afl->rand_seed[0] ^= afl->rand_seed[3];
+}
 
-  afl->rand_seed[2] ^= t;
+#else
+// RomuTrio32
+inline AFL_RAND_RETURN rand_next(afl_state_t *afl) {
+
+  AFL_RAND_RETURN xp = afl->rand_seed[0], yp = afl->rand_seed[1],
+                  zp = afl->rand_seed[2];
+  afl->rand_seed[0] = 3323815723u * zp;
+  afl->rand_seed[1] = yp - xp;
+  afl->rand_seed[1] = ROTL(afl->rand_seed[1], 6);
+  afl->rand_seed[2] = zp - yp;
+  afl->rand_seed[2] = ROTL(afl->rand_seed[2], 22);
+  return xp;
 
-  afl->rand_seed[3] = rotl(afl->rand_seed[3], 45);
+}
 
-  return result;
+#endif
 
-}
+#undef ROTL
 
 /* returns a double between 0.000000000 and 1.000000000 */
 
@@ -75,80 +79,6 @@ inline double rand_next_percent(afl_state_t *afl) {
 
 }
 
-/* This is the jump function for the generator. It is equivalent
-   to 2^128 calls to rand_next(); it can be used to generate 2^128
-   non-overlapping subsequences for parallel computations. */
-
-void jump(afl_state_t *afl) {
-
-  static const uint64_t JUMP[] = {0x180ec6d33cfd0aba, 0xd5a61266f0c9392c,
-                                  0xa9582618e03fc9aa, 0x39abdc4529b1661c};
-  size_t                i, b;
-  uint64_t              s0 = 0;
-  uint64_t              s1 = 0;
-  uint64_t              s2 = 0;
-  uint64_t              s3 = 0;
-  for (i = 0; i < (sizeof(JUMP) / sizeof(*JUMP)); i++)
-    for (b = 0; b < 64; b++) {
-
-      if (JUMP[i] & UINT64_C(1) << b) {
-
-        s0 ^= afl->rand_seed[0];
-        s1 ^= afl->rand_seed[1];
-        s2 ^= afl->rand_seed[2];
-        s3 ^= afl->rand_seed[3];
-
-      }
-
-      rand_next(afl);
-
-    }
-
-  afl->rand_seed[0] = s0;
-  afl->rand_seed[1] = s1;
-  afl->rand_seed[2] = s2;
-  afl->rand_seed[3] = s3;
-
-}
-
-/* This is the long-jump function for the generator. It is equivalent to
-   2^192 calls to rand_next(); it can be used to generate 2^64 starting points,
-   from each of which jump() will generate 2^64 non-overlapping
-   subsequences for parallel distributed computations. */
-
-void long_jump(afl_state_t *afl) {
-
-  static const uint64_t LONG_JUMP[] = {0x76e15d3efefdcbbf, 0xc5004e441c522fb3,
-                                       0x77710069854ee241, 0x39109bb02acbe635};
-
-  size_t   i, b;
-  uint64_t s0 = 0;
-  uint64_t s1 = 0;
-  uint64_t s2 = 0;
-  uint64_t s3 = 0;
-  for (i = 0; i < (sizeof(LONG_JUMP) / sizeof(*LONG_JUMP)); i++)
-    for (b = 0; b < 64; b++) {
-
-      if (LONG_JUMP[i] & UINT64_C(1) << b) {
-
-        s0 ^= afl->rand_seed[0];
-        s1 ^= afl->rand_seed[1];
-        s2 ^= afl->rand_seed[2];
-        s3 ^= afl->rand_seed[3];
-
-      }
-
-      rand_next(afl);
-
-    }
-
-  afl->rand_seed[0] = s0;
-  afl->rand_seed[1] = s1;
-  afl->rand_seed[2] = s2;
-  afl->rand_seed[3] = s3;
-
-}
-
 /* we switch from afl's murmur implementation to xxh3 as it is 30% faster -
    and get 64 bit hashes instead of just 32 bit. Less collisions! :-) */
 
diff --git a/src/afl-showmap.c b/src/afl-showmap.c
index 34a4f30d..b891632a 100644
--- a/src/afl-showmap.c
+++ b/src/afl-showmap.c
@@ -98,11 +98,18 @@ static sharedmem_t *     shm_fuzz;
 /* Classify tuple counts. Instead of mapping to individual bits, as in
    afl-fuzz.c, we map to more user-friendly numbers between 1 and 8. */
 
+#define TIMES4(x) x,x,x,x
+#define TIMES8(x) TIMES4(x),TIMES4(x)
+#define TIMES16(x) TIMES8(x),TIMES8(x)
+#define TIMES32(x) TIMES16(x),TIMES16(x)
+#define TIMES64(x) TIMES32(x),TIMES32(x)
+#define TIMES96(x) TIMES64(x),TIMES32(x)
+#define TIMES128(x) TIMES64(x),TIMES64(x)
 static const u8 count_class_human[256] = {
 
     [0] = 0,          [1] = 1,        [2] = 2,         [3] = 3,
-    [4 ... 7] = 4,    [8 ... 15] = 5, [16 ... 31] = 6, [32 ... 127] = 7,
-    [128 ... 255] = 8
+    [4] = TIMES4(4),  [8] = TIMES8(5),[16] = TIMES16(6),[32] = TIMES96(7),
+    [128] = TIMES128(8)
 
 };
 
@@ -112,13 +119,20 @@ static const u8 count_class_binary[256] = {
     [1] = 1,
     [2] = 2,
     [3] = 4,
-    [4 ... 7] = 8,
-    [8 ... 15] = 16,
-    [16 ... 31] = 32,
-    [32 ... 127] = 64,
-    [128 ... 255] = 128
+    [4] = TIMES4(8),
+    [8] = TIMES8(16),
+    [16] = TIMES16(32),
+    [32] = TIMES32(64),
+    [128] = TIMES64(128)
 
 };
+#undef TIMES128
+#undef TIMES96
+#undef TIMES64
+#undef TIMES32
+#undef TIMES16
+#undef TIMES8
+#undef TIMES4
 
 static void classify_counts(afl_forkserver_t *fsrv) {
 
diff --git a/src/afl-tmin.c b/src/afl-tmin.c
index b9045551..6cb0d458 100644
--- a/src/afl-tmin.c
+++ b/src/afl-tmin.c
@@ -98,19 +98,29 @@ static sharedmem_t *     shm_fuzz;
 /* Classify tuple counts. This is a slow & naive version, but good enough here.
  */
 
+#define TIMES4(x) x,x,x,x
+#define TIMES8(x) TIMES4(x),TIMES4(x)
+#define TIMES16(x) TIMES8(x),TIMES8(x)
+#define TIMES32(x) TIMES16(x),TIMES16(x)
+#define TIMES64(x) TIMES32(x),TIMES32(x)
 static const u8 count_class_lookup[256] = {
 
     [0] = 0,
     [1] = 1,
     [2] = 2,
     [3] = 4,
-    [4 ... 7] = 8,
-    [8 ... 15] = 16,
-    [16 ... 31] = 32,
-    [32 ... 127] = 64,
-    [128 ... 255] = 128
+    [4] = TIMES4(8),
+    [8] = TIMES8(16),
+    [16] = TIMES16(32),
+    [32] = TIMES32(64),
+    [128] = TIMES64(128)
 
 };
+#undef TIMES64
+#undef TIMES32
+#undef TIMES16
+#undef TIMES8
+#undef TIMES4
 
 static sharedmem_t *deinit_shmem(afl_forkserver_t *fsrv,
                                  sharedmem_t *     shm_fuzz) {
diff --git a/test/test-basic.sh b/test/test-basic.sh
index 24aa30a4..2ddf14af 100755
--- a/test/test-basic.sh
+++ b/test/test-basic.sh
@@ -25,13 +25,16 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
       CODE=1
     }
     rm -f test-instr.plain.0 test-instr.plain.1
+    SKIP=
     TUPLES=`echo 1|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
-    test "$TUPLES" -gt 4 -a "$TUPLES" -lt 11 && {
+    test "$TUPLES" -gt 2 -a "$TUPLES" -lt 12 && {
       $ECHO "$GREEN[+] ${AFL_GCC} run reported $TUPLES instrumented locations which is fine"
     } || {
       $ECHO "$RED[!] ${AFL_GCC} instrumentation produces weird numbers: $TUPLES"
       CODE=1
     }
+    test "$TUPLES" -lt 4 && SKIP=1
+    true  # this is needed because of the test above
   } || {
     $ECHO "$RED[!] ${AFL_GCC} failed"
     echo CUT------------------------------------------------------------------CUT
@@ -65,18 +68,20 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
   }) || {
     mkdir -p in
     echo 0 > in/in
-    $ECHO "$GREY[*] running afl-fuzz for ${AFL_GCC}, this will take approx 10 seconds"
-    {
-      ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
-    } >>errors 2>&1
-    test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
-      $ECHO "$GREEN[+] afl-fuzz is working correctly with ${AFL_GCC}"
-    } || {
-      echo CUT------------------------------------------------------------------CUT
-      cat errors
-      echo CUT------------------------------------------------------------------CUT
-      $ECHO "$RED[!] afl-fuzz is not working correctly with ${AFL_GCC}"
-      CODE=1
+    test -z "$SKIP" && {
+      $ECHO "$GREY[*] running afl-fuzz for ${AFL_GCC}, this will take approx 10 seconds"
+      {
+        ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
+      } >>errors 2>&1
+      test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
+        $ECHO "$GREEN[+] afl-fuzz is working correctly with ${AFL_GCC}"
+      } || {
+        echo CUT------------------------------------------------------------------CUT
+        cat errors
+        echo CUT------------------------------------------------------------------CUT
+        $ECHO "$RED[!] afl-fuzz is not working correctly with ${AFL_GCC}"
+        CODE=1
+      }
     }
     echo 000000000000000000000000 > in/in2
     echo 111 > in/in3
@@ -121,6 +126,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
  }
  if [ ${AFL_GCC} = "afl-gcc" ] ; then AFL_GCC=afl-clang ; else AFL_GCC=afl-gcc ; fi
  $ECHO "$BLUE[*] Testing: ${AFL_GCC}, afl-showmap, afl-fuzz, afl-cmin and afl-tmin"
+ SKIP=
  test -e ../${AFL_GCC} -a -e ../afl-showmap -a -e ../afl-fuzz && {
   ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1
   AFL_HARDEN=1 ../${AFL_GCC} -o test-compcov.harden test-compcov.c > /dev/null 2>&1
@@ -141,12 +147,14 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
     }
     rm -f test-instr.plain.0 test-instr.plain.1
     TUPLES=`echo 1|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain 2>&1 | grep Captur | awk '{print$3}'`
-    test "$TUPLES" -gt 4 -a "$TUPLES" -lt 11 && {
+    test "$TUPLES" -gt 2 -a "$TUPLES" -lt 12 && {
       $ECHO "$GREEN[+] ${AFL_GCC} run reported $TUPLES instrumented locations which is fine"
     } || {
       $ECHO "$RED[!] ${AFL_GCC} instrumentation produces weird numbers: $TUPLES"
       CODE=1
     }
+    test "$TUPLES" -lt 4 && SKIP=1
+    true  # this is needed because of the test above
   } || {
     $ECHO "$RED[!] ${AFL_GCC} failed"
     echo CUT------------------------------------------------------------------CUT
@@ -180,18 +188,20 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
   }) || {
     mkdir -p in
     echo 0 > in/in
-    $ECHO "$GREY[*] running afl-fuzz for ${AFL_GCC}, this will take approx 10 seconds"
-    {
-      ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
-    } >>errors 2>&1
-    test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
-      $ECHO "$GREEN[+] afl-fuzz is working correctly with ${AFL_GCC}"
-    } || {
-      echo CUT------------------------------------------------------------------CUT
-      cat errors
-      echo CUT------------------------------------------------------------------CUT
-      $ECHO "$RED[!] afl-fuzz is not working correctly with ${AFL_GCC}"
-      CODE=1
+    test -z "$SKIP" && {
+      $ECHO "$GREY[*] running afl-fuzz for ${AFL_GCC}, this will take approx 10 seconds"
+      {
+        ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
+      } >>errors 2>&1
+      test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
+        $ECHO "$GREEN[+] afl-fuzz is working correctly with ${AFL_GCC}"
+      } || {
+        echo CUT------------------------------------------------------------------CUT
+        cat errors
+        echo CUT------------------------------------------------------------------CUT
+        $ECHO "$RED[!] afl-fuzz is not working correctly with ${AFL_GCC}"
+        CODE=1
+      }
     }
     echo 000000000000000000000000 > in/in2
     echo AAA > in/in3
@@ -220,9 +230,9 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
       case "$CNT" in
         *2) $ECHO "$GREEN[+] afl-cmin.bash correctly minimized the number of testcases" ;;
         1)  {
-            test -s in2/* && $ECHO "$YELLOW[?] afl-cmin did minimize to one testcase. This can be a bug or due compiler optimization."
+            test -s in2/* && $ECHO "$YELLOW[?] afl-cmin.bash did minimize to one testcase. This can be a bug or due compiler optimization."
               test -s in2/* || {
-  		$ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases ($CNT)"
+  		$ECHO "$RED[!] afl-cmin.bash did not correctly minimize the number of testcases ($CNT)"
           	CODE=1
               }
             }
diff --git a/test/test-gcc-plugin.sh b/test/test-gcc-plugin.sh
index 71d86364..2b09e753 100755
--- a/test/test-gcc-plugin.sh
+++ b/test/test-gcc-plugin.sh
@@ -19,13 +19,15 @@ test -e ../afl-gcc-fast -a -e ../afl-compiler-rt.o && {
       } || {
         $ECHO "$GREEN[+] gcc_plugin instrumentation present and working correctly"
         TUPLES=`echo 0|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain.gccpi 2>&1 | grep Captur | awk '{print$3}'`
-        test "$TUPLES" -gt 3 -a "$TUPLES" -lt 7 && {
+        test "$TUPLES" -gt 3 -a "$TUPLES" -lt 9 && {
           $ECHO "$GREEN[+] gcc_plugin run reported $TUPLES instrumented locations which is fine"
         } || {
           $ECHO "$RED[!] gcc_plugin instrumentation produces a weird numbers: $TUPLES"
           $ECHO "$YELLOW[-] this is a known issue in gcc, not afl++. It is not flagged as an error because travis builds would all fail otherwise :-("
           #CODE=1
         }
+        test "$TUPLES" -lt 4 && SKIP=1
+        true
       }
     } || {
       $ECHO "$RED[!] gcc_plugin instrumentation failed"
@@ -60,22 +62,24 @@ test -e ../afl-gcc-fast -a -e ../afl-compiler-rt.o && {
     CODE=1
     true
   }) || {
-    mkdir -p in
-    echo 0 > in/in
-    $ECHO "$GREY[*] running afl-fuzz for gcc_plugin, this will take approx 10 seconds"
-    {
-      ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain.gccpi >>errors 2>&1
-    } >>errors 2>&1
-    test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
-      $ECHO "$GREEN[+] afl-fuzz is working correctly with gcc_plugin"
-    } || {
-      echo CUT------------------------------------------------------------------CUT
-      cat errors
-      echo CUT------------------------------------------------------------------CUT
-      $ECHO "$RED[!] afl-fuzz is not working correctly with gcc_plugin"
-      CODE=1
+    test -z "$SKIP" && {
+      mkdir -p in
+      echo 0 > in/in
+      $ECHO "$GREY[*] running afl-fuzz for gcc_plugin, this will take approx 10 seconds"
+      {
+        ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain.gccpi >>errors 2>&1
+      } >>errors 2>&1
+      test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
+        $ECHO "$GREEN[+] afl-fuzz is working correctly with gcc_plugin"
+      } || {
+        echo CUT------------------------------------------------------------------CUT
+        cat errors
+        echo CUT------------------------------------------------------------------CUT
+        $ECHO "$RED[!] afl-fuzz is not working correctly with gcc_plugin"
+        CODE=1
+      }
+      rm -rf in out errors
     }
-    rm -rf in out errors
   }
   rm -f test-instr.plain.gccpi
 
diff --git a/test/test-llvm.sh b/test/test-llvm.sh
index 4fcaf367..09ade0c3 100755
--- a/test/test-llvm.sh
+++ b/test/test-llvm.sh
@@ -31,6 +31,8 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
           $ECHO "$RED[!] llvm_mode instrumentation produces weird numbers: $TUPLES"
           CODE=1
         }
+        test "$TUPLES" -lt 4 && SKIP=1
+        true
       }
     } || {
       $ECHO "$RED[!] llvm_mode instrumentation failed"
@@ -66,18 +68,20 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
   }) || {
     mkdir -p in
     echo 0 > in/in
-    $ECHO "$GREY[*] running afl-fuzz for llvm_mode, this will take approx 10 seconds"
-    {
-      ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
-    } >>errors 2>&1
-    test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
-      $ECHO "$GREEN[+] afl-fuzz is working correctly with llvm_mode"
-    } || {
-      echo CUT------------------------------------------------------------------CUT
-      cat errors
-      echo CUT------------------------------------------------------------------CUT
-      $ECHO "$RED[!] afl-fuzz is not working correctly with llvm_mode"
-      CODE=1
+    test -z "$SKIP" && {
+      $ECHO "$GREY[*] running afl-fuzz for llvm_mode, this will take approx 10 seconds"
+      {
+        ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
+      } >>errors 2>&1
+      test -n "$( ls out/default/queue/id:000002* 2>/dev/null )" && {
+        $ECHO "$GREEN[+] afl-fuzz is working correctly with llvm_mode"
+      } || {
+        echo CUT------------------------------------------------------------------CUT
+        cat errors
+        echo CUT------------------------------------------------------------------CUT
+        $ECHO "$RED[!] afl-fuzz is not working correctly with llvm_mode"
+        CODE=1
+      }
     }
     test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc" || {
       echo 000000000000000000000000 > in/in2
@@ -133,6 +137,7 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
       }
       rm -f test-instr.instrim test.out
     } || {
+      cat test.out
       $ECHO "$RED[!] llvm_mode InsTrim compilation failed"
       CODE=1
     }
diff --git a/test/test-pre.sh b/test/test-pre.sh
index 4c708a68..85ac320b 100755
--- a/test/test-pre.sh
+++ b/test/test-pre.sh
@@ -90,6 +90,7 @@ unset AFL_CUSTOM_MUTATOR_LIBRARY
 unset AFL_PYTHON_MODULE
 unset AFL_PRELOAD
 unset LD_PRELOAD
+unset SKIP
 
 rm -rf in in2 out
 
diff --git a/utils/afl_proxy/afl-proxy.c b/utils/afl_proxy/afl-proxy.c
index f2dfeac1..aa7a361a 100644
--- a/utils/afl_proxy/afl-proxy.c
+++ b/utils/afl_proxy/afl-proxy.c
@@ -213,7 +213,7 @@ int main(int argc, char *argv[]) {
   u32 len;
 
   /* here you specify the map size you need that you are reporting to
-     afl-fuzz. */
+     afl-fuzz.  Any value is fine as long as it can be divided by 32. */
   __afl_map_size = MAP_SIZE;  // default is 65536
 
   /* then we initialize the shared memory map and start the forkserver */
diff --git a/utils/afl_untracer/afl-untracer.c b/utils/afl_untracer/afl-untracer.c
index cb6f948c..695f8dd1 100644
--- a/utils/afl_untracer/afl-untracer.c
+++ b/utils/afl_untracer/afl-untracer.c
@@ -568,7 +568,7 @@ void setup_trap_instrumentation(void) {
     lib_addr[offset] = 0xcc;  // replace instruction with debug trap
     if (debug)
       fprintf(stderr,
-              "Patch entry: %p[%x] = %p = %02x -> SHADOW(%p) #%d -> %08x\n",
+              "Patch entry: %p[%lx] = %p = %02x -> SHADOW(%p) #%d -> %08x\n",
               lib_addr, offset, lib_addr + offset, orig_byte, shadow,
               bitmap_index, *shadow);
 
@@ -582,7 +582,7 @@ void setup_trap_instrumentation(void) {
     *patch_bytes = 0xd4200000;  // replace instruction with debug trap
     if (debug)
       fprintf(stderr,
-              "Patch entry: %p[%x] = %p = %02x -> SHADOW(%p) #%d -> %016x\n",
+              "Patch entry: %p[%lx] = %p = %02x -> SHADOW(%p) #%d -> %016x\n",
               lib_addr, offset, lib_addr + offset, orig_bytes, shadow,
               bitmap_index, *shadow);