14 files changed, 127 insertions, 45 deletions
diff --git a/Dockerfile b/Dockerfile
index dec952af..8779fee5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,6 +14,7 @@ ARG DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && \
     apt-get -y install --no-install-suggests --no-install-recommends \
     automake \
+    ninja-build \
     bison flex \
     build-essential \
     git \
@@ -26,7 +27,7 @@ RUN apt-get update && \
     gnuplot-nox \
     && rm -rf /var/lib/apt/lists/*
 
-RUN echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-11 main" >> /etc/apt/sources.list && \
+RUN echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-12 main" >> /etc/apt/sources.list && \
     wget -qO - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
 
 RUN echo "deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu focal main" >> /etc/apt/sources.list && \
@@ -35,17 +36,17 @@ RUN echo "deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu focal main
 RUN apt-get update && apt-get full-upgrade -y && \
     apt-get -y install --no-install-suggests --no-install-recommends \
     gcc-10 g++-10 gcc-10-plugin-dev gcc-10-multilib gdb lcov \
-    clang-11 clang-tools-11 libc++1-11 libc++-11-dev \
-    libc++abi1-11 libc++abi-11-dev libclang1-11 libclang-11-dev \
-    libclang-common-11-dev libclang-cpp11 libclang-cpp11-dev liblld-11 \
-    liblld-11-dev liblldb-11 liblldb-11-dev libllvm11 libomp-11-dev \
-    libomp5-11 lld-11 lldb-11 llvm-11 llvm-11-dev llvm-11-runtime llvm-11-tools \
+    clang-12 clang-tools-12 libc++1-12 libc++-12-dev \
+    libc++abi1-12 libc++abi-12-dev libclang1-12 libclang-12-dev \
+    libclang-common-12-dev libclang-cpp12 libclang-cpp12-dev liblld-12 \
+    liblld-12-dev liblldb-12 liblldb-12-dev libllvm12 libomp-12-dev \
+    libomp5-12 lld-12 lldb-12 llvm-12 llvm-12-dev llvm-12-runtime llvm-12-tools \
     && rm -rf /var/lib/apt/lists/*
 
 RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 0
 RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 0
 
-ENV LLVM_CONFIG=llvm-config-11
+ENV LLVM_CONFIG=llvm-config-12
 ENV AFL_SKIP_CPUFREQ=1
 
 RUN git clone https://github.com/vanhauser-thc/afl-cov /afl-cov
diff --git a/README.md b/README.md
index 8c4aab93..118a619d 100644
--- a/README.md
+++ b/README.md
@@ -68,9 +68,10 @@ behaviours and defaults:
   3. [How to fuzz a target](#how-to-fuzz-with-afl)
   4. [Fuzzing binary-only targets](#fuzzing-binary-only-targets)
   5. [Good examples and writeups of afl++ usages](#good-examples-and-writeups)
-  6. [Branches](#branches)
-  7. [Want to help?](#help-wanted)
-  8. [Detailed help and description of afl++](#challenges-of-guided-fuzzing)
+  6. [CI Fuzzing](#ci-fuzzing)
+  7. [Branches](#branches)
+  8. [Want to help?](#help-wanted)
+  9. [Detailed help and description of afl++](#challenges-of-guided-fuzzing)
 
 ## Important features of afl++
 
@@ -689,6 +690,9 @@ If you want to know more, the rest of this README and the tons of texts in
 Note that there are also a lot of tools out there that help fuzzing with afl++
 (some might be deprecated or unsupported):
 
+Speeding up fuzzing:
+ * [libfiowrapper](https://github.com/marekzmyslowski/libfiowrapper) - if the function you want to fuzz requires loading a file, this allows using the shared memory testcase feature :-) - recommended.
+
 Minimization of test cases:
  * [afl-pytmin](https://github.com/ilsani/afl-pytmin) - a wrapper for afl-tmin that tries to speed up the process of minimization of a single test case by using many CPU cores.
  * [afl-ddmin-mod](https://github.com/MarkusTeufelberger/afl-ddmin-mod) - a variation of afl-tmin based on the ddmin algorithm. 
@@ -718,11 +722,50 @@ Crash processing
  * [AFLize](https://github.com/d33tah/aflize) - a tool that automatically generates builds of debian packages suitable for AFL.
  * [afl-fid](https://github.com/FoRTE-Research/afl-fid) - a set of tools for working with input data.
 
+## CI Fuzzing
+
+Some notes on CI fuzzing - it differs from normal fuzzing campaigns because
+the runs are much shorter.
+
+1. Always:
+  * LTO has a much longer compile time, which is at odds with short fuzzing
+    runs - hence use afl-clang-fast instead.
+  * `AFL_FAST_CAL` - enable fast calibration; this halves the time needed to
+    load the saturated corpus.
+  * `AFL_CMPLOG_ONLY_NEW` - only perform cmplog on newly found paths, not on the
+    initial corpus, as this has very likely been done for it already.
+  * Keep the generated corpus, use afl-cmin and reuse it every time!
+
+2. Additionally randomize the afl++ compilation options, e.g.
+  * 40% for `AFL_LLVM_CMPLOG`
+  * 10% for `AFL_LLVM_LAF_ALL`
+
+3. Also randomize the afl-fuzz runtime options, e.g.
+  * 60% for `AFL_DISABLE_TRIM`
+  * 50% use a dictionary generated by `AFL_LLVM_DICT2FILE`
+  * 50% use MOpt (`-L 0`)
+  * 40% for `AFL_EXPAND_HAVOC_NOW`
+  * 30% for old queue processing (`-Z`)
+  * for CMPLOG targets, 60% for `-l 2`, 40% for `-l 3`
+
+4. Do *not* run any `-M` modes, just running `-S` modes is better for CI fuzzing.
+
 ## Fuzzing binary-only targets
 
 When source code is *NOT* available, afl++ offers various support for fast,
 on-the-fly instrumentation of black-box binaries. 
 
+If you do not have to use Unicorn the following setup is recommended:
+  * run 1 afl-fuzz -Q instance with CMPLOG (`-c 0` + `AFL_COMPCOV_LEVEL=2`)
+  * run 1 afl-fuzz -Q instance with QASAN  (`AFL_USE_QASAN=1`)
+  * run 1 afl-fuzz -Q instance with LAF (`AFL_PRELOAD=libcmpcov.so` + `AFL_COMPCOV_LEVEL=2`)
+
+Then run as many instances as you have cores left with either -Q mode or - better -
+use a binary rewriter like afl-dyninst, retrowrite, zipr, fibre, etc.
+
+For Qemu mode, check out the persistent mode and snapshot features, they give
+a huge speed improvement!  
+
 ### QEMU
 
 For linux programs and its libraries this is accomplished with a version of
@@ -733,7 +776,8 @@ feature by doing:
 cd qemu_mode
 ./build_qemu_support.sh
 ```
-For additional instructions and caveats, see [qemu_mode/README.md](qemu_mode/README.md).
+For additional instructions and caveats, see [qemu_mode/README.md](qemu_mode/README.md) -
+check out the snapshot feature! :-)
 If possible you should use the persistent mode, see [qemu_mode/README.persistent.md](qemu_mode/README.persistent.md).
 The mode is approximately 2-5x slower than compile-time instrumentation, and is
 less conducive to parallelization.
@@ -741,6 +785,9 @@ less conducive to parallelization.
 If [afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) works for
 your binary, then you can use afl-fuzz normally and it will have twice
 the speed compared to qemu_mode (but slower than persistent mode).
+Note that several other binary rewriters exist, all with their advantages and
+caveats. As rewriting a binary is much faster than Qemu this is a highly
+recommended approach!
 
 ### Unicorn
 
diff --git a/include/coverage-32.h b/include/coverage-32.h
index a5cc498c..ca36c29f 100644
--- a/include/coverage-32.h
+++ b/include/coverage-32.h
@@ -97,7 +97,7 @@ inline void discover_word(u8 *ret, u32 *current, u32 *virgin) {
 #define PACK_SIZE 16
 inline u32 skim(const u32 *virgin, const u32 *current, const u32 *current_end) {
 
-  for (; current != current_end; virgin += 4, current += 4) {
+  for (; current < current_end; virgin += 4, current += 4) {
 
     if (current[0] && classify_word(current[0]) & virgin[0]) return 1;
     if (current[1] && classify_word(current[1]) & virgin[1]) return 1;
diff --git a/include/coverage-64.h b/include/coverage-64.h
index 0ede5fa5..54fe9d33 100644
--- a/include/coverage-64.h
+++ b/include/coverage-64.h
@@ -145,7 +145,7 @@ inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
 
   __m256i zeroes = _mm256_setzero_si256();
 
-  for (; current != current_end; virgin += 4, current += 4) {
+  for (; current < current_end; virgin += 4, current += 4) {
 
     __m256i value = *(__m256i *)current;
     __m256i cmp = _mm256_cmpeq_epi64(value, zeroes);
@@ -172,7 +172,7 @@ inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
   #define PACK_SIZE 32
 inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
 
-  for (; current != current_end; virgin += 4, current += 4) {
+  for (; current < current_end; virgin += 4, current += 4) {
 
     if (current[0] && classify_word(current[0]) & virgin[0]) return 1;
     if (current[1] && classify_word(current[1]) & virgin[1]) return 1;
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
index c24173af..65a5d3d2 100644
--- a/instrumentation/afl-compiler-rt.o.c
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -70,7 +70,7 @@
    run. It will end up as .comm, so it shouldn't be too wasteful. */
 
 #if MAP_SIZE <= 65536
-  #define MAP_INITIAL_SIZE 256000
+  #define MAP_INITIAL_SIZE 1048576
 #else
   #define MAP_INITIAL_SIZE MAP_SIZE
 #endif
@@ -368,8 +368,8 @@ static void __afl_map_shm(void) {
 
     if (__afl_map_size && __afl_map_size > MAP_SIZE) {
 
-      u8 *map_env = getenv("AFL_MAP_SIZE");
-      if (!map_env || atoi(map_env) < MAP_SIZE) {
+      u8 *map_env = (u8 *)getenv("AFL_MAP_SIZE");
+      if (!map_env || atoi((char *)map_env) < MAP_SIZE) {
 
         send_forkserver_error(FS_ERROR_MAP_SIZE);
         _exit(1);
@@ -378,7 +378,7 @@ static void __afl_map_shm(void) {
 
     }
 
-    __afl_area_ptr = shmat(shm_id, (void *)__afl_map_addr, 0);
+    __afl_area_ptr = (u8 *)shmat(shm_id, (void *)__afl_map_addr, 0);
 
     /* Whooooops. */
 
@@ -405,9 +405,9 @@ static void __afl_map_shm(void) {
 
              __afl_map_addr) {
 
-    __afl_area_ptr =
-        mmap((void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE,
-             MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+    __afl_area_ptr = (u8 *)mmap(
+        (void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE,
+        MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 
     if (__afl_area_ptr == MAP_FAILED) {
 
@@ -425,7 +425,7 @@ static void __afl_map_shm(void) {
 
     if (__afl_final_loc > MAP_INITIAL_SIZE) {
 
-      __afl_area_ptr = malloc(__afl_final_loc);
+      __afl_area_ptr = (u8 *)malloc(__afl_final_loc);
 
     }
 
@@ -439,7 +439,7 @@ static void __afl_map_shm(void) {
 
     if (__afl_map_size > MAP_INITIAL_SIZE) {
 
-      __afl_area_ptr_dummy = malloc(__afl_map_size);
+      __afl_area_ptr_dummy = (u8 *)malloc(__afl_map_size);
 
       if (__afl_area_ptr_dummy) {
 
@@ -505,7 +505,7 @@ static void __afl_map_shm(void) {
 #else
     u32 shm_id = atoi(id_str);
 
-    __afl_cmp_map = shmat(shm_id, NULL, 0);
+    __afl_cmp_map = (struct cmp_map *)shmat(shm_id, NULL, 0);
 #endif
 
     __afl_cmp_map_backup = __afl_cmp_map;
@@ -1090,7 +1090,7 @@ __attribute__((constructor(0))) void __afl_auto_first(void) {
   if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
   u8 *ptr;
 
-  ptr = (u8 *)malloc(1024000);
+  ptr = (u8 *)malloc(2097152);
 
   if (ptr && (ssize_t)ptr != -1) {
 
@@ -1320,7 +1320,7 @@ void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2, uint8_t attr) {
 
 void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2, uint8_t attr) {
 
-  // fprintf(stderr, "hook8 arg0=%lx arg1=%lx attr=%u\n", arg1, arg2, attr);
+  // fprintf(stderr, "hook8 arg0=%lx arg1=%lx attr=%u\n", arg1, arg2, attr);
 
   if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
 
@@ -1455,24 +1455,48 @@ void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2) {
 
 }
 
+void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2) {
+
+  __cmplog_ins_hook1(arg1, arg2, 0);
+
+}
+
 void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2) {
 
   __cmplog_ins_hook2(arg1, arg2, 0);
 
 }
 
+void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2) {
+
+  __cmplog_ins_hook2(arg1, arg2, 0);
+
+}
+
 void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) {
 
   __cmplog_ins_hook4(arg1, arg2, 0);
 
 }
 
+void __sanitizer_cov_trace_const_cmp4(uint32_t arg1, uint32_t arg2) {
+
+  __cmplog_ins_hook4(arg1, arg2, 0);
+
+}
+
 void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2) {
 
   __cmplog_ins_hook8(arg1, arg2, 0);
 
 }
 
+void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2) {
+
+  __cmplog_ins_hook8(arg1, arg2, 0);
+
+}
+
 #ifdef WORD_SIZE_64
 void __sanitizer_cov_trace_cmp16(uint128_t arg1, uint128_t arg2) {
 
@@ -1528,7 +1552,7 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
 // to avoid to call it on .text addresses
 static int area_is_mapped(void *ptr, size_t len) {
 
-  char *p = ptr;
+  char *p = (char *)ptr;
   char *page = (char *)((uintptr_t)p & ~(sysconf(_SC_PAGE_SIZE) - 1));
 
   int r = msync(page, (p - page) + len, MS_ASYNC);
diff --git a/instrumentation/afl-llvm-lto-instrumentation.so.cc b/instrumentation/afl-llvm-lto-instrumentation.so.cc
index 13dca8c4..fa494f44 100644
--- a/instrumentation/afl-llvm-lto-instrumentation.so.cc
+++ b/instrumentation/afl-llvm-lto-instrumentation.so.cc
@@ -69,7 +69,7 @@ class AFLLTOPass : public ModulePass {
 
     if (getenv("AFL_DEBUG")) debug = 1;
     if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL)
-      if ((afl_global_id = atoi(ptr)) < 0 || afl_global_id >= MAP_SIZE)
+      if ((afl_global_id = (uint32_t)atoi(ptr)) >= MAP_SIZE)
         FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is not between 0 and %u\n",
               ptr, MAP_SIZE - 1);
 
@@ -88,7 +88,7 @@ class AFLLTOPass : public ModulePass {
   bool runOnModule(Module &M) override;
 
  protected:
-  int      afl_global_id = 1, autodictionary = 1;
+  uint32_t afl_global_id = 1, autodictionary = 1;
   uint32_t function_minimum_size = 1;
   uint32_t inst_blocks = 0, inst_funcs = 0, total_instr = 0;
   uint64_t map_addr = 0x10000;
@@ -800,7 +800,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
           if (documentFile) {
 
-            fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%d\n",
+            fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%u\n",
                     moduleID, F.getName().str().c_str(), afl_global_id);
 
           }
@@ -872,10 +872,10 @@ bool AFLLTOPass::runOnModule(Module &M) {
     while ((map = map >> 1))
       pow2map++;
     WARNF(
-        "We have %d blocks to instrument but the map size is only %u. Either "
+        "We have %u blocks to instrument but the map size is only %u. Either "
         "edit config.h and set MAP_SIZE_POW2 from %d to %u, then recompile "
         "afl-fuzz and llvm_mode and then make this target - or set "
-        "AFL_MAP_SIZE with at least size %d when running afl-fuzz with this "
+        "AFL_MAP_SIZE with at least size %u when running afl-fuzz with this "
         "target.",
         afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map, afl_global_id);
 
@@ -925,7 +925,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
       uint32_t write_loc = afl_global_id;
 
-      if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3);
+      if (afl_global_id % 32) write_loc = (((afl_global_id + 32) >> 5) << 5);
 
       GlobalVariable *AFLFinalLoc = new GlobalVariable(
           M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc");
diff --git a/qemu_mode/build_qemu_support.sh b/qemu_mode/build_qemu_support.sh
index a435f6f6..a161cc43 100755
--- a/qemu_mode/build_qemu_support.sh
+++ b/qemu_mode/build_qemu_support.sh
@@ -364,6 +364,10 @@ ORIG_CROSS="$CROSS"
 
 if [ "$ORIG_CROSS" = "" ]; then
   CROSS=$CPU_TARGET-linux-gnu-gcc
+  if ! command -v "$CROSS" > /dev/null
+  then # works on Arch Linux
+    CROSS=$CPU_TARGET-pc-linux-gnu-gcc
+  fi
 fi
 
 if ! command -v "$CROSS" > /dev/null
diff --git a/src/afl-cc.c b/src/afl-cc.c
index cf10d9a7..76f4a437 100644
--- a/src/afl-cc.c
+++ b/src/afl-cc.c
@@ -1840,6 +1840,8 @@ int main(int argc, char **argv, char **envp) {
     for (i = 0; i < argc; i++)
       SAYF(" '%s'", argv[i]);
     SAYF("\n");
+    fflush(stdout);
+    fflush(stderr);
 
   }
 
@@ -1880,6 +1882,8 @@ int main(int argc, char **argv, char **envp) {
     for (i = 0; i < (s32)cc_par_cnt; i++)
       SAYF(" '%s'", cc_params[i]);
     SAYF("\n");
+    fflush(stdout);
+    fflush(stderr);
 
   }
 
diff --git a/test/test-libextensions.sh b/test/test-libextensions.sh
index 905a4cbc..40a898c8 100755
--- a/test/test-libextensions.sh
+++ b/test/test-libextensions.sh
@@ -38,14 +38,4 @@ test -e ../libdislocator.so && {
 }
 rm -f test-compcov
 
-test -z "$AFL_CC" && {
-  if type gcc >/dev/null; then
-    export AFL_CC=gcc
-  else
-    if type clang >/dev/null; then
-      export AFL_CC=clang
-    fi
-  fi
-}
-
 . ./test-post.sh
diff --git a/test/test-qemu-mode.sh b/test/test-qemu-mode.sh
index 73b39a43..0cd6ef40 100755
--- a/test/test-qemu-mode.sh
+++ b/test/test-qemu-mode.sh
@@ -3,6 +3,16 @@
 . ./test-pre.sh
 
 $ECHO "$BLUE[*] Testing: qemu_mode"
+test -z "$AFL_CC" && {
+  if type gcc >/dev/null; then
+    export AFL_CC=gcc
+  else
+    if type clang >/dev/null; then
+      export AFL_CC=clang
+    fi
+  fi
+}
+
 test -e ../afl-qemu-trace && {
   cc -pie -fPIE -o test-instr ../test-instr.c
   cc -o test-compcov test-compcov.c
diff --git a/unicorn_mode/UNICORNAFL_VERSION b/unicorn_mode/UNICORNAFL_VERSION
index 4d8a03b2..d9ae5590 100644
--- a/unicorn_mode/UNICORNAFL_VERSION
+++ b/unicorn_mode/UNICORNAFL_VERSION
@@ -1 +1 @@
-80d31ef3
+fb2fc9f2
diff --git a/unicorn_mode/build_unicorn_support.sh b/unicorn_mode/build_unicorn_support.sh
index c32eb3e1..f1d028f8 100755
--- a/unicorn_mode/build_unicorn_support.sh
+++ b/unicorn_mode/build_unicorn_support.sh
@@ -147,6 +147,8 @@ if [ "$PREREQ_NOTFOUND" = "1" ]; then
   exit 1
 fi
 
+unset CFLAGS
+
 echo "[+] All checks passed!"
 
 echo "[*] Making sure unicornafl is checked out"
diff --git a/unicorn_mode/unicornafl b/unicorn_mode/unicornafl
-Subproject 80d31ef367f7a1a75fc48e08e129d10f2ffa049
+Subproject fb2fc9f25df32f17f6b6b859e4dbd70f9a857e0
diff --git a/utils/aflpp_driver/aflpp_driver.c b/utils/aflpp_driver/aflpp_driver.c
index 7bb929b2..6af79e14 100644
--- a/utils/aflpp_driver/aflpp_driver.c
+++ b/utils/aflpp_driver/aflpp_driver.c
@@ -173,7 +173,7 @@ size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) {
 // Execute any files provided as parameters.
 static int ExecuteFilesOnyByOne(int argc, char **argv) {
 
-  unsigned char *buf = malloc(MAX_FILE);
+  unsigned char *buf = (unsigned char *)malloc(MAX_FILE);
   for (int i = 1; i < argc; i++) {
 
     int fd = open(argv[i], O_RDONLY);