about summary refs log tree commit diff
path: root/unicorn_mode/patches
diff options
context:
space:
mode:
Diffstat (limited to 'unicorn_mode/patches')
-rw-r--r-- unicorn_mode/patches/afl-unicorn-cpu-inl.h | 290
-rw-r--r-- unicorn_mode/patches/patches.diff          | 107
2 files changed, 397 insertions, 0 deletions
diff --git a/unicorn_mode/patches/afl-unicorn-cpu-inl.h b/unicorn_mode/patches/afl-unicorn-cpu-inl.h
new file mode 100644
index 00000000..892c3f72
--- /dev/null
+++ b/unicorn_mode/patches/afl-unicorn-cpu-inl.h
@@ -0,0 +1,290 @@
+/*
+   american fuzzy lop - high-performance binary-only instrumentation
+   -----------------------------------------------------------------
+
+   Written by Andrew Griffiths <agriffiths@google.com> and
+              Michal Zalewski <lcamtuf@google.com>
+
+   TCG instrumentation and block chaining support by Andrea Biondo
+                                      <andrea.biondo965@gmail.com>
+   Adapted for afl-unicorn by Dominik Maier <mail@dmnk.co>
+
+   Idea & design very much by Andrew Griffiths.
+
+   Copyright 2015, 2016 Google Inc. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   This code is a shim patched into the separately-distributed source
+   code of Unicorn 1.0.1. It leverages the built-in QEMU tracing functionality
+   to implement AFL-style instrumentation and to take care of the remaining
+   parts of the AFL fork server logic.
+
+   The resulting QEMU binary is essentially a standalone instrumentation
+   tool; for an example of how to leverage it for other purposes, you can
+   have a look at afl-showmap.c.
+
+ */
+
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "../../config.h"
+
+/***************************
+ * VARIOUS AUXILIARY STUFF *
+ ***************************/
+
+/* A snippet patched into tb_find_slow to inform the parent process that
+   we have hit a new block that hasn't been translated yet, and to tell
+   it to translate within its own context, too (this avoids translation
+   overhead in the next forked-off copy). */
+
+#define AFL_UNICORN_CPU_SNIPPET1 do { \
+    afl_request_tsl(pc, cs_base, flags); \
+  } while (0)
+
+/* This snippet kicks in when the instruction pointer is positioned at
+   _start and does the usual forkserver stuff, not very different from
+   regular instrumentation injected via afl-as.h. */
+
+#define AFL_UNICORN_CPU_SNIPPET2 do { \
+    if(unlikely(afl_first_instr == 0)) { \
+      afl_setup(); \
+      afl_forkserver(env); \
+      afl_first_instr = 1; \
+    } \
+    afl_maybe_log(tb->pc); \
+  } while (0)
+
+/* We use one additional file descriptor to relay "needs translation"
+   messages between the child and the fork server. */
+
+#define TSL_FD (FORKSRV_FD - 1)
+
+/* This is equivalent to afl-as.h: */
+
+static unsigned char *afl_area_ptr;
+
+/* Set in the child process in forkserver mode: */
+
+static unsigned char afl_fork_child;
+static unsigned int afl_forksrv_pid;
+
+/* Instrumentation ratio: */
+
+static unsigned int afl_inst_rms = MAP_SIZE;
+
+/* Function declarations. */
+
+static void afl_setup(void);
+static void afl_forkserver(CPUArchState*);
+static inline void afl_maybe_log(unsigned long);
+
+static void afl_wait_tsl(CPUArchState*, int);
+static void afl_request_tsl(target_ulong, target_ulong, uint64_t);
+
+static TranslationBlock *tb_find_slow(CPUArchState*, target_ulong,
+                                      target_ulong, uint64_t);
+
+/* Data structure passed around by the translate handlers: */
+
+struct afl_tsl {
+  target_ulong pc;
+  target_ulong cs_base;
+  uint64_t flags;
+};
+
+/*************************
+ * ACTUAL IMPLEMENTATION *
+ *************************/
+
+/* Set up SHM region and initialize other stuff. */
+
+static void afl_setup(void) {
+
+  char *id_str = getenv(SHM_ENV_VAR),
+       *inst_r = getenv("AFL_INST_RATIO");
+
+  int shm_id;
+
+  if (inst_r) {
+
+    unsigned int r;
+
+    r = atoi(inst_r);
+
+    if (r > 100) r = 100;
+    if (!r) r = 1;
+
+    afl_inst_rms = MAP_SIZE * r / 100;
+
+  }
+
+  if (id_str) {
+
+    shm_id = atoi(id_str);
+    afl_area_ptr = shmat(shm_id, NULL, 0);
+
+    if (afl_area_ptr == (void*)-1) exit(1);
+
+    /* With AFL_INST_RATIO set to a low value, we want to touch the bitmap
+       so that the parent doesn't give up on us. */
+
+    if (inst_r) afl_area_ptr[0] = 1;
+  }
+}
+
+/* Fork server logic, invoked once we hit first emulated instruction. */
+
+static void afl_forkserver(CPUArchState *env) {
+
+  static unsigned char tmp[4];
+
+  if (!afl_area_ptr) return;
+
+  /* Tell the parent that we're alive. If the parent doesn't want
+     to talk, assume that we're not running in forkserver mode. */
+
+  if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
+
+  afl_forksrv_pid = getpid();
+
+  /* All right, let's await orders... */
+
+  while (1) {
+
+    pid_t child_pid;
+    int status, t_fd[2];
+
+    /* Whoops, parent dead? */
+
+    if (read(FORKSRV_FD, tmp, 4) != 4) exit(2);
+
+    /* Establish a channel with child to grab translation commands. We'll 
+       read from t_fd[0], child will write to TSL_FD. */
+
+    if (pipe(t_fd) || dup2(t_fd[1], TSL_FD) < 0) exit(3);
+    close(t_fd[1]);
+
+    child_pid = fork();
+    if (child_pid < 0) exit(4);
+
+    if (!child_pid) {
+
+      /* Child process. Close descriptors and run free. */
+
+      afl_fork_child = 1;
+      close(FORKSRV_FD);
+      close(FORKSRV_FD + 1);
+      close(t_fd[0]);
+      return;
+
+    }
+
+    /* Parent. */
+
+    close(TSL_FD);
+
+    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) exit(5);
+
+    /* Collect translation requests until child dies and closes the pipe. */
+
+    afl_wait_tsl(env, t_fd[0]);
+
+    /* Get and relay exit status to parent. */
+
+    if (waitpid(child_pid, &status, 0) < 0) exit(6);
+    if (write(FORKSRV_FD + 1, &status, 4) != 4) exit(7);
+
+  }
+
+}
+
+
+/* The equivalent of the tuple logging routine from afl-as.h. */
+
+static inline void afl_maybe_log(unsigned long cur_loc) {
+
+  static __thread unsigned long prev_loc;
+
+  // DEBUG
+  //printf("IN AFL_MAYBE_LOG 0x%lx\n", cur_loc);
+
+  // MODIFIED FOR UNICORN MODE -> We want to log all addresses,
+  // so the checks for 'start < addr < end' are removed
+  if(!afl_area_ptr)
+    return;
+
+  // DEBUG
+  //printf("afl_area_ptr = %p\n", afl_area_ptr);
+
+  /* Looks like QEMU always maps to fixed locations, so ASAN is not a
+     concern. Phew. But instruction addresses may be aligned. Let's mangle
+     the value to get something quasi-uniform. */
+
+  cur_loc  = (cur_loc >> 4) ^ (cur_loc << 8);
+  cur_loc &= MAP_SIZE - 1;
+
+  /* Implement probabilistic instrumentation by looking at scrambled block
+     address. This keeps the instrumented locations stable across runs. */
+
+  // DEBUG
+  //printf("afl_inst_rms = 0x%lx\n", afl_inst_rms);
+
+  if (cur_loc >= afl_inst_rms) return;
+
+  // DEBUG
+  //printf("cur_loc = 0x%lx\n", cur_loc);  
+
+  afl_area_ptr[cur_loc ^ prev_loc]++;
+  prev_loc = cur_loc >> 1;
+
+}
+
+
+/* This code is invoked whenever QEMU decides that it doesn't have a
+   translation of a particular block and needs to compute it. When this happens,
+   we tell the parent to mirror the operation, so that the next fork() has a
+   cached copy. */
+
+static void afl_request_tsl(target_ulong pc, target_ulong cb, uint64_t flags) {
+
+  struct afl_tsl t;
+
+  if (!afl_fork_child) return;
+
+  t.pc      = pc;
+  t.cs_base = cb;
+  t.flags   = flags;
+
+  if (write(TSL_FD, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
+    return;
+
+}
+
+
+/* This is the other side of the same channel. Since timeouts are handled by
+   afl-fuzz simply killing the child, we can just wait until the pipe breaks. */
+
+static void afl_wait_tsl(CPUArchState *env, int fd) {
+
+  struct afl_tsl t;
+
+  while (1) {
+
+    /* Broken pipe means it's time to return to the fork server routine. */
+
+    if (read(fd, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
+      break;
+
+    tb_find_slow(env, t.pc, t.cs_base, t.flags);
+  }
+
+  close(fd);
+}
+
diff --git a/unicorn_mode/patches/patches.diff b/unicorn_mode/patches/patches.diff
new file mode 100644
index 00000000..71a1e2f3
--- /dev/null
+++ b/unicorn_mode/patches/patches.diff
@@ -0,0 +1,107 @@
+diff --git a/Makefile b/Makefile
+index 7d73782..fb3ccfd 100644
+--- a/Makefile
++++ b/Makefile
+@@ -88,6 +88,10 @@ AR = llvm-ar
+ LDFLAGS := -fsanitize=address ${LDFLAGS}
+ endif
+ 
++ifeq ($(UNICORN_AFL),yes)
++UNICORN_CFLAGS += -DUNICORN_AFL
++endif
++
+ ifeq ($(CROSS),)
+ CC ?= cc
+ AR ?= ar
+diff --git a/config.mk b/config.mk
+index c3621fb..c7b4f7e 100644
+--- a/config.mk
++++ b/config.mk
+@@ -8,7 +8,7 @@
+ # Compile with debug info when you want to debug code.
+ # Change this to 'no' for release edition.
+ 
+-UNICORN_DEBUG ?= yes
++UNICORN_DEBUG ?= no
+ 
+ ################################################################################
+ # Specify which archs you want to compile in. By default, we build all archs.
+@@ -28,3 +28,9 @@ UNICORN_STATIC ?= yes
+ # a shared library.
+ 
+ UNICORN_SHARED ?= yes
++
++
++################################################################################
++# Changing 'UNICORN_AFL = yes' to 'UNICORN_AFL = no' disables AFL instrumentation
++
++UNICORN_AFL ?= yes
+diff --git a/qemu/cpu-exec.c b/qemu/cpu-exec.c
+index 7755adf..8114b70 100644
+--- a/qemu/cpu-exec.c
++++ b/qemu/cpu-exec.c
+@@ -24,6 +24,11 @@
+ 
+ #include "uc_priv.h"
+ 
++#if defined(UNICORN_AFL)
++#include "../afl-unicorn-cpu-inl.h"
++static int afl_first_instr = 0;
++#endif 
++
+ static tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr);
+ static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
+         target_ulong cs_base, uint64_t flags);
+@@ -231,6 +236,10 @@ int cpu_exec(struct uc_struct *uc, CPUArchState *env)   // qq
+                             next_tb & TB_EXIT_MASK, tb);
+                 }
+ 
++#if defined(UNICORN_AFL)
++                AFL_UNICORN_CPU_SNIPPET2;
++#endif
++
+                 /* cpu_interrupt might be called while translating the
+                    TB, but before it is linked into a potentially
+                    infinite loop and becomes env->current_tb. Avoid
+@@ -369,6 +378,11 @@ static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
+ not_found:
+     /* if no translated code available, then translate it now */
+     tb = tb_gen_code(cpu, pc, cs_base, (int)flags, 0);   // qq
++    
++#if defined(UNICORN_AFL)
++    /* There seems to be no chaining in unicorn ever? :( */
++    AFL_UNICORN_CPU_SNIPPET1;
++#endif
+ 
+ found:
+     /* Move the last found TB to the head of the list */
+diff --git a/qemu/translate-all.c b/qemu/translate-all.c
+index 1a96c34..7ef4878 100644
+--- a/qemu/translate-all.c
++++ b/qemu/translate-all.c
+@@ -403,11 +403,25 @@ static PageDesc *page_find_alloc(struct uc_struct *uc, tb_page_addr_t index, int
+ 
+ #if defined(CONFIG_USER_ONLY)
+     /* We can't use g_malloc because it may recurse into a locked mutex. */
++#if defined(UNICORN_AFL)
++    /* This was added by unicorn-afl to bail out semi-gracefully if out of memory. */
++# define ALLOC(P, SIZE)                                 \
++    do {                                                \
++        void* _tmp = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
++                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
++        if (_tmp == (void*)-1) { \
++            qemu_log(">>> Out of memory for stack, bailing out. <<<\n"); \
++            exit(1); \
++        } \
++        (P) = _tmp; \
++    } while (0)
++#else /* !UNICORN_AFL */
+ # define ALLOC(P, SIZE)                                 \
+     do {                                                \
+         P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
+                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
+     } while (0)
++#endif /* UNICORN_AFL */
+ #else
+ # define ALLOC(P, SIZE) \
+     do { P = g_malloc0(SIZE); } while (0)