Diffstat (limited to 'include')
-rw-r--r--  include/afl-as.h      767
-rw-r--r--  include/alloc-inl.h   586
-rw-r--r--  include/common.h        6
-rw-r--r--  include/config.h      359
-rw-r--r--  include/debug.h       251
-rw-r--r--  include/forkserver.h   25
-rw-r--r--  include/hash.h        104
-rw-r--r--  include/sharedmem.h     7
-rw-r--r--  include/types.h        91
9 files changed, 2196 insertions, 0 deletions
diff --git a/include/afl-as.h b/include/afl-as.h
new file mode 100644
index 00000000..4748eda7
--- /dev/null
+++ b/include/afl-as.h
@@ -0,0 +1,767 @@
+/*
+   american fuzzy lop - injectable parts
+   -------------------------------------
+
+   Written and maintained by Michal Zalewski <lcamtuf@google.com>
+
+   Forkserver design by Jann Horn <jannhorn@googlemail.com>
+
+   Copyright 2013, 2014, 2015 Google Inc. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   This file houses the assembly-level instrumentation injected into fuzzed
+   programs. The instrumentation stores XORed pairs of data: identifiers of the
+   currently executing branch and the one that executed immediately before.
+
+   TL;DR: the instrumentation does shm_trace_map[cur_loc ^ prev_loc]++
+
+   The code is designed for 32-bit and 64-bit x86 systems. Both modes should
+   work everywhere except for Apple systems. Apple does relocations differently
+   from everybody else, so since their OSes have been 64-bit for quite a while
+   now, I didn't go through the mental effort of porting the 32-bit code.
+
+   In principle, similar code should be easy to inject into any well-behaved
+   binary-only code (e.g., using DynamoRIO). Conditional jumps offer natural
+   targets for instrumentation, and should offer comparable probe density.
+
+ */
+
+#ifndef _HAVE_AFL_AS_H
+#define _HAVE_AFL_AS_H
+
+#include "config.h"
+#include "types.h"
+
+/* 
+   -----------------
+   Performance notes
+   -----------------
+
+   Contributions to make this code faster are appreciated! Here are some
+   rough notes that may help with the task:
+
+   - Only the trampoline_fmt and the non-setup __afl_maybe_log code paths are
+     really worth optimizing; the setup / fork server stuff matters a lot less
+     and should be mostly just kept readable.
+
+   - We're aiming for modern CPUs with out-of-order execution and large
+     pipelines; the code mostly follows intuitive, human-readable
+     instruction ordering, because "textbook" manual reorderings make no
+     substantial difference.
+
+   - Interestingly, instrumented execution isn't a lot faster if we store a
+     variable pointer to the setup, log, or return routine and then do a reg
+     call from within trampoline_fmt. It does speed up non-instrumented
+     execution quite a bit, though, since that path just becomes
+     push-call-ret-pop.
+
+   - There is also not a whole lot to be gained by doing SHM attach at a
+     fixed address instead of retrieving __afl_area_ptr. Although it allows us
+     to have a shorter log routine inserted for conditional jumps and jump
+     labels (for a ~10% perf gain), there is a risk of bumping into other
+     allocations created by the program or by tools such as ASAN.
+
+   - popf is *awfully* slow, which is why we're doing the lahf / sahf +
+     overflow test trick. Unfortunately, this forces us to taint eax / rax, but
+     this dependency on a commonly-used register still beats the alternative of
+     using pushf / popf.
+
+     One possible optimization is to avoid touching flags by using a circular
+     buffer that stores just a sequence of current locations, with the XOR stuff
+     happening offline. Alas, this doesn't seem to have a huge impact:
+
+     https://groups.google.com/d/msg/afl-users/MsajVf4fRLo/2u6t88ntUBIJ
+
+   - Preforking one child a bit sooner, and then waiting for the "go" command
+     from within the child, doesn't offer major performance gains; fork() seems
+     to be relatively inexpensive these days. Preforking multiple children does
+     help, but badly breaks the "~1 core per fuzzer" design, making it harder to
+     scale up. Maybe there is some middle ground.
+
+   Perhaps of note: in the 64-bit version for all platforms except for Apple,
+   the instrumentation is done slightly differently than on 32-bit, with
+   __afl_prev_loc and __afl_area_ptr being local to the object file (.lcomm),
+   rather than global (.comm). This is to avoid GOTRELPC lookups in the critical
+   code path, which, AFAICT, are otherwise unavoidable if we want gcc -shared to
+   work; simple relocations between .bss and .text won't work on most 64-bit
+   platforms in such a case.
+
+   (Fun fact: on Apple systems, .lcomm can segfault the linker.)
+
+   The side effect is that state transitions are measured in a somewhat
+   different way, with the previous tuple being recorded separately within the
+   scope of every .c file. This should have no impact in any practical sense.
+
+   Another side effect of this design is that getenv() will be called once for
+   every .o file when running in non-instrumented mode; and since getenv() tends
+   to be optimized in funny ways, we need to be very careful to save every
+   oddball register it may touch.
+
+ */
+
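+/* For orientation: the hot path implemented in assembly below is roughly
+   equivalent to the following C. This is an illustrative sketch only -- it is
+   not part of the injected payload, and the setup / fork server logic is
+   omitted:
+
+     extern u8* __afl_area_ptr;        // SHM trace bitmap (MAP_SIZE bytes)
+     static u32 __afl_prev_loc;        // previous location, pre-shifted
+
+     void __afl_maybe_log(u32 cur_loc) {
+
+       if (!__afl_area_ptr) return;    // SHM not mapped yet -> setup path
+
+       __afl_area_ptr[cur_loc ^ __afl_prev_loc]++;
+       __afl_prev_loc = cur_loc >> 1;  // shift so that A->B != B->A
+
+     }
+
+ */
+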
+static const u8* trampoline_fmt_32 =
+
+  "\n"
+  "/* --- AFL TRAMPOLINE (32-BIT) --- */\n"
+  "\n"
+  ".align 4\n"
+  "\n"
+  "leal -16(%%esp), %%esp\n"
+  "movl %%edi,  0(%%esp)\n"
+  "movl %%edx,  4(%%esp)\n"
+  "movl %%ecx,  8(%%esp)\n"
+  "movl %%eax, 12(%%esp)\n"
+  "movl $0x%08x, %%ecx\n"
+  "call __afl_maybe_log\n"
+  "movl 12(%%esp), %%eax\n"
+  "movl  8(%%esp), %%ecx\n"
+  "movl  4(%%esp), %%edx\n"
+  "movl  0(%%esp), %%edi\n"
+  "leal 16(%%esp), %%esp\n"
+  "\n"
+  "/* --- END --- */\n"
+  "\n";
+
+static const u8* trampoline_fmt_64 =
+
+  "\n"
+  "/* --- AFL TRAMPOLINE (64-BIT) --- */\n"
+  "\n"
+  ".align 4\n"
+  "\n"
+  "leaq -(128+24)(%%rsp), %%rsp\n"
+  "movq %%rdx,  0(%%rsp)\n"
+  "movq %%rcx,  8(%%rsp)\n"
+  "movq %%rax, 16(%%rsp)\n"
+  "movq $0x%08x, %%rcx\n"
+  "call __afl_maybe_log\n"
+  "movq 16(%%rsp), %%rax\n"
+  "movq  8(%%rsp), %%rcx\n"
+  "movq  0(%%rsp), %%rdx\n"
+  "leaq (128+24)(%%rsp), %%rsp\n"
+  "\n"
+  "/* --- END --- */\n"
+  "\n";
+
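+/* In both templates, the $0x%08x placeholder is filled in by afl-as with a
+   random compile-time location ID (R(MAP_SIZE) in afl-as.c), which becomes
+   the cur_loc value handed to __afl_maybe_log in ecx / rcx. */
+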
+static const u8* main_payload_32 = 
+
+  "\n"
+  "/* --- AFL MAIN PAYLOAD (32-BIT) --- */\n"
+  "\n"
+  ".text\n"
+  ".att_syntax\n"
+  ".code32\n"
+  ".align 8\n"
+  "\n"
+
+  "__afl_maybe_log:\n"
+  "\n"
+  "  lahf\n"
+  "  seto %al\n"
+  "\n"
+  "  /* Check if SHM region is already mapped. */\n"
+  "\n"
+  "  movl  __afl_area_ptr, %edx\n"
+  "  testl %edx, %edx\n"
+  "  je    __afl_setup\n"
+  "\n"
+  "__afl_store:\n"
+  "\n"
+  "  /* Calculate and store hit for the code location specified in ecx. There\n"
+  "     is a double-XOR way of doing this without tainting another register,\n"
+  "     and we use it on 64-bit systems; but it's slower for 32-bit ones. */\n"
+  "\n"
+#ifndef COVERAGE_ONLY
+  "  movl __afl_prev_loc, %edi\n"
+  "  xorl %ecx, %edi\n"
+  "  shrl $1, %ecx\n"
+  "  movl %ecx, __afl_prev_loc\n"
+#else
+  "  movl %ecx, %edi\n"
+#endif /* ^!COVERAGE_ONLY */
+  "\n"
+#ifdef SKIP_COUNTS
+  "  orb  $1, (%edx, %edi, 1)\n"
+#else
+  "  incb (%edx, %edi, 1)\n"
+  "  adcb $0, (%edx, %edi, 1)\n" // never zero counter implementation. slightly better path discovery and little performance impact
+#endif /* ^SKIP_COUNTS */
+  "\n"
+  "__afl_return:\n"
+  "\n"
+  "  addb $127, %al\n"
+  "  sahf\n"
+  "  ret\n"
+  "\n"
+  ".align 8\n"
+  "\n"
+  "__afl_setup:\n"
+  "\n"
+  "  /* Do not retry setup if we had previous failures. */\n"
+  "\n"
+  "  cmpb $0, __afl_setup_failure\n"
+  "  jne  __afl_return\n"
+  "\n"
+  "  /* Map SHM, jumping to __afl_setup_abort if something goes wrong.\n"
+  "     We do not save FPU/MMX/SSE registers here, but hopefully, nobody\n"
+  "     will notice this early in the game. */\n"
+  "\n"
+  "  pushl %eax\n"
+  "  pushl %ecx\n"
+  "\n"
+  "  pushl $.AFL_SHM_ENV\n"
+  "  call  getenv\n"
+  "  addl  $4, %esp\n"
+  "\n"
+  "  testl %eax, %eax\n"
+  "  je    __afl_setup_abort\n"
+  "\n"
+#ifdef USEMMAP
+  "  pushl $384        /* shm_open mode 0600 */\n"
+  "  pushl $2          /* flags O_RDWR   */\n"
+  "  pushl %eax        /* SHM file path  */\n"
+  "  call  shm_open\n"
+  "  addl  $12, %esp\n"
+  "\n"
+  "  cmpl $-1, %eax\n"
+  "  je   __afl_setup_abort\n"
+  "\n"
+  "  pushl $0          /* mmap off       */\n"
+  "  pushl %eax        /* shm fd         */\n"
+  "  pushl $1          /* mmap flags     */\n"
+  "  pushl $3          /* mmap prot      */\n"
+  "  pushl $"STRINGIFY(MAP_SIZE)"          /* mmap len       */\n"
+  "  pushl $0          /* mmap addr      */\n"
+  "  call  mmap\n"
+  "  addl  $12, %esp\n"
+  "\n"
+  "  cmpl $-1, %eax\n"
+  "  je   __afl_setup_abort\n"
+  "\n"
+#else
+  "  pushl %eax\n"
+  "  call  atoi\n"
+  "  addl  $4, %esp\n"
+  "\n"
+  "  pushl $0          /* shmat flags    */\n"
+  "  pushl $0          /* requested addr */\n"
+  "  pushl %eax        /* SHM ID         */\n"
+  "  call  shmat\n"
+  "  addl  $12, %esp\n"
+  "\n"
+  "  cmpl $-1, %eax\n"
+  "  je   __afl_setup_abort\n"
+  "\n"
+#endif
+  "  /* Store the address of the SHM region. */\n"
+  "\n"
+  "  movl %eax, __afl_area_ptr\n"
+  "  movl %eax, %edx\n"
+  "\n"
+  "  popl %ecx\n"
+  "  popl %eax\n"
+  "\n"
+  "__afl_forkserver:\n"
+  "\n"
+  "  /* Enter the fork server mode to avoid the overhead of execve() calls. */\n"
+  "\n"
+  "  pushl %eax\n"
+  "  pushl %ecx\n"
+  "  pushl %edx\n"
+  "\n"
+  "  /* Phone home and tell the parent that we're OK. (Note that signals with\n"
+  "     no SA_RESTART will mess it up). If this fails, assume that the fd is\n"
+  "     closed because we were execve()d from an instrumented binary, or because\n" 
+  "     the parent doesn't want to use the fork server. */\n"
+  "\n"
+  "  pushl $4          /* length    */\n"
+  "  pushl $__afl_temp /* data      */\n"
+  "  pushl $" STRINGIFY((FORKSRV_FD + 1)) "  /* file desc */\n"
+  "  call  write\n"
+  "  addl  $12, %esp\n"
+  "\n"
+  "  cmpl  $4, %eax\n"
+  "  jne   __afl_fork_resume\n"
+  "\n"
+  "__afl_fork_wait_loop:\n"
+  "\n"
+  "  /* Wait for parent by reading from the pipe. Abort if read fails. */\n"
+  "\n"
+  "  pushl $4          /* length    */\n"
+  "  pushl $__afl_temp /* data      */\n"
+  "  pushl $" STRINGIFY(FORKSRV_FD) "        /* file desc */\n"
+  "  call  read\n"
+  "  addl  $12, %esp\n"
+  "\n"
+  "  cmpl  $4, %eax\n"
+  "  jne   __afl_die\n"
+  "\n"
+  "  /* Once woken up, create a clone of our process. This is an excellent use\n"
+  "     case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n"
+  "     caches getpid() results and offers no way to update the value, breaking\n"
+  "     abort(), raise(), and a bunch of other things :-( */\n"
+  "\n"
+  "  call fork\n"
+  "\n"
+  "  cmpl $0, %eax\n"
+  "  jl   __afl_die\n"
+  "  je   __afl_fork_resume\n"
+  "\n"
+  "  /* In parent process: write PID to pipe, then wait for child. */\n"
+  "\n"
+  "  movl  %eax, __afl_fork_pid\n"
+  "\n"
+  "  pushl $4              /* length    */\n"
+  "  pushl $__afl_fork_pid /* data      */\n"
+  "  pushl $" STRINGIFY((FORKSRV_FD + 1)) "      /* file desc */\n"
+  "  call  write\n"
+  "  addl  $12, %esp\n"
+  "\n"
+  "  pushl $0             /* no flags  */\n"
+  "  pushl $__afl_temp    /* status    */\n"
+  "  pushl __afl_fork_pid /* PID       */\n"
+  "  call  waitpid\n"
+  "  addl  $12, %esp\n"
+  "\n"
+  "  cmpl  $0, %eax\n"
+  "  jle   __afl_die\n"
+  "\n"
+  "  /* Relay wait status to pipe, then loop back. */\n"
+  "\n"
+  "  pushl $4          /* length    */\n"
+  "  pushl $__afl_temp /* data      */\n"
+  "  pushl $" STRINGIFY((FORKSRV_FD + 1)) "  /* file desc */\n"
+  "  call  write\n"
+  "  addl  $12, %esp\n"
+  "\n"
+  "  jmp __afl_fork_wait_loop\n"
+  "\n"
+  "__afl_fork_resume:\n"
+  "\n"
+  "  /* In child process: close fds, resume execution. */\n"
+  "\n"
+  "  pushl $" STRINGIFY(FORKSRV_FD) "\n"
+  "  call  close\n"
+  "\n"
+  "  pushl $" STRINGIFY((FORKSRV_FD + 1)) "\n"
+  "  call  close\n"
+  "\n"
+  "  addl  $8, %esp\n"
+  "\n"
+  "  popl %edx\n"
+  "  popl %ecx\n"
+  "  popl %eax\n"
+  "  jmp  __afl_store\n"
+  "\n"
+  "__afl_die:\n"
+  "\n"
+  "  xorl %eax, %eax\n"
+  "  call _exit\n"
+  "\n"
+  "__afl_setup_abort:\n"
+  "\n"
+  "  /* Record setup failure so that we don't keep calling\n"
+  "     shmget() / shmat() over and over again. */\n"
+  "\n"
+  "  incb __afl_setup_failure\n"
+  "  popl %ecx\n"
+  "  popl %eax\n"
+  "  jmp __afl_return\n"
+  "\n"
+  ".AFL_VARS:\n"
+  "\n"
+  "  .comm   __afl_area_ptr, 4, 32\n"
+  "  .comm   __afl_setup_failure, 1, 32\n"
+#ifndef COVERAGE_ONLY
+  "  .comm   __afl_prev_loc, 4, 32\n"
+#endif /* !COVERAGE_ONLY */
+  "  .comm   __afl_fork_pid, 4, 32\n"
+  "  .comm   __afl_temp, 4, 32\n"
+  "\n"
+  ".AFL_SHM_ENV:\n"
+  "  .asciz \"" SHM_ENV_VAR "\"\n"
+  "\n"
+  "/* --- END --- */\n"
+  "\n";
+
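+/* The fork server loop above boils down to the following logic -- an
+   illustrative C sketch only; error paths, fd cleanup in the child, and
+   register save/restore are omitted, and __afl_temp stands in for a 4-byte
+   scratch buffer:
+
+     write(FORKSRV_FD + 1, __afl_temp, 4);      // phone home ("hello")
+
+     for (;;) {
+
+       read(FORKSRV_FD, __afl_temp, 4);         // wait for "go" from parent
+
+       pid_t pid = fork();
+       if (!pid) break;                         // child: resume the target
+
+       write(FORKSRV_FD + 1, &pid, 4);          // report child PID
+       waitpid(pid, (int*)__afl_temp, 0);       // wait for it to finish
+       write(FORKSRV_FD + 1, __afl_temp, 4);    // relay the exit status
+
+     }
+
+ */
+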
+/* The OpenBSD hack is due to lahf and sahf not being recognized by some
+   versions of binutils: http://marc.info/?l=openbsd-cvs&m=141636589924400
+
+   The Apple code is a bit different when calling libc functions because
+   they are doing relocations differently from everybody else. We also need
+   to work around the crash issue with .lcomm and the fact that they don't
+   recognize .string. */
+
+#ifdef __APPLE__
+#  define CALL_L64(str)		"call _" str "\n"
+#else
+#  define CALL_L64(str)		"call " str "@PLT\n"
+#endif /* ^__APPLE__ */
+
+static const u8* main_payload_64 = 
+
+  "\n"
+  "/* --- AFL MAIN PAYLOAD (64-BIT) --- */\n"
+  "\n"
+  ".text\n"
+  ".att_syntax\n"
+  ".code64\n"
+  ".align 8\n"
+  "\n"
+  "__afl_maybe_log:\n"
+  "\n"
+#if defined(__OpenBSD__)  || (defined(__FreeBSD__) && (__FreeBSD__ < 9))
+  "  .byte 0x9f /* lahf */\n"
+#else
+  "  lahf\n"
+#endif /* ^__OpenBSD__, etc */
+  "  seto  %al\n"
+  "\n"
+  "  /* Check if SHM region is already mapped. */\n"
+  "\n"
+  "  movq  __afl_area_ptr(%rip), %rdx\n"
+  "  testq %rdx, %rdx\n"
+  "  je    __afl_setup\n"
+  "\n"
+  "__afl_store:\n"
+  "\n"
+  "  /* Calculate and store hit for the code location specified in rcx. */\n"
+  "\n"
+#ifndef COVERAGE_ONLY
+  "  xorq __afl_prev_loc(%rip), %rcx\n"
+  "  xorq %rcx, __afl_prev_loc(%rip)\n"
+  "  shrq $1, __afl_prev_loc(%rip)\n"
+#endif /* ^!COVERAGE_ONLY */
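+  /* Net effect of the three instructions above, with no scratch register:
+     rcx            = cur_loc ^ prev_loc  (the map index), and
+     __afl_prev_loc = cur_loc >> 1        (the 2nd XOR sets it to cur_loc first). */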
+  "\n"
+#ifdef SKIP_COUNTS
+  "  orb  $1, (%rdx, %rcx, 1)\n"
+#else
+  "  incb (%rdx, %rcx, 1)\n"
+  "  adcb $0, (%rdx, %rcx, 1)\n" // never zero counter implementation. slightly better path discovery and little performance impact
+#endif /* ^SKIP_COUNTS */
+  "\n"
+  "__afl_return:\n"
+  "\n"
+  "  addb $127, %al\n"
+#if defined(__OpenBSD__)  || (defined(__FreeBSD__) && (__FreeBSD__ < 9))
+  "  .byte 0x9e /* sahf */\n"
+#else
+  "  sahf\n"
+#endif /* ^__OpenBSD__, etc */
+  "  ret\n"
+  "\n"
+  ".align 8\n"
+  "\n"
+  "__afl_setup:\n"
+  "\n"
+  "  /* Do not retry setup if we had previous failures. */\n"
+  "\n"
+  "  cmpb $0, __afl_setup_failure(%rip)\n"
+  "  jne __afl_return\n"
+  "\n"
+  "  /* Check out if we have a global pointer on file. */\n"
+  "\n"
+#ifndef __APPLE__
+  "  movq  __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
+  "  movq  (%rdx), %rdx\n"
+#else
+  "  movq  __afl_global_area_ptr(%rip), %rdx\n"
+#endif /* !^__APPLE__ */
+  "  testq %rdx, %rdx\n"
+  "  je    __afl_setup_first\n"
+  "\n"
+  "  movq %rdx, __afl_area_ptr(%rip)\n"
+  "  jmp  __afl_store\n" 
+  "\n"
+  "__afl_setup_first:\n"
+  "\n"
+  "  /* Save everything that is not yet saved and that may be touched by\n"
+  "     getenv() and several other libcalls we'll be relying on. */\n"
+  "\n"
+  "  leaq -352(%rsp), %rsp\n"
+  "\n"
+  "  movq %rax,   0(%rsp)\n"
+  "  movq %rcx,   8(%rsp)\n"
+  "  movq %rdi,  16(%rsp)\n"
+  "  movq %rsi,  32(%rsp)\n"
+  "  movq %r8,   40(%rsp)\n"
+  "  movq %r9,   48(%rsp)\n"
+  "  movq %r10,  56(%rsp)\n"
+  "  movq %r11,  64(%rsp)\n"
+  "\n"
+  "  movq %xmm0,  96(%rsp)\n"
+  "  movq %xmm1,  112(%rsp)\n"
+  "  movq %xmm2,  128(%rsp)\n"
+  "  movq %xmm3,  144(%rsp)\n"
+  "  movq %xmm4,  160(%rsp)\n"
+  "  movq %xmm5,  176(%rsp)\n"
+  "  movq %xmm6,  192(%rsp)\n"
+  "  movq %xmm7,  208(%rsp)\n"
+  "  movq %xmm8,  224(%rsp)\n"
+  "  movq %xmm9,  240(%rsp)\n"
+  "  movq %xmm10, 256(%rsp)\n"
+  "  movq %xmm11, 272(%rsp)\n"
+  "  movq %xmm12, 288(%rsp)\n"
+  "  movq %xmm13, 304(%rsp)\n"
+  "  movq %xmm14, 320(%rsp)\n"
+  "  movq %xmm15, 336(%rsp)\n"
+  "\n"
+  "  /* Map SHM, jumping to __afl_setup_abort if something goes wrong. */\n"
+  "\n"
+  "  /* The 64-bit ABI requires 16-byte stack alignment. We'll keep the\n"
+  "     original stack ptr in the callee-saved r12. */\n"
+  "\n"
+  "  pushq %r12\n"
+  "  movq  %rsp, %r12\n"
+  "  subq  $16, %rsp\n"
+  "  andq  $0xfffffffffffffff0, %rsp\n"
+  "\n"
+  "  leaq .AFL_SHM_ENV(%rip), %rdi\n"
+  CALL_L64("getenv")
+  "\n"
+  "  testq %rax, %rax\n"
+  "  je    __afl_setup_abort\n"
+  "\n"
+#ifdef USEMMAP
+  "  movl $384, %edx   /* shm_open mode 0600 */\n"
+  "  movl $2,   %esi   /* flags O_RDWR   */\n"
+  "  movq %rax, %rdi   /* SHM file path  */\n"
+  CALL_L64("shm_open")
+  "\n"
+  "  cmpq $-1, %rax\n"
+  "  je   __afl_setup_abort\n"
+  "\n"
+  "  movl    $0, %r9d\n"
+  "  movl    %eax, %r8d\n"
+  "  movl    $1, %ecx\n"
+  "  movl    $3, %edx\n"
+  "  movl    $"STRINGIFY(MAP_SIZE)", %esi\n"
+  "  movl    $0, %edi\n"
+  CALL_L64("mmap")
+  "\n"
+  "  cmpq $-1, %rax\n"
+  "  je   __afl_setup_abort\n"
+  "\n"
+#else
+  "  movq  %rax, %rdi\n"
+  CALL_L64("atoi")
+  "\n"
+  "  xorq %rdx, %rdx   /* shmat flags    */\n"
+  "  xorq %rsi, %rsi   /* requested addr */\n"
+  "  movq %rax, %rdi   /* SHM ID         */\n"
+  CALL_L64("shmat")
+  "\n"
+  "  cmpq $-1, %rax\n"
+  "  je   __afl_setup_abort\n"
+  "\n"
+#endif
+  "  /* Store the address of the SHM region. */\n"
+  "\n"
+  "  movq %rax, %rdx\n"
+  "  movq %rax, __afl_area_ptr(%rip)\n"
+  "\n"
+#ifdef __APPLE__
+  "  movq %rax, __afl_global_area_ptr(%rip)\n"
+#else
+  "  movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
+  "  movq %rax, (%rdx)\n"
+#endif /* ^__APPLE__ */
+  "  movq %rax, %rdx\n"
+  "\n"
+  "__afl_forkserver:\n"
+  "\n"
+  "  /* Enter the fork server mode to avoid the overhead of execve() calls. We\n"
+  "     push rdx (area ptr) twice to keep stack alignment neat. */\n"
+  "\n"
+  "  pushq %rdx\n"
+  "  pushq %rdx\n"
+  "\n"
+  "  /* Phone home and tell the parent that we're OK. (Note that signals with\n"
+  "     no SA_RESTART will mess it up). If this fails, assume that the fd is\n"
+  "     closed because we were execve()d from an instrumented binary, or because\n"
+  "     the parent doesn't want to use the fork server. */\n"
+  "\n"
+  "  movq $4, %rdx               /* length    */\n"
+  "  leaq __afl_temp(%rip), %rsi /* data      */\n"
+  "  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi       /* file desc */\n"
+  CALL_L64("write")
+  "\n"
+  "  cmpq $4, %rax\n"
+  "  jne  __afl_fork_resume\n"
+  "\n"
+  "__afl_fork_wait_loop:\n"
+  "\n"
+  "  /* Wait for parent by reading from the pipe. Abort if read fails. */\n"
+  "\n"
+  "  movq $4, %rdx               /* length    */\n"
+  "  leaq __afl_temp(%rip), %rsi /* data      */\n"
+  "  movq $" STRINGIFY(FORKSRV_FD) ", %rdi             /* file desc */\n"
+  CALL_L64("read")
+  "  cmpq $4, %rax\n"
+  "  jne  __afl_die\n"
+  "\n"
+  "  /* Once woken up, create a clone of our process. This is an excellent use\n"
+  "     case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n"
+  "     caches getpid() results and offers no way to update the value, breaking\n"
+  "     abort(), raise(), and a bunch of other things :-( */\n"
+  "\n"
+  CALL_L64("fork")
+  "  cmpq $0, %rax\n"
+  "  jl   __afl_die\n"
+  "  je   __afl_fork_resume\n"
+  "\n"
+  "  /* In parent process: write PID to pipe, then wait for child. */\n"
+  "\n"
+  "  movl %eax, __afl_fork_pid(%rip)\n"
+  "\n"
+  "  movq $4, %rdx                   /* length    */\n"
+  "  leaq __afl_fork_pid(%rip), %rsi /* data      */\n"
+  "  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi             /* file desc */\n"
+  CALL_L64("write")
+  "\n"
+  "  movq $0, %rdx                   /* no flags  */\n"
+  "  leaq __afl_temp(%rip), %rsi     /* status    */\n"
+  "  movq __afl_fork_pid(%rip), %rdi /* PID       */\n"
+  CALL_L64("waitpid")
+  "  cmpq $0, %rax\n"
+  "  jle  __afl_die\n"
+  "\n"
+  "  /* Relay wait status to pipe, then loop back. */\n"
+  "\n"
+  "  movq $4, %rdx               /* length    */\n"
+  "  leaq __afl_temp(%rip), %rsi /* data      */\n"
+  "  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi         /* file desc */\n"
+  CALL_L64("write")
+  "\n"
+  "  jmp  __afl_fork_wait_loop\n"
+  "\n"
+  "__afl_fork_resume:\n"
+  "\n"
+  "  /* In child process: close fds, resume execution. */\n"
+  "\n"
+  "  movq $" STRINGIFY(FORKSRV_FD) ", %rdi\n"
+  CALL_L64("close")
+  "\n"
+  "  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi\n"
+  CALL_L64("close")
+  "\n"
+  "  popq %rdx\n"
+  "  popq %rdx\n"
+  "\n"
+  "  movq %r12, %rsp\n"
+  "  popq %r12\n"
+  "\n"
+  "  movq  0(%rsp), %rax\n"
+  "  movq  8(%rsp), %rcx\n"
+  "  movq 16(%rsp), %rdi\n"
+  "  movq 32(%rsp), %rsi\n"
+  "  movq 40(%rsp), %r8\n"
+  "  movq 48(%rsp), %r9\n"
+  "  movq 56(%rsp), %r10\n"
+  "  movq 64(%rsp), %r11\n"
+  "\n"
+  "  movq  96(%rsp), %xmm0\n"
+  "  movq 112(%rsp), %xmm1\n"
+  "  movq 128(%rsp), %xmm2\n"
+  "  movq 144(%rsp), %xmm3\n"
+  "  movq 160(%rsp), %xmm4\n"
+  "  movq 176(%rsp), %xmm5\n"
+  "  movq 192(%rsp), %xmm6\n"
+  "  movq 208(%rsp), %xmm7\n"
+  "  movq 224(%rsp), %xmm8\n"
+  "  movq 240(%rsp), %xmm9\n"
+  "  movq 256(%rsp), %xmm10\n"
+  "  movq 272(%rsp), %xmm11\n"
+  "  movq 288(%rsp), %xmm12\n"
+  "  movq 304(%rsp), %xmm13\n"
+  "  movq 320(%rsp), %xmm14\n"
+  "  movq 336(%rsp), %xmm15\n"
+  "\n"
+  "  leaq 352(%rsp), %rsp\n"
+  "\n"
+  "  jmp  __afl_store\n"
+  "\n"
+  "__afl_die:\n"
+  "\n"
+  "  xorq %rax, %rax\n"
+  CALL_L64("_exit")
+  "\n"
+  "__afl_setup_abort:\n"
+  "\n"
+  "  /* Record setup failure so that we don't keep calling\n"
+  "     shmget() / shmat() over and over again. */\n"
+  "\n"
+  "  incb __afl_setup_failure(%rip)\n"
+  "\n"
+  "  movq %r12, %rsp\n"
+  "  popq %r12\n"
+  "\n"
+  "  movq  0(%rsp), %rax\n"
+  "  movq  8(%rsp), %rcx\n"
+  "  movq 16(%rsp), %rdi\n"
+  "  movq 32(%rsp), %rsi\n"
+  "  movq 40(%rsp), %r8\n"
+  "  movq 48(%rsp), %r9\n"
+  "  movq 56(%rsp), %r10\n"
+  "  movq 64(%rsp), %r11\n"
+  "\n"
+  "  movq  96(%rsp), %xmm0\n"
+  "  movq 112(%rsp), %xmm1\n"
+  "  movq 128(%rsp), %xmm2\n"
+  "  movq 144(%rsp), %xmm3\n"
+  "  movq 160(%rsp), %xmm4\n"
+  "  movq 176(%rsp), %xmm5\n"
+  "  movq 192(%rsp), %xmm6\n"
+  "  movq 208(%rsp), %xmm7\n"
+  "  movq 224(%rsp), %xmm8\n"
+  "  movq 240(%rsp), %xmm9\n"
+  "  movq 256(%rsp), %xmm10\n"
+  "  movq 272(%rsp), %xmm11\n"
+  "  movq 288(%rsp), %xmm12\n"
+  "  movq 304(%rsp), %xmm13\n"
+  "  movq 320(%rsp), %xmm14\n"
+  "  movq 336(%rsp), %xmm15\n"
+  "\n"
+  "  leaq 352(%rsp), %rsp\n"
+  "\n"
+  "  jmp __afl_return\n"
+  "\n"
+  ".AFL_VARS:\n"
+  "\n"
+
+#ifdef __APPLE__
+
+  "  .comm   __afl_area_ptr, 8\n"
+#ifndef COVERAGE_ONLY
+  "  .comm   __afl_prev_loc, 8\n"
+#endif /* !COVERAGE_ONLY */
+  "  .comm   __afl_fork_pid, 4\n"
+  "  .comm   __afl_temp, 4\n"
+  "  .comm   __afl_setup_failure, 1\n"
+
+#else
+
+  "  .lcomm   __afl_area_ptr, 8\n"
+#ifndef COVERAGE_ONLY
+  "  .lcomm   __afl_prev_loc, 8\n"
+#endif /* !COVERAGE_ONLY */
+  "  .lcomm   __afl_fork_pid, 4\n"
+  "  .lcomm   __afl_temp, 4\n"
+  "  .lcomm   __afl_setup_failure, 1\n"
+
+#endif /* ^__APPLE__ */
+
+  "  .comm    __afl_global_area_ptr, 8, 8\n"
+  "\n"
+  ".AFL_SHM_ENV:\n"
+  "  .asciz \"" SHM_ENV_VAR "\"\n"
+  "\n"
+  "/* --- END --- */\n"
+  "\n";
+
+#endif /* !_HAVE_AFL_AS_H */
diff --git a/include/alloc-inl.h b/include/alloc-inl.h
new file mode 100644
index 00000000..2f98da0e
--- /dev/null
+++ b/include/alloc-inl.h
@@ -0,0 +1,586 @@
+/*
+   american fuzzy lop - error-checking, memory-zeroing alloc routines
+   ------------------------------------------------------------------
+
+   Written and maintained by Michal Zalewski <lcamtuf@google.com>
+
+   Copyright 2013, 2014, 2015 Google Inc. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   This allocator is not designed to resist malicious attackers (the canaries
+   are small and predictable), but provides a robust and portable way to detect
+   use-after-free, off-by-one writes, stale pointers, and so on.
+
+ */
+
+#ifndef _HAVE_ALLOC_INL_H
+#define _HAVE_ALLOC_INL_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "config.h"
+#include "types.h"
+#include "debug.h"
+
+/* User-facing macro to sprintf() to a dynamically allocated buffer. */
+
+#define alloc_printf(_str...) ({ \
+    u8* _tmp; \
+    s32 _len = snprintf(NULL, 0, _str); \
+    if (_len < 0) FATAL("Whoa, snprintf() fails?!"); \
+    _tmp = ck_alloc(_len + 1); \
+    snprintf((char*)_tmp, _len + 1, _str); \
+    _tmp; \
+  })
+
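+/* Example use (illustrative only; out_dir and id are hypothetical):
+
+     u8* fn = alloc_printf("%s/queue/id_%06u", out_dir, id);
+     ...
+     ck_free(fn);
+
+ */
+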
+/* Macro to enforce allocation limits as a last-resort defense against
+   integer overflows. */
+
+#define ALLOC_CHECK_SIZE(_s) do { \
+    if ((_s) > MAX_ALLOC) \
+      ABORT("Bad alloc request: %u bytes", (_s)); \
+  } while (0)
+
+/* Macro to check malloc() failures and the like. */
+
+#define ALLOC_CHECK_RESULT(_r, _s) do { \
+    if (!(_r)) \
+      ABORT("Out of memory: can't allocate %u bytes", (_s)); \
+  } while (0)
+
+/* Magic tokens used to mark used / freed chunks. */
+
+#define ALLOC_MAGIC_C1  0xFF00FF00 /* Used head (dword)  */
+#define ALLOC_MAGIC_F   0xFE00FE00 /* Freed head (dword) */
+#define ALLOC_MAGIC_C2  0xF0       /* Used tail (byte)   */
+
+/* Positions of guard tokens in relation to the user-visible pointer. */
+
+#define ALLOC_C1(_ptr)  (((u32*)(_ptr))[-2])
+#define ALLOC_S(_ptr)   (((u32*)(_ptr))[-1])
+#define ALLOC_C2(_ptr)  (((u8*)(_ptr))[ALLOC_S(_ptr)])
+
+#define ALLOC_OFF_HEAD  8
+#define ALLOC_OFF_TOTAL (ALLOC_OFF_HEAD + 1)
+
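+/* Resulting chunk layout, relative to the user-visible pointer _ptr:
+
+     _ptr[-8] .. _ptr[-5]        u32  ALLOC_MAGIC_C1 (head canary)
+     _ptr[-4] .. _ptr[-1]        u32  requested size
+     _ptr[0]  .. _ptr[size-1]         user data
+     _ptr[size]                  u8   ALLOC_MAGIC_C2 (tail canary)
+
+   ALLOC_OFF_HEAD covers the two head words; ALLOC_OFF_TOTAL adds the one
+   tail byte. */
+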
+/* Allocator increments for ck_realloc_block(). */
+
+#define ALLOC_BLK_INC    256
+
+/* Sanity-checking macros for pointers. */
+
+#define CHECK_PTR(_p) do { \
+    if (_p) { \
+      if (ALLOC_C1(_p) ^ ALLOC_MAGIC_C1) {\
+        if (ALLOC_C1(_p) == ALLOC_MAGIC_F) \
+          ABORT("Use after free."); \
+        else ABORT("Corrupted head alloc canary."); \
+      } \
+   } \
+  } while (0)
+
+/* Stricter variant that also verifies the tail canary, currently disabled:
+#define CHECK_PTR(_p) do { \
+    if (_p) { \
+      if (ALLOC_C1(_p) ^ ALLOC_MAGIC_C1) {\
+        if (ALLOC_C1(_p) == ALLOC_MAGIC_F) \
+          ABORT("Use after free."); \
+        else ABORT("Corrupted head alloc canary."); \
+      } \
+      if (ALLOC_C2(_p) ^ ALLOC_MAGIC_C2) \
+        ABORT("Corrupted tail alloc canary."); \
+    } \
+  } while (0)
+*/
+
+#define CHECK_PTR_EXPR(_p) ({ \
+    typeof (_p) _tmp = (_p); \
+    CHECK_PTR(_tmp); \
+    _tmp; \
+  })
+
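+/* Example use (illustrative only; buf is hypothetical) -- validates the
+   pointer in the middle of an expression:
+
+     u32 len = ALLOC_S(CHECK_PTR_EXPR(buf));
+
+ */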
+
+/* Allocate a buffer, explicitly not zeroing it. Returns NULL for zero-sized
+   requests. */
+
+static inline void* DFL_ck_alloc_nozero(u32 size) {
+
+  u8* ret;
+
+  if (!size) return NULL;
+
+  ALLOC_CHECK_SIZE(size);
+  ret = malloc(size + ALLOC_OFF_TOTAL);
+  ALLOC_CHECK_RESULT(ret, size);
+
+  ret += ALLOC_OFF_HEAD;
+
+  ALLOC_C1(ret) = ALLOC_MAGIC_C1;
+  ALLOC_S(ret)  = size;
+  ALLOC_C2(ret) = ALLOC_MAGIC_C2;
+
+  return (void *)ret;
+
+}
+
+
+/* Allocate a buffer, returning zeroed memory. */
+
+static inline void* DFL_ck_alloc(u32 size) {
+
+  void* mem;
+
+  if (!size) return NULL;
+  mem = DFL_ck_alloc_nozero(size);
+
+  return memset(mem, 0, size);
+
+}
+
+
+/* Free memory, checking for double free and corrupted heap. When DEBUG_BUILD
+   is set, the old memory will also be clobbered with 0xFF. */
+
+static inline void DFL_ck_free(void* mem) {
+
+  if (!mem) return;
+
+  CHECK_PTR(mem);
+
+#ifdef DEBUG_BUILD
+
+  /* Catch pointer issues sooner. */
+  memset(mem, 0xFF, ALLOC_S(mem));
+
+#endif /* DEBUG_BUILD */
+
+  ALLOC_C1(mem) = ALLOC_MAGIC_F;
+
+  u8* real_start = mem;
+  free(real_start - ALLOC_OFF_HEAD);
+
+}
+
+
+/* Re-allocate a buffer, checking for issues and zeroing any newly-added tail.
+   With DEBUG_BUILD, the buffer is always reallocated to a new address and the
+   old memory is clobbered with 0xFF. */
+
+static inline void* DFL_ck_realloc(void* orig, u32 size) {
+
+  u8*   ret;
+  u32   old_size = 0;
+
+  if (!size) {
+
+    DFL_ck_free(orig);
+    return NULL;
+
+  }
+
+  if (orig) {
+
+    CHECK_PTR(orig);
+
+#ifndef DEBUG_BUILD
+    ALLOC_C1(orig) = ALLOC_MAGIC_F;
+#endif /* !DEBUG_BUILD */
+
+    old_size  = ALLOC_S(orig);
+    u8 *origu8 = orig;
+    origu8   -= ALLOC_OFF_HEAD;
+    orig = origu8;
+
+    ALLOC_CHECK_SIZE(old_size);
+
+  }
+
+  ALLOC_CHECK_SIZE(size);
+
+#ifndef DEBUG_BUILD
+
+  ret = realloc(orig, size + ALLOC_OFF_TOTAL);
+  ALLOC_CHECK_RESULT(ret, size);
+
+#else
+
+  /* Catch pointer issues sooner: force relocation and make sure that the
+     original buffer is wiped. */
+
+  ret = malloc(size + ALLOC_OFF_TOTAL);
+  ALLOC_CHECK_RESULT(ret, size);
+
+  if (orig) {
+
+    u8 *origu8 = orig;
+    memcpy(ret + ALLOC_OFF_HEAD, origu8 + ALLOC_OFF_HEAD, MIN(size, old_size));
+    memset(origu8 + ALLOC_OFF_HEAD, 0xFF, old_size);
+
+    ALLOC_C1(origu8 + ALLOC_OFF_HEAD) = ALLOC_MAGIC_F;
+
+    free(orig);
+
+  }
+
+#endif /* ^!DEBUG_BUILD */
+
+  ret += ALLOC_OFF_HEAD;
+
+  ALLOC_C1(ret) = ALLOC_MAGIC_C1;
+  ALLOC_S(ret)  = size;
+  ALLOC_C2(ret) = ALLOC_MAGIC_C2;
+
+  if (size > old_size)
+    memset(ret + old_size, 0, size - old_size);
+
+  return (void *)ret;
+
+}
+
+
+/* Re-allocate a buffer with ALLOC_BLK_INC increments (used to speed up
+   repeated small reallocs without complicating the user code). */
+
+static inline void* DFL_ck_realloc_block(void* orig, u32 size) {
+
+#ifndef DEBUG_BUILD
+
+  if (orig) {
+
+    CHECK_PTR(orig);
+
+    if (ALLOC_S(orig) >= size) return orig;
+
+    size += ALLOC_BLK_INC;
+
+  }
+
+#endif /* !DEBUG_BUILD */
+
+  return DFL_ck_realloc(orig, size);
+
+}
+
+
+/* Create a buffer with a copy of a string. Returns NULL for NULL inputs. */
+
+static inline u8* DFL_ck_strdup(u8* str) {
+
+  u8*   ret;
+  u32   size;
+
+  if (!str) return NULL;
+
+  size = strlen((char*)str) + 1;
+
+  ALLOC_CHECK_SIZE(size);
+  ret = malloc(size + ALLOC_OFF_TOTAL);
+  ALLOC_CHECK_RESULT(ret, size);
+
+  ret += ALLOC_OFF_HEAD;
+
+  ALLOC_C1(ret) = ALLOC_MAGIC_C1;
+  ALLOC_S(ret)  = size;
+  ALLOC_C2(ret) = ALLOC_MAGIC_C2;
+
+  return memcpy(ret, str, size);
+
+}
+
+
+/* Create a buffer with a copy of a memory block. Returns NULL for zero-sized
+   or NULL inputs. */
+
+static inline void* DFL_ck_memdup(void* mem, u32 size) {
+
+  u8*   ret;
+
+  if (!mem || !size) return NULL;
+
+  ALLOC_CHECK_SIZE(size);
+  ret = malloc(size + ALLOC_OFF_TOTAL);
+  ALLOC_CHECK_RESULT(ret, size);
+  
+  ret += ALLOC_OFF_HEAD;
+
+  ALLOC_C1(ret) = ALLOC_MAGIC_C1;
+  ALLOC_S(ret)  = size;
+  ALLOC_C2(ret) = ALLOC_MAGIC_C2;
+
+  return memcpy(ret, mem, size);
+
+}
+
+
+/* Create a buffer with a block of text, appending a NUL terminator at the end.
+   Returns NULL for zero-sized or NULL inputs. */
+
+static inline u8* DFL_ck_memdup_str(u8* mem, u32 size) {
+
+  u8* ret;
+
+  if (!mem || !size) return NULL;
+
+  ALLOC_CHECK_SIZE(size);
+  ret = malloc(size + ALLOC_OFF_TOTAL + 1);
+  ALLOC_CHECK_RESULT(ret, size);
+  
+  ret += ALLOC_OFF_HEAD;
+
+  ALLOC_C1(ret) = ALLOC_MAGIC_C1;
+  ALLOC_S(ret)  = size;
+  ALLOC_C2(ret) = ALLOC_MAGIC_C2;
+
+  memcpy(ret, mem, size);
+  ret[size] = 0;
+
+  return ret;
+
+}
+
+
+#ifndef DEBUG_BUILD
+
+/* In non-debug mode, we just do straightforward aliasing of the above functions
+   to user-visible names such as ck_alloc(). */
+
+#define ck_alloc          DFL_ck_alloc
+#define ck_alloc_nozero   DFL_ck_alloc_nozero
+#define ck_realloc        DFL_ck_realloc
+#define ck_realloc_block  DFL_ck_realloc_block
+#define ck_strdup         DFL_ck_strdup
+#define ck_memdup         DFL_ck_memdup
+#define ck_memdup_str     DFL_ck_memdup_str
+#define ck_free           DFL_ck_free
+
+#define alloc_report()
+
+#else
+
+/* In debugging mode, we also track allocations to detect memory leaks, and the
+   flow goes through one more layer of indirection. */
+
+/* Alloc tracking data structures: */
+
+#define ALLOC_BUCKETS     4096
+
+struct TRK_obj {
+  void *ptr;
+  char *file, *func;
+  u32  line;
+};
+
+#ifdef AFL_MAIN
+
+struct TRK_obj* TRK[ALLOC_BUCKETS];
+u32 TRK_cnt[ALLOC_BUCKETS];
+
+#  define alloc_report() TRK_report()
+
+#else
+
+extern struct TRK_obj* TRK[ALLOC_BUCKETS];
+extern u32 TRK_cnt[ALLOC_BUCKETS];
+
+#  define alloc_report()
+
+#endif /* ^AFL_MAIN */
+
+/* Bucket-assigning function for a given pointer: */
+
+#define TRKH(_ptr) (((((u32)(_ptr)) >> 16) ^ ((u32)(_ptr))) % ALLOC_BUCKETS)
+
+
+/* Add a new entry to the list of allocated objects. */
+
+static inline void TRK_alloc_buf(void* ptr, const char* file, const char* func,
+                                 u32 line) {
+
+  u32 i, bucket;
+
+  if (!ptr) return;
+
+  bucket = TRKH(ptr);
+
+  /* Find a free slot in the list of entries for that bucket. */
+
+  for (i = 0; i < TRK_cnt[bucket]; i++)
+
+    if (!TRK[bucket][i].ptr) {
+
+      TRK[bucket][i].ptr  = ptr;
+      TRK[bucket][i].file = (char*)file;
+      TRK[bucket][i].func = (char*)func;
+      TRK[bucket][i].line = line;
+      return;
+
+    }
+
+  /* No space available - allocate more. */
+
+  TRK[bucket] = DFL_ck_realloc_block(TRK[bucket],
+    (TRK_cnt[bucket] + 1) * sizeof(struct TRK_obj));
+
+  TRK[bucket][i].ptr  = ptr;
+  TRK[bucket][i].file = (char*)file;
+  TRK[bucket][i].func = (char*)func;
+  TRK[bucket][i].line = line;
+
+  TRK_cnt[bucket]++;
+
+}
+
+
+/* Remove entry from the list of allocated objects. */
+
+static inline void TRK_free_buf(void* ptr, const char* file, const char* func,
+                                u32 line) {
+
+  u32 i, bucket;
+
+  if (!ptr) return;
+
+  bucket = TRKH(ptr);
+
+  /* Find the element on the list... */
+
+  for (i = 0; i < TRK_cnt[bucket]; i++)
+
+    if (TRK[bucket][i].ptr == ptr) {
+
+      TRK[bucket][i].ptr = 0;
+      return;
+
+    }
+
+  WARNF("ALLOC: Attempt to free non-allocated memory in %s (%s:%u)",
+        func, file, line);
+
+}
+
+
+/* Do a final report on all non-deallocated objects. */
+
+static inline void TRK_report(void) {
+
+  u32 i, bucket;
+
+  fflush(0);
+
+  for (bucket = 0; bucket < ALLOC_BUCKETS; bucket++)
+    for (i = 0; i < TRK_cnt[bucket]; i++)
+      if (TRK[bucket][i].ptr)
+        WARNF("ALLOC: Memory never freed, created in %s (%s:%u)",
+              TRK[bucket][i].func, TRK[bucket][i].file, TRK[bucket][i].line);
+
+}
+
+
+/* Simple wrappers for non-debugging functions: */
+
+static inline void* TRK_ck_alloc(u32 size, const char* file, const char* func,
+                                 u32 line) {
+
+  void* ret = DFL_ck_alloc(size);
+  TRK_alloc_buf(ret, file, func, line);
+  return ret;
+
+}
+
+
+static inline void* TRK_ck_realloc(void* orig, u32 size, const char* file,
+                                   const char* func, u32 line) {
+
+  void* ret = DFL_ck_realloc(orig, size);
+  TRK_free_buf(orig, file, func, line);
+  TRK_alloc_buf(ret, file, func, line);
+  return ret;
+
+}
+
+
+static inline void* TRK_ck_realloc_block(void* orig, u32 size, const char* file,
+                                         const char* func, u32 line) {
+
+  void* ret = DFL_ck_realloc_block(orig, size);
+  TRK_free_buf(orig, file, func, line);
+  TRK_alloc_buf(ret, file, func, line);
+  return ret;
+
+}
+
+
+static inline void* TRK_ck_strdup(u8* str, const char* file, const char* func,
+                                  u32 line) {
+
+  void* ret = DFL_ck_strdup(str);
+  TRK_alloc_buf(ret, file, func, line);
+  return ret;
+
+}
+
+
+static inline void* TRK_ck_memdup(void* mem, u32 size, const char* file,
+                                  const char* func, u32 line) {
+
+  void* ret = DFL_ck_memdup(mem, size);
+  TRK_alloc_buf(ret, file, func, line);
+  return ret;
+
+}
+
+
+static inline void* TRK_ck_memdup_str(void* mem, u32 size, const char* file,
+                                      const char* func, u32 line) {
+
+  void* ret = DFL_ck_memdup_str(mem, size);
+  TRK_alloc_buf(ret, file, func, line);
+  return ret;
+
+}
+
+
+static inline void TRK_ck_free(void* ptr, const char* file,
+                                const char* func, u32 line) {
+
+  TRK_free_buf(ptr, file, func, line);
+  DFL_ck_free(ptr);
+
+}
+
+/* Aliasing user-facing names to tracking functions: */
+
+#define ck_alloc(_p1) \
+  TRK_ck_alloc(_p1, __FILE__, __FUNCTION__, __LINE__)
+
+#define ck_alloc_nozero(_p1) \
+  TRK_ck_alloc(_p1, __FILE__, __FUNCTION__, __LINE__)
+
+#define ck_realloc(_p1, _p2) \
+  TRK_ck_realloc(_p1, _p2, __FILE__, __FUNCTION__, __LINE__)
+
+#define ck_realloc_block(_p1, _p2) \
+  TRK_ck_realloc_block(_p1, _p2, __FILE__, __FUNCTION__, __LINE__)
+
+#define ck_strdup(_p1) \
+  TRK_ck_strdup(_p1, __FILE__, __FUNCTION__, __LINE__)
+
+#define ck_memdup(_p1, _p2) \
+  TRK_ck_memdup(_p1, _p2, __FILE__, __FUNCTION__, __LINE__)
+
+#define ck_memdup_str(_p1, _p2) \
+  TRK_ck_memdup_str(_p1, _p2, __FILE__, __FUNCTION__, __LINE__)
+
+#define ck_free(_p1) \
+  TRK_ck_free(_p1, __FILE__, __FUNCTION__, __LINE__)
+
+#endif /* ^!DEBUG_BUILD */
+
+#endif /* ! _HAVE_ALLOC_INL_H */
diff --git a/include/common.h b/include/common.h
new file mode 100644
index 00000000..161caa39
--- /dev/null
+++ b/include/common.h
@@ -0,0 +1,6 @@
+#ifndef __AFLCOMMON_H
+#define __AFLCOMMON_H
+#include "types.h"
+
+void detect_file_args(char **argv, u8 *prog_in);
+#endif
diff --git a/include/config.h b/include/config.h
new file mode 100644
index 00000000..37a2a794
--- /dev/null
+++ b/include/config.h
@@ -0,0 +1,359 @@
+/*
+   american fuzzy lop plus plus - vaguely configurable bits
+   --------------------------------------------------------
+
+   Written and maintained by Michal Zalewski <lcamtuf@google.com>
+
+   Copyright 2013, 2014, 2015, 2016 Google Inc. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ */
+
+#ifndef _HAVE_CONFIG_H
+#define _HAVE_CONFIG_H
+
+#include "types.h"
+
+/* Version string: */
+
+#define VERSION             "++2.53d"  // c = release, d = volatile github dev
+
+/******************************************************
+ *                                                    *
+ *  Settings that may be of interest to power users:  *
+ *                                                    *
+ ******************************************************/
+
+/* Comment out to disable terminal colors (note that this makes afl-analyze
+   a lot less nice): */
+
+#define USE_COLOR
+
+/* Comment out to disable fancy ANSI boxes and use poor man's 7-bit UI: */
+
+#define FANCY_BOXES
+
+/* Default timeout for fuzzed code (milliseconds). This is the upper bound,
+   also used for detecting hangs; the actual value is auto-scaled: */
+
+#define EXEC_TIMEOUT        1000
+
+/* Timeout rounding factor when auto-scaling (milliseconds): */
+
+#define EXEC_TM_ROUND       20
+
+/* Default memory limit for child process (MB): */
+
+#ifndef __x86_64__ 
+#  define MEM_LIMIT         25
+#else
+#  define MEM_LIMIT         50
+#endif /* ^!__x86_64__ */
+
+/* Default memory limit when running in QEMU mode (MB): */
+
+#define MEM_LIMIT_QEMU      200
+
+/* Default memory limit when running in Unicorn mode (MB): */
+
+#define MEM_LIMIT_UNICORN   200
+
+/* Number of calibration cycles for every new test case (and for test
+   cases that show variable behavior): */
+
+#define CAL_CYCLES          8
+#define CAL_CYCLES_LONG     40
+
+/* Number of subsequent timeouts before abandoning an input file: */
+
+#define TMOUT_LIMIT         250
+
+/* Maximum number of unique hangs or crashes to record: */
+
+#define KEEP_UNIQUE_HANG    500
+#define KEEP_UNIQUE_CRASH   5000
+
+/* Baseline number of random tweaks during a single 'havoc' stage: */
+
+#define HAVOC_CYCLES        256
+#define HAVOC_CYCLES_INIT   1024
+
+/* Maximum multiplier for the above (should be a power of two, beware
+   of 32-bit int overflows): */
+
+#define HAVOC_MAX_MULT      16
+#define HAVOC_MAX_MULT_MOPT 32
+
+/* Absolute minimum number of havoc cycles (after all adjustments): */
+
+#define HAVOC_MIN           16
+
+/* Power schedule divisor: */
+
+#define POWER_BETA          1
+#define MAX_FACTOR          (POWER_BETA * 32)
+
+/* Maximum stacking for havoc-stage tweaks. The actual value is calculated
+   like this: 
+
+   n = random between 1 and HAVOC_STACK_POW2
+   stacking = 2^n
+
+   In other words, the default (n = 7) produces 2, 4, 8, 16, 32, 64, or
+   128 stacked tweaks: */
+
+#define HAVOC_STACK_POW2    7
+
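+/* For reference, afl-fuzz derives the actual stacking roughly like this
+   (sketch):
+
+     use_stacking = 1 << (1 + rand() % HAVOC_STACK_POW2);
+
+   i.e., one of 2, 4, 8, ..., 128 with the default value of 7. */
+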
+/* Caps on block sizes for cloning and deletion operations. Each of these
+   ranges has a 33% probability of getting picked, except for the first
+   two cycles where smaller blocks are favored: */
+
+#define HAVOC_BLK_SMALL     32
+#define HAVOC_BLK_MEDIUM    128
+#define HAVOC_BLK_LARGE     1500
+
+/* Extra-large blocks, selected very rarely (<5% of the time): */
+
+#define HAVOC_BLK_XL        32768
+
+/* Probabilities of skipping non-favored entries in the queue, expressed as
+   percentages: */
+
+#define SKIP_TO_NEW_PROB    99 /* ...when there are new, pending favorites */
+#define SKIP_NFAV_OLD_PROB  95 /* ...no new favs, cur entry already fuzzed */
+#define SKIP_NFAV_NEW_PROB  75 /* ...no new favs, cur entry not fuzzed yet */
+
+/* Splicing cycle count: */
+
+#define SPLICE_CYCLES       15
+
+/* Nominal per-splice havoc cycle length: */
+
+#define SPLICE_HAVOC        32
+
+/* Maximum offset for integer addition / subtraction stages: */
+
+#define ARITH_MAX           35
+
+/* Limits for the test case trimmer. The absolute minimum chunk size; and
+   the starting and ending divisors for chopping up the input file: */
+
+#define TRIM_MIN_BYTES      4
+#define TRIM_START_STEPS    16
+#define TRIM_END_STEPS      1024
+
+/* Maximum size of input file, in bytes (keep under 100MB): */
+
+#define MAX_FILE            (1 * 1024 * 1024)
+
+/* The same, for the test case minimizer: */
+
+#define TMIN_MAX_FILE       (10 * 1024 * 1024)
+
+/* Block normalization steps for afl-tmin: */
+
+#define TMIN_SET_MIN_SIZE   4
+#define TMIN_SET_STEPS      128
+
+/* Maximum dictionary token size (-x), in bytes: */
+
+#define MAX_DICT_FILE       128
+
+/* Length limits for auto-detected dictionary tokens: */
+
+#define MIN_AUTO_EXTRA      3
+#define MAX_AUTO_EXTRA      32
+
+/* Maximum number of user-specified dictionary tokens to use in deterministic
+   steps; past this point, the "extras/user" step will still be carried out,
+   but with proportionally lower odds: */
+
+#define MAX_DET_EXTRAS      200
+
+/* Maximum number of auto-extracted dictionary tokens to actually use in fuzzing
+   (first value), and to keep in memory as candidates. The latter should be much
+   higher than the former. */
+
+#define USE_AUTO_EXTRAS     50
+#define MAX_AUTO_EXTRAS     (USE_AUTO_EXTRAS * 10)
+
+/* Scaling factor for the effector map used to skip some of the more
+   expensive deterministic steps. The actual divisor is set to
+   2^EFF_MAP_SCALE2 bytes: */
+
+#define EFF_MAP_SCALE2      3
+
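+/* afl-fuzz maps a file offset to an effector map position roughly as:
+
+     #define EFF_APOS(_p) ((_p) >> EFF_MAP_SCALE2)
+
+   so with the default of 3, each effector byte covers an 8-byte block of
+   the input. */
+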
+/* Minimum input file length at which the effector logic kicks in: */
+
+#define EFF_MIN_LEN         128
+
+/* Maximum effector density past which everything is just fuzzed
+   unconditionally (%): */
+
+#define EFF_MAX_PERC        90
+
+/* UI refresh frequency (Hz): */
+
+#define UI_TARGET_HZ        5
+
+/* Fuzzer stats file and plot update intervals (sec): */
+
+#define STATS_UPDATE_SEC    60
+#define PLOT_UPDATE_SEC     5
+
+/* Smoothing divisor for CPU load and exec speed stats (1 - no smoothing). */
+
+#define AVG_SMOOTHING       16
+
+/* Sync interval (every n havoc cycles): */
+
+#define SYNC_INTERVAL       5
+
+/* Output directory reuse grace period (minutes): */
+
+#define OUTPUT_GRACE        25
+
+/* Uncomment to use simple file names (id_NNNNNN): */
+
+// #define SIMPLE_FILES
+
+/* List of interesting values to use in fuzzing. */
+
+#define INTERESTING_8 \
+  -128,          /* Overflow signed 8-bit when decremented  */ \
+  -1,            /*                                         */ \
+   0,            /*                                         */ \
+   1,            /*                                         */ \
+   16,           /* One-off with common buffer size         */ \
+   32,           /* One-off with common buffer size         */ \
+   64,           /* One-off with common buffer size         */ \
+   100,          /* One-off with common buffer size         */ \
+   127           /* Overflow signed 8-bit when incremented  */
+
+#define INTERESTING_16 \
+  -32768,        /* Overflow signed 16-bit when decremented */ \
+  -129,          /* Overflow signed 8-bit                   */ \
+   128,          /* Overflow signed 8-bit                   */ \
+   255,          /* Overflow unsig 8-bit when incremented   */ \
+   256,          /* Overflow unsig 8-bit                    */ \
+   512,          /* One-off with common buffer size         */ \
+   1000,         /* One-off with common buffer size         */ \
+   1024,         /* One-off with common buffer size         */ \
+   4096,         /* One-off with common buffer size         */ \
+   32767         /* Overflow signed 16-bit when incremented */
+
+#define INTERESTING_32 \
+  -2147483648LL, /* Overflow signed 32-bit when decremented */ \
+  -100663046,    /* Large negative number (endian-agnostic) */ \
+  -32769,        /* Overflow signed 16-bit                  */ \
+   32768,        /* Overflow signed 16-bit                  */ \
+   65535,        /* Overflow unsig 16-bit when incremented  */ \
+   65536,        /* Overflow unsig 16-bit                   */ \
+   100663045,    /* Large positive number (endian-agnostic) */ \
+   2147483647    /* Overflow signed 32-bit when incremented */
+
+/***********************************************************
+ *                                                         *
+ *  Really exotic stuff you probably don't want to touch:  *
+ *                                                         *
+ ***********************************************************/
+
+/* Call count interval between reseeding the libc PRNG from /dev/urandom: */
+
+#define RESEED_RNG          10000
+
+/* Maximum line length passed from GCC to 'as' and used for parsing
+   configuration files: */
+
+#define MAX_LINE            8192
+
+/* Environment variable used to pass SHM ID to the called program. */
+
+#define SHM_ENV_VAR         "__AFL_SHM_ID"
+
+/* Other less interesting, internal-only variables. */
+
+#define CLANG_ENV_VAR       "__AFL_CLANG_MODE"
+#define AS_LOOP_ENV_VAR     "__AFL_AS_LOOPCHECK"
+#define PERSIST_ENV_VAR     "__AFL_PERSISTENT"
+#define DEFER_ENV_VAR       "__AFL_DEFER_FORKSRV"
+
+/* In-code signatures for deferred and persistent mode. */
+
+#define PERSIST_SIG         "##SIG_AFL_PERSISTENT##"
+#define DEFER_SIG           "##SIG_AFL_DEFER_FORKSRV##"
+
+/* Distinctive bitmap signature used to indicate failed execution: */
+
+#define EXEC_FAIL_SIG       0xfee1dead
+
+/* Distinctive exit code used to indicate MSAN trip condition: */
+
+#define MSAN_ERROR          86
+
+/* Designated file descriptors for forkserver commands (the application will
+   use FORKSRV_FD and FORKSRV_FD + 1): */
+
+#define FORKSRV_FD          198
+
+/* Fork server init timeout multiplier: we'll wait the user-selected
+   timeout plus this much for the fork server to spin up. */
+
+#define FORK_WAIT_MULT      10
+
+/* Calibration timeout adjustments, to be a bit more generous when resuming
+   fuzzing sessions or trying to calibrate already-added internal finds.
+   The first value is a percentage, the other is in milliseconds: */
+
+#define CAL_TMOUT_PERC      125
+#define CAL_TMOUT_ADD       50
+
+/* Number of chances to calibrate a case before giving up: */
+
+#define CAL_CHANCES         3
+
+/* Map size for the traced binary (2^MAP_SIZE_POW2). Must be greater than
+   2; you probably want to keep it under 18 or so for performance reasons
+   (adjusting AFL_INST_RATIO when compiling is probably a better way to solve
+   problems with complex programs). You need to recompile the target binary
+   after changing this - otherwise, SEGVs may ensue. */
+
+#define MAP_SIZE_POW2       16
+#define MAP_SIZE            (1 << MAP_SIZE_POW2)
+
+/* Maximum allocator request size (keep well under INT_MAX): */
+
+#define MAX_ALLOC           0x40000000
+
+/* A made-up hashing seed: */
+
+#define HASH_CONST          0xa5b35705
+
+/* Constants for afl-gotcpu to control busy loop timing: */
+
+#define  CTEST_TARGET_MS    5000
+#define  CTEST_CORE_TRG_MS  1000
+#define  CTEST_BUSY_CYCLES  (10 * 1000 * 1000)
+
+/* Uncomment this to use inferior block-coverage-based instrumentation. Note
+   that you need to recompile the target binary for this to have any effect: */
+
+// #define COVERAGE_ONLY
+
+/* Uncomment this to ignore hit counts and output just one bit per tuple.
+   As with the previous setting, you will need to recompile the target
+   binary: */
+
+// #define SKIP_COUNTS
+
+/* Uncomment this to use instrumentation data to record newly discovered paths,
+   but do not use them as seeds for fuzzing. This is useful for conveniently
+   measuring coverage that could be attained by a "dumb" fuzzing algorithm: */
+
+// #define IGNORE_FINDS
+
+#endif /* ! _HAVE_CONFIG_H */
diff --git a/include/debug.h b/include/debug.h
new file mode 100644
index 00000000..349aa650
--- /dev/null
+++ b/include/debug.h
@@ -0,0 +1,251 @@
+/*
+   american fuzzy lop - debug / error handling macros
+   --------------------------------------------------
+
+   Written and maintained by Michal Zalewski <lcamtuf@google.com>
+
+   Copyright 2013, 2014, 2015, 2016 Google Inc. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ */
+
+#ifndef _HAVE_DEBUG_H
+#define _HAVE_DEBUG_H
+
+#include <errno.h>
+
+#include "types.h"
+#include "config.h"
+
+/*******************
+ * Terminal colors *
+ *******************/
+
+#ifdef USE_COLOR
+
+#  define cBLK "\x1b[0;30m"
+#  define cRED "\x1b[0;31m"
+#  define cGRN "\x1b[0;32m"
+#  define cBRN "\x1b[0;33m"
+#  define cBLU "\x1b[0;34m"
+#  define cMGN "\x1b[0;35m"
+#  define cCYA "\x1b[0;36m"
+#  define cLGR "\x1b[0;37m"
+#  define cGRA "\x1b[1;90m"
+#  define cLRD "\x1b[1;91m"
+#  define cLGN "\x1b[1;92m"
+#  define cYEL "\x1b[1;93m"
+#  define cLBL "\x1b[1;94m"
+#  define cPIN "\x1b[1;95m"
+#  define cLCY "\x1b[1;96m"
+#  define cBRI "\x1b[1;97m"
+#  define cRST "\x1b[0m"
+
+#  define bgBLK "\x1b[40m"
+#  define bgRED "\x1b[41m"
+#  define bgGRN "\x1b[42m"
+#  define bgBRN "\x1b[43m"
+#  define bgBLU "\x1b[44m"
+#  define bgMGN "\x1b[45m"
+#  define bgCYA "\x1b[46m"
+#  define bgLGR "\x1b[47m"
+#  define bgGRA "\x1b[100m"
+#  define bgLRD "\x1b[101m"
+#  define bgLGN "\x1b[102m"
+#  define bgYEL "\x1b[103m"
+#  define bgLBL "\x1b[104m"
+#  define bgPIN "\x1b[105m"
+#  define bgLCY "\x1b[106m"
+#  define bgBRI "\x1b[107m"
+
+#else
+
+#  define cBLK ""
+#  define cRED ""
+#  define cGRN ""
+#  define cBRN ""
+#  define cBLU ""
+#  define cMGN ""
+#  define cCYA ""
+#  define cLGR ""
+#  define cGRA ""
+#  define cLRD ""
+#  define cLGN ""
+#  define cYEL ""
+#  define cLBL ""
+#  define cPIN ""
+#  define cLCY ""
+#  define cBRI ""
+#  define cRST ""
+
+#  define bgBLK ""
+#  define bgRED ""
+#  define bgGRN ""
+#  define bgBRN ""
+#  define bgBLU ""
+#  define bgMGN ""
+#  define bgCYA ""
+#  define bgLGR ""
+#  define bgGRA ""
+#  define bgLRD ""
+#  define bgLGN ""
+#  define bgYEL ""
+#  define bgLBL ""
+#  define bgPIN ""
+#  define bgLCY ""
+#  define bgBRI ""
+
+#endif /* ^USE_COLOR */
+
+/*************************
+ * Box drawing sequences *
+ *************************/
+
+#ifdef FANCY_BOXES
+
+#  define SET_G1   "\x1b)0"       /* Set G1 for box drawing    */
+#  define RESET_G1 "\x1b)B"       /* Reset G1 to ASCII         */
+#  define bSTART   "\x0e"         /* Enter G1 drawing mode     */
+#  define bSTOP    "\x0f"         /* Leave G1 drawing mode     */
+#  define bH       "q"            /* Horizontal line           */
+#  define bV       "x"            /* Vertical line             */
+#  define bLT      "l"            /* Left top corner           */
+#  define bRT      "k"            /* Right top corner          */
+#  define bLB      "m"            /* Left bottom corner        */
+#  define bRB      "j"            /* Right bottom corner       */
+#  define bX       "n"            /* Cross                     */
+#  define bVR      "t"            /* Vertical, branch right    */
+#  define bVL      "u"            /* Vertical, branch left     */
+#  define bHT      "v"            /* Horizontal, branch top    */
+#  define bHB      "w"            /* Horizontal, branch bottom */
+
+#else
+
+#  define SET_G1   ""
+#  define RESET_G1 ""
+#  define bSTART   ""
+#  define bSTOP    ""
+#  define bH       "-"
+#  define bV       "|"
+#  define bLT      "+"
+#  define bRT      "+"
+#  define bLB      "+"
+#  define bRB      "+"
+#  define bX       "+"
+#  define bVR      "+"
+#  define bVL      "+"
+#  define bHT      "+"
+#  define bHB      "+"
+
+#endif /* ^FANCY_BOXES */
+
+/***********************
+ * Misc terminal codes *
+ ***********************/
+
+#define TERM_HOME     "\x1b[H"
+#define TERM_CLEAR    TERM_HOME "\x1b[2J"
+#define cEOL          "\x1b[0K"
+#define CURSOR_HIDE   "\x1b[?25l"
+#define CURSOR_SHOW   "\x1b[?25h"
+
+/************************
+ * Debug & error macros *
+ ************************/
+
+/* Just print stuff to the appropriate stream. */
+
+#ifdef MESSAGES_TO_STDOUT
+#  define SAYF(x...)    printf(x)
+#else 
+#  define SAYF(x...)    fprintf(stderr, x)
+#endif /* ^MESSAGES_TO_STDOUT */
+
+/* Show a prefixed warning. */
+
+#define WARNF(x...) do { \
+    SAYF(cYEL "[!] " cBRI "WARNING: " cRST x); \
+    SAYF(cRST "\n"); \
+  } while (0)
+
+/* Show a prefixed "doing something" message. */
+
+#define ACTF(x...) do { \
+    SAYF(cLBL "[*] " cRST x); \
+    SAYF(cRST "\n"); \
+  } while (0)
+
+/* Show a prefixed "success" message. */
+
+#define OKF(x...) do { \
+    SAYF(cLGN "[+] " cRST x); \
+    SAYF(cRST "\n"); \
+  } while (0)
+
+/* Show a prefixed fatal error message (not used in afl). */
+
+#define BADF(x...) do { \
+    SAYF(cLRD "\n[-] " cRST x); \
+    SAYF(cRST "\n"); \
+  } while (0)
+
+/* Die with a verbose non-OS fatal error message. */
+
+#define FATAL(x...) do { \
+    SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \
+         cBRI x); \
+    SAYF(cLRD "\n         Location : " cRST "%s(), %s:%d\n\n", \
+         __FUNCTION__, __FILE__, __LINE__); \
+    exit(1); \
+  } while (0)
+
+/* Die by calling abort() to provide a core dump. */
+
+#define ABORT(x...) do { \
+    SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \
+         cBRI x); \
+    SAYF(cLRD "\n    Stop location : " cRST "%s(), %s:%d\n\n", \
+         __FUNCTION__, __FILE__, __LINE__); \
+    abort(); \
+  } while (0)
+
+/* Die while also including the output of perror(). */
+
+#define PFATAL(x...) do { \
+    fflush(stdout); \
+    SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-]  SYSTEM ERROR : " \
+         cBRI x); \
+    SAYF(cLRD "\n    Stop location : " cRST "%s(), %s:%d\n", \
+         __FUNCTION__, __FILE__, __LINE__); \
+    SAYF(cLRD "       OS message : " cRST "%s\n", strerror(errno)); \
+    exit(1); \
+  } while (0)
+
+/* Die with FATAL() or PFATAL() depending on the value of res (used to
+   interpret different failure modes for read(), write(), etc). */
+
+#define RPFATAL(res, x...) do { \
+    if (res < 0) PFATAL(x); else FATAL(x); \
+  } while (0)
+
+/* Error-checking versions of read() and write() that call RPFATAL() as
+   appropriate. */
+
+#define ck_write(fd, buf, len, fn) do { \
+    u32 _len = (len); \
+    s32 _res = write(fd, buf, _len); \
+    if (_res != _len) RPFATAL(_res, "Short write to %s", fn); \
+  } while (0)
+
+#define ck_read(fd, buf, len, fn) do { \
+    u32 _len = (len); \
+    s32 _res = read(fd, buf, _len); \
+    if (_res != _len) RPFATAL(_res, "Short read from %s", fn); \
+  } while (0)
+
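+/* Example use (illustrative only; fd, mem, len, and out_file are
+   hypothetical):
+
+     ck_write(fd, mem, len, out_file);   // aborts on short or failed write
+
+ */
+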
+#endif /* ! _HAVE_DEBUG_H */
diff --git a/include/forkserver.h b/include/forkserver.h
new file mode 100644
index 00000000..fa40d9c6
--- /dev/null
+++ b/include/forkserver.h
@@ -0,0 +1,25 @@
+#ifndef __AFL_FORKSERVER_H
+#define __AFL_FORKSERVER_H
+
+void handle_timeout(int sig);
+void init_forkserver(char **argv);
+
+#ifdef __APPLE__
+#define MSG_FORK_ON_APPLE                                                      \
+  "    - On MacOS X, the semantics of fork() syscalls are non-standard and "   \
+  "may\n"                                                                      \
+  "      break afl-fuzz performance optimizations when running "               \
+  "platform-specific\n"                                                        \
+  "      targets. To fix this, set AFL_NO_FORKSRV=1 in the environment.\n\n"
+#else
+#define MSG_FORK_ON_APPLE ""
+#endif
+
+#ifdef RLIMIT_AS
+  #define MSG_ULIMIT_USAGE "      ( ulimit -Sv $[%llu << 10];"
+#else
+  #define MSG_ULIMIT_USAGE "      ( ulimit -Sd $[%llu << 10];"
+#endif /* ^RLIMIT_AS */
+
+
+#endif
diff --git a/include/hash.h b/include/hash.h
new file mode 100644
index 00000000..f39a8257
--- /dev/null
+++ b/include/hash.h
@@ -0,0 +1,104 @@
+/*
+   american fuzzy lop - hashing function
+   -------------------------------------
+
+   The hash32() function is a variant of MurmurHash3, a good
+   non-cryptosafe hashing function developed by Austin Appleby.
+
+   For simplicity, this variant does *NOT* accept buffer lengths
+   that are not divisible by 8 bytes. The 32-bit version is otherwise
+   similar to the original; the 64-bit one is a custom hack with
+   mostly-unproven properties.
+
+   Austin's original code is public domain.
+
+   Other code written and maintained by Michal Zalewski <lcamtuf@google.com>
+
+   Copyright 2016 Google Inc. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ */
+
+#ifndef _HAVE_HASH_H
+#define _HAVE_HASH_H
+
+#include "types.h"
+
+#ifdef __x86_64__
+
+#define ROL64(_x, _r)  ((((u64)(_x)) << (_r)) | (((u64)(_x)) >> (64 - (_r))))
+
+static inline u32 hash32(const void* key, u32 len, u32 seed) {
+
+  const u64* data = (const u64*)key;
+  u64 h1 = seed ^ len;
+
+  len >>= 3;
+
+  while (len--) {
+
+    u64 k1 = *data++;
+
+    k1 *= 0x87c37b91114253d5ULL;
+    k1  = ROL64(k1, 31);
+    k1 *= 0x4cf5ad432745937fULL;
+
+    h1 ^= k1;
+    h1  = ROL64(h1, 27);
+    h1  = h1 * 5 + 0x52dce729;
+
+  }
+
+  h1 ^= h1 >> 33;
+  h1 *= 0xff51afd7ed558ccdULL;
+  h1 ^= h1 >> 33;
+  h1 *= 0xc4ceb9fe1a85ec53ULL;
+  h1 ^= h1 >> 33;
+
+  return h1;
+
+}
+
+#else 
+
+#define ROL32(_x, _r)  ((((u32)(_x)) << (_r)) | (((u32)(_x)) >> (32 - (_r))))
+
+static inline u32 hash32(const void* key, u32 len, u32 seed) {
+
+  const u32* data  = (const u32*)key;
+  u32 h1 = seed ^ len;
+
+  len >>= 2;
+
+  while (len--) {
+
+    u32 k1 = *data++;
+
+    k1 *= 0xcc9e2d51;
+    k1  = ROL32(k1, 15);
+    k1 *= 0x1b873593;
+
+    h1 ^= k1;
+    h1  = ROL32(h1, 13);
+    h1  = h1 * 5 + 0xe6546b64;
+
+  }
+
+  h1 ^= h1 >> 16;
+  h1 *= 0x85ebca6b;
+  h1 ^= h1 >> 13;
+  h1 *= 0xc2b2ae35;
+  h1 ^= h1 >> 16;
+
+  return h1;
+
+}
+
+#endif /* ^__x86_64__ */
+
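+/* Typical AFL usage is hashing the coverage bitmap, along the lines of
+   (trace_bits named here for illustration):
+
+     u32 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
+
+   MAP_SIZE and HASH_CONST are defined in config.h. */
+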
+#endif /* !_HAVE_HASH_H */
diff --git a/include/sharedmem.h b/include/sharedmem.h
new file mode 100644
index 00000000..9aa44d0e
--- /dev/null
+++ b/include/sharedmem.h
@@ -0,0 +1,7 @@
+#ifndef __AFL_SHAREDMEM_H
+#define __AFL_SHAREDMEM_H
+
+void setup_shm(unsigned char dumb_mode);
+void remove_shm(void);
+
+#endif
diff --git a/include/types.h b/include/types.h
new file mode 100644
index 00000000..7606d4ed
--- /dev/null
+++ b/include/types.h
@@ -0,0 +1,91 @@
+/*
+   american fuzzy lop - type definitions and minor macros
+   ------------------------------------------------------
+
+   Written and maintained by Michal Zalewski <lcamtuf@google.com>
+
+   Copyright 2013, 2014, 2015 Google Inc. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ */
+
+#ifndef _HAVE_TYPES_H
+#define _HAVE_TYPES_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+typedef uint8_t  u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+
+/*
+
+   Ugh. There is an unintended compiler / glibc #include glitch caused by
+   combining the u64 type and %llu in format strings, necessitating a workaround.
+
+   In essence, the compiler is always looking for 'unsigned long long' for %llu.
+   On 32-bit systems, the u64 type (aliased to uint64_t) is expanded to
+   'unsigned long long' in <bits/types.h>, so everything checks out.
+
+   But on 64-bit systems, it is #ifdef'ed in the same file as 'unsigned long'.
+   Now, it only happens in circumstances where the type happens to have the
+   expected bit width, *but* the compiler does not know that... and complains
+   about 'unsigned long' being unsafe to pass to %llu.
+
+ */
+
+#ifdef __x86_64__
+typedef unsigned long long u64;
+#else
+typedef uint64_t u64;
+#endif /* ^__x86_64__ */
+
+typedef int8_t   s8;
+typedef int16_t  s16;
+typedef int32_t  s32;
+typedef int64_t  s64;
+
+#ifndef MIN
+#  define MIN(_a,_b) ((_a) > (_b) ? (_b) : (_a))
+#  define MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b))
+#endif /* !MIN */
+
+#define SWAP16(_x) ({ \
+    u16 _ret = (_x); \
+    (u16)((_ret << 8) | (_ret >> 8)); \
+  })
+
+#define SWAP32(_x) ({ \
+    u32 _ret = (_x); \
+    (u32)((_ret << 24) | (_ret >> 24) | \
+          ((_ret << 8) & 0x00FF0000) | \
+          ((_ret >> 8) & 0x0000FF00)); \
+  })
+
+#ifdef AFL_LLVM_PASS
+#  define AFL_R(x) (random() % (x))
+#else
+#  define R(x) (random() % (x))
+#endif /* ^AFL_LLVM_PASS */
+
+#define STRINGIFY_INTERNAL(x) #x
+#define STRINGIFY(x) STRINGIFY_INTERNAL(x)
+
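+/* STRINGIFY expands its argument before quoting it; e.g., with FORKSRV_FD
+   defined as 198, STRINGIFY(FORKSRV_FD) yields "198". afl-as.h relies on
+   this to splice numeric constants into the assembly templates. */
+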
+#define MEM_BARRIER() \
+  __asm__ volatile("" ::: "memory")
+
+#if __GNUC__ < 6
+ #define likely(_x)   (_x)
+ #define unlikely(_x) (_x)
+#else
+ #define likely(_x)   __builtin_expect(!!(_x), 1)
+ #define unlikely(_x)  __builtin_expect(!!(_x), 0)
+#endif
+
+#endif /* ! _HAVE_TYPES_H */