Diffstat:
-rw-r--r--  .gitignore | 7
-rw-r--r--  .travis.yml | 11
-rw-r--r--  Makefile | 99
l---------  README | 1
-rw-r--r--  README.md (renamed from docs/README) | 212
-rw-r--r--  TODO | 34
-rw-r--r--  afl-analyze.c | 100
-rw-r--r--  afl-as.h | 48
-rwxr-xr-x  afl-cmin | 14
-rw-r--r--  afl-common.c | 69
-rw-r--r--  afl-common.h | 5
-rw-r--r--  afl-fuzz.c | 4073
-rw-r--r--  afl-gcc.c | 5
-rw-r--r--  afl-showmap.c | 104
-rwxr-xr-x  afl-system-config | 9
-rw-r--r--  afl-tmin.c | 109
-rw-r--r--  alloc-inl.h | 12
-rw-r--r--  config.h | 7
-rw-r--r--  docs/ChangeLog | 49
-rw-r--r--  docs/PATCHES | 3
-rw-r--r--  docs/QuickStartGuide.txt | 6
-rw-r--r--  docs/README.MOpt | 51
l---------  docs/README.md | 1
-rw-r--r--  docs/binaryonly_fuzzing.txt | 140
-rw-r--r--  docs/env_variables.txt | 48
-rw-r--r--  docs/sister_projects.txt | 6
-rw-r--r--  docs/unicorn_mode.txt | 107
-rw-r--r--  llvm_mode/LLVMInsTrim.so.cc | 350
-rw-r--r--  llvm_mode/Makefile | 79
-rw-r--r--  llvm_mode/MarkNodes.cc | 355
-rw-r--r--  llvm_mode/MarkNodes.h | 11
-rw-r--r--  llvm_mode/README.instrim | 26
-rw-r--r--  llvm_mode/README.laf-intel | 8
-rw-r--r--  llvm_mode/README.llvm | 53
-rw-r--r--  llvm_mode/README.neverzero | 22
-rw-r--r--  llvm_mode/afl-clang-fast.c | 33
-rw-r--r--  llvm_mode/afl-llvm-pass.so.cc | 64
-rw-r--r--  llvm_mode/afl-llvm-rt.o.c | 27
-rw-r--r--  llvm_mode/compare-transform-pass.so.cc | 11
-rw-r--r--  llvm_mode/split-compares-pass.so.cc | 5
-rw-r--r--  llvm_mode/split-switches-pass.so.cc | 6
-rw-r--r--  python_mutators/README | 4
-rw-r--r--  python_mutators/XmlMutatorMin.py | 331
-rw-r--r--  python_mutators/wrapper_afl_min.py | 117
-rw-r--r--  qemu_mode/README.qemu | 2
-rwxr-xr-x  qemu_mode/build_qemu_support.sh | 1
-rw-r--r--  qemu_mode/libcompcov/Makefile | 42
-rw-r--r--  qemu_mode/libcompcov/README.compcov | 33
-rw-r--r--  qemu_mode/libcompcov/compcovtest.cc | 63
-rw-r--r--  qemu_mode/libcompcov/libcompcov.so.c | 279
-rw-r--r--  qemu_mode/libcompcov/pmparser.h | 280
-rw-r--r--  qemu_mode/patches/afl-qemu-cpu-inl.h | 61
-rw-r--r--  qemu_mode/patches/afl-qemu-cpu-translate-inl.h | 125
-rw-r--r--  qemu_mode/patches/afl-qemu-tcg-inl.h | 149
-rw-r--r--  qemu_mode/patches/afl-qemu-translate-inl.h | 11
-rw-r--r--  qemu_mode/patches/i386-translate.diff | 33
-rw-r--r--  sharedmem.c | 137
-rw-r--r--  sharedmem.h | 6
-rw-r--r--  types.h | 11
-rw-r--r--  unicorn_mode/README.md | 21
-rw-r--r--  unicorn_mode/build_unicorn_support.sh | 191
-rw-r--r--  unicorn_mode/helper_scripts/template_test_harness.py | 104
-rw-r--r--  unicorn_mode/helper_scripts/unicorn_dumper_gdb.py | 190
-rw-r--r--  unicorn_mode/helper_scripts/unicorn_dumper_ida.py | 209
-rw-r--r--  unicorn_mode/helper_scripts/unicorn_dumper_lldb.py | 299
-rw-r--r--  unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py | 224
-rw-r--r--  unicorn_mode/helper_scripts/unicorn_loader.py | 560
-rw-r--r--  unicorn_mode/patches/afl-unicorn-cpu-inl.h | 290
-rw-r--r--  unicorn_mode/patches/patches.diff | 107
-rw-r--r--  unicorn_mode/samples/simple/COMPILE.md | 41
-rw-r--r--  unicorn_mode/samples/simple/sample_inputs/sample1.bin | 1
-rw-r--r--  unicorn_mode/samples/simple/sample_inputs/sample2.bin | bin 0 -> 1 bytes
-rw-r--r--  unicorn_mode/samples/simple/sample_inputs/sample3.bin | 1
-rw-r--r--  unicorn_mode/samples/simple/sample_inputs/sample4.bin | 1
-rw-r--r--  unicorn_mode/samples/simple/sample_inputs/sample5.bin | 1
-rw-r--r--  unicorn_mode/samples/simple/simple_target.bin | bin 0 -> 256 bytes
-rw-r--r--  unicorn_mode/samples/simple/simple_target.c | 31
-rw-r--r--  unicorn_mode/samples/simple/simple_test_harness.py | 170

78 files changed, 9875 insertions, 631 deletions
diff --git a/.gitignore b/.gitignore
index cc0fd5c7..bb3c82eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
+*.o
+*.so
 .gitignore
-TODO
 afl-analyze
 afl-as
 afl-clang
@@ -9,6 +10,8 @@ afl-clang-fast++
 afl-fuzz
 afl-g++
 afl-gcc
+afl-gcc-fast
+afl-g++-fast
 afl-gotcpu
 afl-qemu-trace
 afl-showmap
@@ -16,3 +19,5 @@ afl-tmin
 as
 qemu_mode/qemu-3.1.0
 qemu_mode/qemu-3.1.0.tar.xz
+unicorn_mode/unicorn
+unicorn_mode/unicorn-*
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..9ef95bcf
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,11 @@
+language: c
+
+env:
+  - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1
+
+script:
+  - make
+  - ./afl-gcc ./test-instr.c -o test-instr
+  - mkdir seeds; mkdir out
+  - echo "" > seeds/nil_seed
+  - timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr
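As an aside, the CI job above doubles as a quick local smoke test. A minimal sketch of reproducing it by hand, assuming a checked-out tree with the bundled test-instr.c:

```shell
# Build afl and instrument the bundled test program
make
./afl-gcc ./test-instr.c -o test-instr

# Seed with a single near-empty input and fuzz for a few seconds
mkdir seeds out
echo "" > seeds/nil_seed
AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 \
  timeout --preserve-status 5s ./afl-fuzz -i seeds -o out/ -- ./test-instr
```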
diff --git a/Makefile b/Makefile
index 17e35a27..4103dbfa 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,9 @@
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 
+# For Heiko:
+#TEST_MMAP=1
+
 PROGNAME    = afl
 VERSION     = $(shell grep '^\#define VERSION ' config.h | cut -d '"' -f2)
 
@@ -22,8 +25,6 @@ HELPER_PATH = $(PREFIX)/lib/afl
 DOC_PATH    = $(PREFIX)/share/doc/afl
 MISC_PATH   = $(PREFIX)/share/afl
 
-CLANG_COMPILER_RT = "/home/user/code/compiler-rt/cmake-build-debug/lib/linux"
-
 # PROGS intentionally omit afl-as, which gets installed elsewhere.
 
 PROGS       = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
@@ -48,7 +49,8 @@ endif
 
 COMM_HDR    = alloc-inl.h config.h debug.h types.h
 
-ifeq "$(shell echo '\#include <Python.h>XXXvoid main() {}' | sed 's/XXX/\n/g' | $(CC) -x c - -o .test -I$(PYTHON_INCLUDE) -lpython2.7 && echo 1 || echo 0 )" "1"
+
+ifeq "$(shell echo '\#include <Python.h>@int main() {return 0; }' | tr @ '\n' | $(CC) -x c - -o .test -I$(PYTHON_INCLUDE) -lpython2.7 2>/dev/null && echo 1 || echo 0 )" "1"
 	PYTHON_OK=1
 	PYFLAGS=-DUSE_PYTHON -I$(PYTHON_INCLUDE) -lpython2.7
 else
@@ -56,15 +58,31 @@ else
 	PYFLAGS=
 endif
 
-all:	test_x86 test_python27 $(PROGS) afl-as test_build all_done
+
+ifeq "$(shell echo '\#include <sys/ipc.h>@\#include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(CC) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 )" "1"
+	SHMAT_OK=1
+else
+	SHMAT_OK=0
+	CFLAGS+=-DUSEMMAP=1
+	LDFLAGS+=-Wno-deprecated-declarations -lrt
+endif
+
+ifeq "$(TEST_MMAP)" "1"
+	SHMAT_OK=0
+	CFLAGS+=-DUSEMMAP=1
+	LDFLAGS+=-Wno-deprecated-declarations -lrt
+endif
+
+
+all:	test_x86 test_shm test_python27 ready $(PROGS) afl-as test_build all_done
+
 
 ifndef AFL_NO_X86
 
 test_x86:
 	@echo "[*] Checking for the ability to compile x86 code..."
-	@echo 'main() { __asm__("xorb %al, %al"); }' | $(CC) -w -x c - -o .test || ( echo; echo "Oops, looks like your compiler can't generate x86 code."; echo; echo "Don't panic! You can use the LLVM or QEMU mode, but see docs/INSTALL first."; echo "(To ignore this error, set AFL_NO_X86=1 and try again.)"; echo; exit 1 )
-	@rm -f .test
-	@echo "[+] Everything seems to be working, ready to compile."
+	@echo 'main() { __asm__("xorb %al, %al"); }' | $(CC) -w -x c - -o .test1 || ( echo; echo "Oops, looks like your compiler can't generate x86 code."; echo; echo "Don't panic! You can use the LLVM or QEMU mode, but see docs/INSTALL first."; echo "(To ignore this error, set AFL_NO_X86=1 and try again.)"; echo; exit 1 )
+	@rm -f .test1
 
 else
 
@@ -73,6 +91,21 @@ test_x86:
 
 endif
 
+
+ifeq "$(SHMAT_OK)" "1"
+
+test_shm:
+	@echo "[+] shmat seems to be working."
+	@rm -f .test2
+
+else
+
+test_shm:
+	@echo "[-] shmat seems not to be working, switching to mmap implementation"
+
+endif
+
+
 ifeq "$(PYTHON_OK)" "1"
 
 test_python27:
@@ -86,6 +119,10 @@ test_python27:
 
 endif
 
+
+ready:
+	@echo "[+] Everything seems to be working, ready to compile."
+
 afl-gcc: afl-gcc.c $(COMM_HDR) | test_x86
 	$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
 	set -e; for i in afl-g++ afl-clang afl-clang++; do ln -sf afl-gcc $$i; done
@@ -94,30 +131,28 @@ afl-as: afl-as.c afl-as.h $(COMM_HDR) | test_x86
 	$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
 	ln -sf afl-as as
 
-afl-fuzz: afl-fuzz.c $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS) $(PYFLAGS)
+afl-common.o : afl-common.c
+	$(CC) $(CFLAGS) -c afl-common.c
 
-smart-rabbit: afl-fuzz.c $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) afl-fuzz.c -o $@ $(LDFLAGS) -lstdc++ -lpthread -lm -lrt \
-	 -Wl,--whole-archive -Wl,$(CLANG_COMPILER_RT)/libclang_rt.ubsan_standalone-x86_64.a -Wl,--no-whole-archive \
-	 -Wl,--dynamic-list=$(CLANG_COMPILER_RT)/libclang_rt.ubsan_standalone-x86_64.a.syms \
-	 -Wl,--whole-archive -Wl,$(CLANG_COMPILER_RT)/libclang_rt.ubsan_standalone_cxx-x86_64.a -Wl,--no-whole-archive \
-	 -Wl,--dynamic-list=$(CLANG_COMPILER_RT)/libclang_rt.ubsan_standalone_cxx-x86_64.a.syms \
-	 -Wl,--whole-archive -Wl,$(CLANG_COMPILER_RT)/libclang_rt.fuzzer_no_main-x86_64.a -Wl,--no-whole-archive \
-	 -Wl,--dynamic-list=libclang_rt.fuzzer_no_main-x86_64.a.syms
+sharedmem.o : sharedmem.c
+	$(CC) $(CFLAGS) -c sharedmem.c
 
-afl-showmap: afl-showmap.c $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
+afl-fuzz: afl-fuzz.c afl-common.o sharedmem.o $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) $@.c afl-common.o sharedmem.o -o $@ $(LDFLAGS) $(PYFLAGS)
 
-afl-tmin: afl-tmin.c $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
+afl-showmap: afl-showmap.c afl-common.o sharedmem.o $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) $@.c afl-common.o sharedmem.o -o $@ $(LDFLAGS)
 
-afl-analyze: afl-analyze.c $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
+afl-tmin: afl-tmin.c afl-common.o sharedmem.o $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) $@.c afl-common.o sharedmem.o -o $@ $(LDFLAGS)
+
+afl-analyze: afl-analyze.c afl-common.o sharedmem.o $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) $@.c afl-common.o sharedmem.o -o $@ $(LDFLAGS)
 
 afl-gotcpu: afl-gotcpu.c $(COMM_HDR) | test_x86
 	$(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
 
+
 ifndef AFL_NO_X86
 
 test_build: afl-gcc afl-as afl-showmap
@@ -136,17 +171,18 @@ test_build: afl-gcc afl-as afl-showmap
 
 endif
 
+
 all_done: test_build
 	@if [ ! "`which clang 2>/dev/null`" = "" ]; then echo "[+] LLVM users: see llvm_mode/README.llvm for a faster alternative to afl-gcc."; fi
-	@echo "[+] All done! Be sure to review README - it's pretty short and useful."
+	@echo "[+] All done! Be sure to review the README.md - it's pretty short and useful."
 	@if [ "`uname`" = "Darwin" ]; then printf "\nWARNING: Fuzzing on MacOS X is slow because of the unusually high overhead of\nfork() on this OS. Consider using Linux or *BSD. You can also use VirtualBox\n(virtualbox.org) to put AFL inside a Linux or *BSD VM.\n\n"; fi
 	@! tty <&1 >/dev/null || printf "\033[0;30mNOTE: If you can read this, your terminal probably uses white background.\nThis will make the UI hard to read. See docs/status_screen.txt for advice.\033[0m\n" 2>/dev/null
 
 .NOTPARALLEL: clean
 
 clean:
-	rm -f $(PROGS) smart-rabbit afl-as as afl-g++ afl-clang afl-clang++ *.o *~ a.out core core.[1-9][0-9]* *.stackdump test .test test-instr .test-instr0 .test-instr1 qemu_mode/qemu-2.10.0.tar.bz2 afl-qemu-trace
-	rm -rf out_dir qemu_mode/qemu-2.10.0
+	rm -f $(PROGS) afl-as as afl-g++ afl-clang afl-clang++ *.o *~ a.out core core.[1-9][0-9]* *.stackdump test .test .test1 .test2 test-instr .test-instr0 .test-instr1 qemu_mode/qemu-3.1.0.tar.xz afl-qemu-trace afl-gcc-fast  afl-gcc-pass.so  afl-gcc-rt.o  afl-g++-fast
+	rm -rf out_dir qemu_mode/qemu-3.1.0
 	$(MAKE) -C llvm_mode clean
 	$(MAKE) -C libdislocator clean
 	$(MAKE) -C libtokencap clean
@@ -157,8 +193,9 @@ install: all
 	install -m 755 $(PROGS) $(SH_PROGS) $${DESTDIR}$(BIN_PATH)
 	rm -f $${DESTDIR}$(BIN_PATH)/afl-as
 	if [ -f afl-qemu-trace ]; then install -m 755 afl-qemu-trace $${DESTDIR}$(BIN_PATH); fi
+	#if [ -f afl-gcc-fast ]; then set e; install -m 755 afl-gcc-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-gcc-fast $${DESTDIR}$(BIN_PATH)/afl-g++-fast; install -m 755 afl-gcc-pass.so afl-gcc-rt.o $${DESTDIR}$(HELPER_PATH); fi
 ifndef AFL_TRACE_PC
-	if [ -f afl-clang-fast -a -f afl-llvm-pass.so -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 afl-llvm-pass.so afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f afl-clang-fast -a -f libLLVMInsTrim.so -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 libLLVMInsTrim.so afl-llvm-pass.so afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi
 else
 	if [ -f afl-clang-fast -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi
 endif
@@ -168,10 +205,12 @@ endif
 	if [ -f split-compares-pass.so ]; then set -e; install -m 755 split-compares-pass.so $${DESTDIR}$(HELPER_PATH); fi
 	if [ -f split-switches-pass.so ]; then set -e; install -m 755 split-switches-pass.so $${DESTDIR}$(HELPER_PATH); fi
 
-	set -e; for i in afl-g++ afl-clang afl-clang++; do ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/$$i; done
+	set -e; ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/afl-g++
+	set -e; if [ -f afl-clang-fast ] ; then ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang ; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang++ ; else ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/afl-clang ; ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/afl-clang++; fi
+
 	install -m 755 afl-as $${DESTDIR}$(HELPER_PATH)
 	ln -sf afl-as $${DESTDIR}$(HELPER_PATH)/as
-	install -m 644 docs/README docs/ChangeLog docs/*.txt $${DESTDIR}$(DOC_PATH)
+	install -m 644 docs/README.md docs/ChangeLog docs/*.txt $${DESTDIR}$(DOC_PATH)
 	cp -r testcases/ $${DESTDIR}$(MISC_PATH)
 	cp -r dictionaries/ $${DESTDIR}$(MISC_PATH)
 
@@ -182,7 +221,7 @@ publish: clean
 #	  tar -cvz -f ~/www/afl/releases/$(PROGNAME)-$(VERSION).tgz $(PROGNAME)-$(VERSION)
 #	chmod 644 ~/www/afl/releases/$(PROGNAME)-$(VERSION).tgz
 #	( cd ~/www/afl/releases/; ln -s -f $(PROGNAME)-$(VERSION).tgz $(PROGNAME)-latest.tgz )
-#	cat docs/README >~/www/afl/README.txt
+#	cat docs/README.md >~/www/afl/README.txt
 #	cat docs/status_screen.txt >~/www/afl/status_screen.txt
 #	cat docs/historical_notes.txt >~/www/afl/historical_notes.txt
 #	cat docs/technical_details.txt >~/www/afl/technical_details.txt
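For reference, the SHMAT_OK probe added above amounts to checking that a trivial SysV shared-memory program compiles and links; the Makefile feeds it to the compiler as a one-liner, with '@' standing in for newlines. A rough equivalent, unrolled here for readability:

```shell
# Same probe as the Makefile's SHMAT_OK check, written out long-hand
cat > .test2.c <<'EOF'
#include <sys/ipc.h>
#include <sys/shm.h>
int main() {
  int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600);
  shmctl(_id, IPC_RMID, 0);
  return 0;
}
EOF
cc .test2.c -o .test2 \
  && echo "[+] shmat seems to be working." \
  || echo "[-] shmat seems not to be working, switching to mmap implementation"
```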
diff --git a/README b/README
deleted file mode 120000
index a90f4af9..00000000
--- a/README
+++ /dev/null
@@ -1 +0,0 @@
-docs/README
\ No newline at end of file
diff --git a/docs/README b/README.md
index ca38223d..953c1afb 100644
--- a/docs/README
+++ b/README.md
@@ -1,45 +1,48 @@
-============================
-american fuzzy lop plus plus
-============================
+# american fuzzy lop plus plus (afl++)
 
-  Written by Michal Zalewski <lcamtuf@google.com>
+  Originally developed by Michal "lcamtuf" Zalewski.
 
-  Repository: https://github.com/vanhauser-thc/AFLplusplus
+  Repository: [https://github.com/vanhauser-thc/AFLplusplus](https://github.com/vanhauser-thc/AFLplusplus)
 
-  afl++ is maintained by Marc Heuse <mh@mh-sec.de> and Heiko Eissfeldt
-  <heiko.eissfeldt@hexco.de> as there have been no updates to afl since
-  November 2017.
+  afl++ is maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eissfeldt
+  <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com>.
 
-  This version has several bug fixes, new features and speed enhancements
-  based on community patches from https://github.com/vanhauser-thc/afl-patches
-  To see the list of which patches have been applied, see the PATCHES file.
+## The enhancements compared to the original stock afl
 
-  Additionally AFLfast's power schedules by Marcel Boehme from
-  github.com/mboehme/aflfast have been incorporated.
+  Many improvements were made over the official afl release - which has not
+  received any updates since November 2017.
 
-  Plus it was upgraded to qemu 3.1 from 2.1 with the work of 
-  https://github.com/andreafioraldi/afl and got the community patches applied
-  to it.
+  Among other things, afl++ has a more performant llvm_mode (supporting
+  llvm up to version 8), Qemu 3.1, more speed and crash fixes for Qemu,
+  the laf-intel feature for Qemu (with libcompcov), and more.
 
-  C. Hoellers afl-fuzz Python mutator module and llvm_mode whitelist support
-  was added too (https://github.com/choller/afl)
+  Additionally, the following patches have been integrated:
 
-  So all in all this is the best-of AFL that is currently out there :-)
+  * AFLfast's power schedules by Marcel Boehme: [https://github.com/mboehme/aflfast](https://github.com/mboehme/aflfast)
+
+  * C. Holler's afl-fuzz Python mutator module and llvm_mode whitelist support: [https://github.com/choller/afl](https://github.com/choller/afl)
+
+  * the new excellent MOpt mutator: [https://github.com/puppet-meteor/MOpt-AFL](https://github.com/puppet-meteor/MOpt-AFL)
 
+  * instrim, a very effective CFG llvm_mode instrumentation implementation for large targets: [https://github.com/csienslab/instrim](https://github.com/csienslab/instrim)
 
-  Copyright 2013, 2014, 2015, 2016 Google Inc. All rights reserved.
-  Released under terms and conditions of Apache License, Version 2.0.
+  * unicorn_mode which allows fuzzing of binaries from completely different platforms (integration provided by domenukk)
+
+  A more thorough list is available in the PATCHES file.
+
+  So all in all this is the best-of AFL that is currently out there :-)
 
   For new versions and additional information, check out:
-  https://github.com/vanhauser-thc/AFLplusplus
+  [https://github.com/vanhauser-thc/AFLplusplus](https://github.com/vanhauser-thc/AFLplusplus)
 
   To compare notes with other users or get notified about major new features,
   send a mail to <afl-users+subscribe@googlegroups.com>.
 
-  ** See QuickStartGuide.txt if you don't have time to read this file. **
+  See [docs/QuickStartGuide.txt](docs/QuickStartGuide.txt) if you don't have time to
+  read this file.
 
 
-1) Challenges of guided fuzzing
+## 1) Challenges of guided fuzzing
 -------------------------------
 
 Fuzzing is one of the most powerful and proven strategies for identifying
@@ -67,8 +70,7 @@ to suffer from reliability and performance problems in practical uses - and
 currently do not offer a viable alternative to "dumb" fuzzing techniques.
 
 
-2) The afl-fuzz approach
-------------------------
+## 2) The afl-fuzz approach
 
 American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple
 but rock-solid instrumentation-guided genetic algorithm. It uses a modified
@@ -107,8 +109,7 @@ The fuzzer is thoroughly tested to deliver out-of-the-box performance far
 superior to blind fuzzing or coverage-only tools.
 
 
-3) Instrumenting programs for use with AFL
-------------------------------------------
+## 3) Instrumenting programs for use with AFL
 
 PLEASE NOTE: llvm_mode compilation with afl-clang-fast/afl-clang-fast++
 instead of afl-gcc/afl-g++ is much faster and has a few cool features.
@@ -126,39 +127,45 @@ or even faster than possible with traditional tools.
 The correct way to recompile the target program may vary depending on the
 specifics of the build process, but a nearly-universal approach would be:
 
+```shell
 $ CC=/path/to/afl/afl-gcc ./configure
 $ make clean all
+```
 
-For C++ programs, you'd would also want to set CXX=/path/to/afl/afl-g++.
+For C++ programs, you'd also want to set `CXX=/path/to/afl/afl-g++`.
 
 The clang wrappers (afl-clang and afl-clang++) can be used in the same way;
 clang users may also opt to leverage a higher-performance instrumentation mode,
-as described in llvm_mode/README.llvm.
-Clang/LLVM has a much better performance and works from LLVM version 4.0 to 8.
+as described in [llvm_mode/README.llvm](llvm_mode/README.llvm).
+Clang/LLVM has much better performance and works with LLVM versions 4.0 to 8.
+
+Using the LAF Intel performance enhancements is also recommended, see
-llvm_mode/README.laf-intel
+[llvm_mode/README.laf-intel](llvm_mode/README.laf-intel)
+
 Using partial instrumentation is also recommended, see
-llvm_mode/README.whitelist
+[llvm_mode/README.whitelist](llvm_mode/README.whitelist)
 
 When testing libraries, you need to find or write a simple program that reads
 data from stdin or from a file and passes it to the tested library. In such a
 case, it is essential to link this executable against a static version of the
 instrumented library, or to make sure that the correct .so file is loaded at
-runtime (usually by setting LD_LIBRARY_PATH). The simplest option is a static
+runtime (usually by setting `LD_LIBRARY_PATH`). The simplest option is a static
 build, usually possible via:
 
+```shell
 $ CC=/path/to/afl/afl-gcc ./configure --disable-shared
+```
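
If a static build is not possible, the `LD_LIBRARY_PATH` route mentioned above is usually enough; a hedged sketch (the library path and harness name are placeholders):

```shell
# Point the loader at the instrumented build of the library before fuzzing
export LD_LIBRARY_PATH=/path/to/instrumented/libs
./afl-fuzz -i testcase_dir -o findings_dir /path/to/test_harness @@
```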
 
-Setting AFL_HARDEN=1 when calling 'make' will cause the CC wrapper to
+Setting `AFL_HARDEN=1` when calling 'make' will cause the CC wrapper to
 automatically enable code hardening options that make it easier to detect
 simple memory bugs. Libdislocator, a helper library included with AFL (see
-libdislocator/README.dislocator) can help uncover heap corruption issues, too.
+[libdislocator/README.dislocator](libdislocator/README.dislocator)) can help uncover heap corruption issues, too.
 
-PS. ASAN users are advised to docs/review notes_for_asan.txt file for
-important caveats.
+PS. ASAN users are advised to review the [docs/notes_for_asan.txt](docs/notes_for_asan.txt)
+file for important caveats.
 
 
-4) Instrumenting binary-only apps
+## 4) Instrumenting binary-only apps
 ---------------------------------
 
 When source code is *NOT* available, the fuzzer offers experimental support for
@@ -168,10 +175,12 @@ with a version of QEMU running in the lesser-known "user space emulation" mode.
 QEMU is a project separate from AFL, but you can conveniently build the
 feature by doing:
 
+```shell
 $ cd qemu_mode
 $ ./build_qemu_support.sh
+```
 
-For additional instructions and caveats, see qemu_mode/README.qemu.
+For additional instructions and caveats, see [qemu_mode/README.qemu](qemu_mode/README.qemu).
 
 The mode is approximately 2-5x slower than compile-time instrumentation, is
 less conducive to parallelization, and may have some other quirks.
@@ -180,23 +189,25 @@ If [afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) works for
 your binary, then you can use afl-fuzz normally and it will have twice
 the speed compared to qemu_mode.
 
+A more comprehensive description of these and other options can be found in
+[docs/binaryonly_fuzzing.txt](docs/binaryonly_fuzzing.txt)
 
-5) Power schedules
+
+## 5) Power schedules
 ------------------
 
 The power schedules were copied from Marcel Böhme's excellent AFLfast
 implementation and expand the ability to discover new paths and
 therefore increase coverage.
 
-| AFL flag | Power Schedule             | 
-| ------------- | -------------------------- |
-| `-p explore` (default)| ![EXPLORE](http://latex.codecogs.com/gif.latex?p%28i%29%3D%5Cfrac%7B%5Calpha%28i%29%7D%7B%5Cbeta%7D) |
-| `-p fast` | ![FAST](http://latex.codecogs.com/gif.latex?p(i)=\\min\\left(\\frac{\\alpha(i)}{\\beta}\\cdot\\frac{2^{s(i)}}{f(i)},M\\right))  |
-| `-p coe` | ![COE](http://latex.codecogs.com/gif.latex?p%28i%29%3D%5Cbegin%7Bcases%7D%200%20%26%20%5Ctext%7B%20if%20%7D%20f%28i%29%20%3E%20%5Cmu%5C%5C%20%5Cmin%5Cleft%28%5Cfrac%7B%5Calpha%28i%29%7D%7B%5Cbeta%7D%5Ccdot%202%5E%7Bs%28i%29%7D%2C%20M%5Cright%29%20%26%20%5Ctext%7B%20otherwise.%7D%20%5Cend%7Bcases%7D) |
-| `-p quad` | ![QUAD](http://latex.codecogs.com/gif.latex?p%28i%29%20%3D%20%5Cmin%5Cleft%28%5Cfrac%7B%5Calpha%28i%29%7D%7B%5Cbeta%7D%5Ccdot%5Cfrac%7Bs%28i%29%5E2%7D%7Bf%28i%29%7D%2CM%5Cright%29) |
-| `-p lin` | ![LIN](http://latex.codecogs.com/gif.latex?p%28i%29%20%3D%20%5Cmin%5Cleft%28%5Cfrac%7B%5Calpha%28i%29%7D%7B%5Cbeta%7D%5Ccdot%5Cfrac%7Bs%28i%29%7D%7Bf%28i%29%7D%2CM%5Cright%29) |
-| `-p exploit` (AFL) | ![LIN](http://latex.codecogs.com/gif.latex?p%28i%29%20%3D%20%5Calpha%28i%29) |
-where *α(i)* is the performance score that AFL uses to compute for the seed input *i*, *β(i)>1* is a constant, *s(i)* is the number of times that seed *i* has been chosen from the queue, *f(i)* is the number of generated inputs that exercise the same path as seed *i*, and *μ* is the average number of generated inputs exercising a path.
+The available schedules are:
+ 
+ - explore (default)
+ - fast
+ - coe
+ - quad
+ - lin
+ - exploit
 
 In parallel mode (-M/-S, several instances with shared queue), we suggest
 running the master using the exploit schedule (-p exploit) and the slaves with a
@@ -206,13 +217,15 @@ and explore (-p explore) schedules.
 In single mode, using -p fast is usually more beneficial than the default
 explore mode.
 (We don't want to change the default behaviour of afl, so "fast" has not been
-made the default mode)
+made the default mode).
 
-More details can be found in the paper:
-[23rd ACM Conference on Computer and Communications Security (CCS'16)](https://www.sigsac.org/ccs/CCS2016/accepted-papers/).
+More details can be found in the paper published at the 23rd ACM Conference on
+Computer and Communications Security (CCS'16):
+ 
+ [https://www.sigsac.org/ccs/CCS2016/accepted-papers/](https://www.sigsac.org/ccs/CCS2016/accepted-papers/)
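
Putting the schedule advice above into practice, a sketch of a parallel setup (instance names and the target are placeholders):

```shell
# Master on the exploit schedule, slaves spread across the others
./afl-fuzz -i testcases -o sync_dir -M master -p exploit -- ./target @@
./afl-fuzz -i testcases -o sync_dir -S slave1 -p fast    -- ./target @@
./afl-fuzz -i testcases -o sync_dir -S slave2 -p coe     -- ./target @@
./afl-fuzz -i testcases -o sync_dir -S slave3 -p explore -- ./target @@
```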
 
 
-6) Choosing initial test cases
+## 6) Choosing initial test cases
 ------------------------------
 
 To operate correctly, the fuzzer requires one or more starting file that
@@ -220,7 +233,7 @@ contains a good example of the input data normally expected by the targeted
 application. There are two basic rules:
 
   - Keep the files small. Under 1 kB is ideal, although not strictly necessary.
-    For a discussion of why size matters, see perf_tips.txt.
+    For a discussion of why size matters, see [perf_tips.txt](docs/perf_tips.txt).
 
   - Use multiple test cases only if they are functionally different from
     each other. There is no point in using fifty different vacation photos
@@ -234,7 +247,7 @@ the afl-cmin utility to identify a subset of functionally distinct files that
 exercise different code paths in the target binary.
 
 
-7) Fuzzing binaries
+## 7) Fuzzing binaries
 -------------------
 
 The fuzzing process itself is carried out by the afl-fuzz utility. This program
@@ -243,13 +256,17 @@ store its findings, plus a path to the binary to test.
 
 For target binaries that accept input directly from stdin, the usual syntax is:
 
+```shell
 $ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program [...params...]
+```
 
 For programs that take input from a file, use '@@' to mark the location in
 the target's command line where the input file name should be placed. The
 fuzzer will substitute this for you:
 
+```shell
 $ ./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@
+```
 
 You can also use the -f option to have the mutated data written to a specific
 file. This is useful if the program expects a particular file extension or so.
@@ -261,7 +278,7 @@ You can use -t and -m to override the default timeout and memory limit for the
 executed process; rare examples of targets that may need these settings touched
 include compilers and video decoders.
 
-Tips for optimizing fuzzing performance are discussed in perf_tips.txt.
+Tips for optimizing fuzzing performance are discussed in [perf_tips.txt](docs/perf_tips.txt).
 
 Note that afl-fuzz starts by performing an array of deterministic fuzzing
 steps, which can take several days, but tend to produce neat test cases. If you
@@ -269,12 +286,12 @@ want quick & dirty results right away - akin to zzuf and other traditional
 fuzzers - add the -d option to the command line.
 
 
-8) Interpreting output
+## 8) Interpreting output
 ----------------------
 
-See the status_screen.txt file for information on how to interpret the
-displayed stats and monitor the health of the process. Be sure to consult this
-file especially if any UI elements are highlighted in red.
+See the [docs/status_screen.txt](docs/status_screen.txt) file for information on
+how to interpret the displayed stats and monitor the health of the process. Be
+sure to consult this file especially if any UI elements are highlighted in red.
 
 The fuzzing process will continue until you press Ctrl-C. At minimum, you want
 to allow the fuzzer to complete one queue cycle, which may take anywhere from a
@@ -312,35 +329,39 @@ queue entries. This should help with debugging.
 When you can't reproduce a crash found by afl-fuzz, the most likely cause is
 that you are not setting the same memory limit as used by the tool. Try:
 
+```shell
 $ LIMIT_MB=50
 $ ( ulimit -Sv $[LIMIT_MB << 10]; /path/to/tested_binary ... )
+```
 
 Change LIMIT_MB to match the -m parameter passed to afl-fuzz. On OpenBSD,
 also change -Sv to -Sd.
 
 Any existing output directory can be also used to resume aborted jobs; try:
 
+```shell
 $ ./afl-fuzz -i- -o existing_output_dir [...etc...]
+```
 
 If you have gnuplot installed, you can also generate some pretty graphs for any
 active fuzzing task using afl-plot. For an example of what this looks like,
-see http://lcamtuf.coredump.cx/afl/plot/.
+see [http://lcamtuf.coredump.cx/afl/plot/](http://lcamtuf.coredump.cx/afl/plot/).
 
 
-9) Parallelized fuzzing
+## 9) Parallelized fuzzing
 -----------------------
 
 Every instance of afl-fuzz takes up roughly one core. This means that on
 multi-core systems, parallelization is necessary to fully utilize the hardware.
 For tips on how to fuzz a common target on multiple cores or multiple networked
-machines, please refer to parallel_fuzzing.txt.
+machines, please refer to [docs/parallel_fuzzing.txt](docs/parallel_fuzzing.txt).
 
 The parallel fuzzing mode also offers a simple way for interfacing AFL to other
 fuzzers, to symbolic or concolic execution engines, and so forth; again, see the
-last section of parallel_fuzzing.txt for tips.
+last section of [docs/parallel_fuzzing.txt](docs/parallel_fuzzing.txt) for tips.
 
 
-10) Fuzzer dictionaries
+## 10) Fuzzer dictionaries
 ----------------------
 
 By default, afl-fuzz mutation engine is optimized for compact data formats -
@@ -351,13 +372,13 @@ redundant verbiage - notably including HTML, SQL, or JavaScript.
 To avoid the hassle of building syntax-aware tools, afl-fuzz provides a way to
 seed the fuzzing process with an optional dictionary of language keywords,
 magic headers, or other special tokens associated with the targeted data type
-- and use that to reconstruct the underlying grammar on the go:
+-- and use that to reconstruct the underlying grammar on the go:
 
-  http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html
+  [http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html](http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html)
 
 To use this feature, you first need to create a dictionary in one of the two
-formats discussed in dictionaries/README.dictionaries; and then point the fuzzer
-to it via the -x option in the command line.
+formats discussed in [dictionaries/README.dictionaries](dictionaries/README.dictionaries);
+and then point the fuzzer to it via the -x option in the command line.
 
 (Several common dictionaries are already provided in that subdirectory, too.)
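
As a concrete illustration of the -x option described above (the dictionary name is only an example; pick whichever bundled file matches the target's input format):

```shell
./afl-fuzz -i testcase_dir -o findings_dir -x dictionaries/sql.dict -- /path/to/program @@
```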
 
@@ -365,7 +386,7 @@ There is no way to provide more structured descriptions of the underlying
 syntax, but the fuzzer will likely figure out some of this based on the
 instrumentation feedback alone. This actually works in practice, say:
 
-  http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html
+  [http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html](http://lcamtuf.blogspot.com/2015/04/finding-bugs-in-sqlite-easy-way.html)
 
 PS. Even when no explicit dictionary is given, afl-fuzz will try to extract
 existing syntax tokens in the input corpus by watching the instrumentation
@@ -374,10 +395,10 @@ parsers and grammars, but isn't nearly as good as the -x mode.
 
 If a dictionary is really hard to come by, another option is to let AFL run
 for a while, and then use the token capture library that comes as a companion
-utility with AFL. For that, see libtokencap/README.tokencap.
+utility with AFL. For that, see [libtokencap/README.tokencap](libtokencap/README.tokencap).
 
 
-11) Crash triage
+## 11) Crash triage
 ----------------
 
 The coverage-based grouping of crashes usually produces a small data set that
@@ -406,7 +427,9 @@ beneath.
 Oh, one more thing: for test case minimization, give afl-tmin a try. The tool
 can be operated in a very simple way:
 
+```shell
 $ ./afl-tmin -i test_case -o minimized_result -- /path/to/program [...]
+```
 
 The tool works with crashing and non-crashing test cases alike. In the crash
 mode, it will happily accept instrumented and non-instrumented binaries. In the
@@ -421,10 +444,10 @@ file, attempts to sequentially flip bytes, and observes the behavior of the
 tested program. It then color-codes the input based on which sections appear to
 be critical, and which are not; while not bulletproof, it can often offer quick
 insights into complex file formats. More info about its operation can be found
-near the end of technical_details.txt.
+near the end of [docs/technical_details.txt](docs/technical_details.txt).
 
 
-12) Going beyond crashes
+## 12) Going beyond crashes
 ------------------------
 
 Fuzzing is a wonderful and underutilized technique for discovering non-crashing
@@ -445,11 +468,11 @@ found by modifying the target programs to call abort() when, say:
 
 Implementing these or similar sanity checks usually takes very little time;
 if you are the maintainer of a particular package, you can make this code
-conditional with #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION (a flag also
-shared with libfuzzer) or #ifdef __AFL_COMPILER (this one is just for AFL).
+conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also
+shared with libfuzzer) or `#ifdef __AFL_COMPILER` (this one is just for AFL).
 
 
-13) Common-sense risks
+## 13) Common-sense risks
 ----------------------
 
 Please keep in mind that, similarly to many other computationally-intensive
@@ -475,10 +498,12 @@ tasks, fuzzing may put strain on your hardware and on the OS. In particular:
 
     A good way to monitor disk I/O on Linux is the 'iostat' command:
 
+```shell
     $ iostat -d 3 -x -k [...optional disk ID...]
+```
 
 
-14) Known limitations & areas for improvement
+## 14) Known limitations & areas for improvement
 ---------------------------------------------
 
 Here are some of the most important caveats for AFL:
@@ -499,33 +524,34 @@ Here are some of the most important caveats for AFL:
     experimental/post_library/ (with AFL_POST_LIBRARY)
 
   - There are some unfortunate trade-offs with ASAN and 64-bit binaries. This
-    isn't due to any specific fault of afl-fuzz; see notes_for_asan.txt for
-    tips.
+    isn't due to any specific fault of afl-fuzz; see [docs/notes_for_asan.txt](docs/notes_for_asan.txt)
+    for tips.
 
   - There is no direct support for fuzzing network services, background
     daemons, or interactive apps that require UI interaction to work. You may
     need to make simple code changes to make them behave in a more traditional
     way. Preeny may offer a relatively simple option, too - see:
-    https://github.com/zardus/preeny
+    [https://github.com/zardus/preeny](https://github.com/zardus/preeny)
 
     Some useful tips for modifying network-based services can be also found at:
-    https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop
+    [https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop](https://www.fastly.com/blog/how-to-fuzz-server-american-fuzzy-lop)
 
   - AFL doesn't output human-readable coverage data. If you want to monitor
-    coverage, use afl-cov from Michael Rash: https://github.com/mrash/afl-cov
+    coverage, use afl-cov from Michael Rash: [https://github.com/mrash/afl-cov](https://github.com/mrash/afl-cov)
 
   - Occasionally, sentient machines rise against their creators. If this
-    happens to you, please consult http://lcamtuf.coredump.cx/prep/.
+    happens to you, please consult [http://lcamtuf.coredump.cx/prep/](http://lcamtuf.coredump.cx/prep/).
 
 Beyond this, see INSTALL for platform-specific tips.
 
 
-15) Special thanks
+## 15) Special thanks
 ------------------
 
-Many of the improvements to afl-fuzz wouldn't be possible without feedback,
-bug reports, or patches from:
+Many of the improvements to the original afl wouldn't be possible without
+feedback, bug reports, or patches from:
 
+```
   Jann Horn                             Hanno Boeck
   Felix Groebert                        Jakub Wilk
   Richard W. M. Jones                   Alexander Cherepanov
@@ -565,18 +591,18 @@ bug reports, or patches from:
   Rene Freingruber                      Sergey Davidoff
   Sami Liedes                           Craig Young
   Andrzej Jackowski                     Daniel Hodson
+  Nathan Voss                           Dominik Maier
+```
 
 Thank you!
 
 
-16) Contact
+## 16) Contact
 -----------
 
 Questions? Concerns? Bug reports? The contributors can be reached via
-https://github.com/vanhauser-thc/AFLplusplus
+[https://github.com/vanhauser-thc/AFLplusplus](https://github.com/vanhauser-thc/AFLplusplus)
 
 There is also a mailing list for the afl project; to join, send a mail to
 <afl-users+subscribe@googlegroups.com>. Or, if you prefer to browse
-archives first, try:
-
-  https://groups.google.com/group/afl-users
+archives first, try: [https://groups.google.com/group/afl-users](https://groups.google.com/group/afl-users)
diff --git a/TODO b/TODO
new file mode 100644
index 00000000..3d1e444d
--- /dev/null
+++ b/TODO
@@ -0,0 +1,34 @@
+Roadmap 2.53d:
+==============
+ - indent all the code: clang-format -style=Google
+
+ - update docs/sister_projects.txt
+
+afl-fuzz:
+ - put mutator, scheduler, forkserver and input channels in individual files
+ - reuse forkserver for showmap, afl-cmin, etc.
+
+gcc_plugin:
+ - needs to be rewritten
+ - fix crashes when compiling :(
+ - whitelist support
+ - skip over uninteresting blocks
+ - laf-intel
+ - neverZero
+
+qemu_mode:
+ - deferred mode with AFL_DEFERRED_QEMU=0xaddress
+
+unit testing / or large testcase campaign
+
+
+Roadmap 2.54d:
+==============
+ - expand MAP size to 256k (current L2 cache size on processors)
+   -> 18 bit map
+ - llvm_mode: dynamic map size and collision-free basic block IDs
+
+qemu_mode:
+ - persistent mode patching the return address (WinAFL style)
+ - instrument only comparisons with immediate values by default when using compcov
+
diff --git a/afl-analyze.c b/afl-analyze.c
index 44be73f9..53b694ec 100644
--- a/afl-analyze.c
+++ b/afl-analyze.c
@@ -26,6 +26,8 @@
 #include "debug.h"
 #include "alloc-inl.h"
 #include "hash.h"
+#include "sharedmem.h"
+#include "afl-common.h"
 
 #include <stdio.h>
 #include <unistd.h>
@@ -47,7 +49,7 @@
 
 static s32 child_pid;                 /* PID of the tested program         */
 
-static u8* trace_bits;                /* SHM with instrumentation bitmap   */
+       u8* trace_bits;                /* SHM with instrumentation bitmap   */
 
 static u8 *in_file,                   /* Analyzer input test case          */
           *prog_in,                   /* Targeted program input file       */
@@ -64,8 +66,7 @@ static u32 in_len,                    /* Input data length                 */
 
 static u64 mem_limit = MEM_LIMIT;     /* Memory limit (MB)                 */
 
-static s32 shm_id,                    /* ID of the SHM region              */
-           dev_null_fd = -1;          /* FD to /dev/null                   */
+static s32 dev_null_fd = -1;          /* FD to /dev/null                   */
 
 static u8  edges_only,                /* Ignore hit counts?                */
            use_hex_offsets,           /* Show hex offsets?                 */
@@ -141,37 +142,11 @@ static inline u8 anything_set(void) {
 }
 
 
-/* Get rid of shared memory and temp files (atexit handler). */
+/* Get rid of temp files (atexit handler). */
 
-static void remove_shm(void) {
+static void at_exit_handler(void) {
 
   unlink(prog_in); /* Ignore errors */
-  shmctl(shm_id, IPC_RMID, NULL);
-
-}
-
-
-/* Configure shared memory. */
-
-static void setup_shm(void) {
-
-  u8* shm_str;
-
-  shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);
-
-  if (shm_id < 0) PFATAL("shmget() failed");
-
-  atexit(remove_shm);
-
-  shm_str = alloc_printf("%d", shm_id);
-
-  setenv(SHM_ENV_VAR, shm_str, 1);
-
-  ck_free(shm_str);
-
-  trace_bits = shmat(shm_id, NULL, 0);
-  
-  if (!trace_bits) PFATAL("shmat() failed");
 
 }
 
@@ -750,48 +725,6 @@ static void setup_signal_handlers(void) {
 }
 
 
-/* Detect @@ in args. */
-
-static void detect_file_args(char** argv) {
-
-  u32 i = 0;
-  u8* cwd = getcwd(NULL, 0);
-
-  if (!cwd) PFATAL("getcwd() failed");
-
-  while (argv[i]) {
-
-    u8* aa_loc = strstr(argv[i], "@@");
-
-    if (aa_loc) {
-
-      u8 *aa_subst, *n_arg;
-
-      /* Be sure that we're always using fully-qualified paths. */
-
-      if (prog_in[0] == '/') aa_subst = prog_in;
-      else aa_subst = alloc_printf("%s/%s", cwd, prog_in);
-
-      /* Construct a replacement argv value. */
-
-      *aa_loc = 0;
-      n_arg = alloc_printf("%s%s%s", argv[i], aa_subst, aa_loc + 2);
-      argv[i] = n_arg;
-      *aa_loc = '@';
-
-      if (prog_in[0] != '/') ck_free(aa_subst);
-
-    }
-
-    i++;
-
-  }
-
-  free(cwd); /* not tracked */
-
-}
-
-
 /* Display usage hints. */
 
 static void usage(u8* argv0) {
@@ -807,7 +740,8 @@ static void usage(u8* argv0) {
        "  -f file       - input file read by the tested program (stdin)\n"
        "  -t msec       - timeout for each run (%u ms)\n"
        "  -m megs       - memory limit for child process (%u MB)\n"
-       "  -Q            - use binary-only instrumentation (QEMU mode)\n\n"
+       "  -Q            - use binary-only instrumentation (QEMU mode)\n"
+       "  -U            - use unicorn-based instrumentation (Unicorn mode)\n\n"
 
        "Analysis settings:\n\n"
 
@@ -933,20 +867,19 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 
 }
 
-
 /* Main entry point */
 
 int main(int argc, char** argv) {
 
   s32 opt;
-  u8  mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+  u8  mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
   char** use_argv;
 
   doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;
 
   SAYF(cCYA "afl-analyze" VERSION cRST " by <lcamtuf@google.com>\n");
 
-  while ((opt = getopt(argc,argv,"+i:f:m:t:eQ")) > 0)
+  while ((opt = getopt(argc,argv,"+i:f:m:t:eQU")) > 0)
 
     switch (opt) {
 
@@ -1026,6 +959,14 @@ int main(int argc, char** argv) {
         qemu_mode = 1;
         break;
 
+      case 'U':
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+
+        unicorn_mode = 1;
+        break;
+
       default:
 
         usage(argv[0]);
@@ -1036,13 +977,14 @@ int main(int argc, char** argv) {
 
   use_hex_offsets = !!getenv("AFL_ANALYZE_HEX");
 
-  setup_shm();
+  setup_shm(0);
+  atexit(at_exit_handler);
   setup_signal_handlers();
 
   set_up_environment();
 
   find_binary(argv[optind]);
-  detect_file_args(argv + optind);
+  detect_file_args(argv + optind, prog_in);
 
   if (qemu_mode)
     use_argv = get_qemu_argv(argv[0], argv + optind, argc - optind);
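With the -U switch wired into afl-analyze above, a unicorn-mode invocation would look roughly like this sketch, mirroring the sample harness shipped in this commit (the input file name is a placeholder):

```shell
# Unicorn mode drives an emulated target through a Python harness;
# -U also raises the default memory limit to MEM_LIMIT_UNICORN
./afl-analyze -i sample_input -U -- python unicorn_mode/samples/simple/simple_test_harness.py @@
```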
diff --git a/afl-as.h b/afl-as.h
index ebd57109..4748eda7 100644
--- a/afl-as.h
+++ b/afl-as.h
@@ -189,6 +189,7 @@ static const u8* main_payload_32 =
   "  orb  $1, (%edx, %edi, 1)\n"
 #else
   "  incb (%edx, %edi, 1)\n"
+  "  adcb $0, (%edx, %edi, 1)\n" // never zero counter implementation. slightly better path discovery and little performance impact
 #endif /* ^SKIP_COUNTS */
   "\n"
   "__afl_return:\n"
@@ -220,6 +221,29 @@ static const u8* main_payload_32 =
   "  testl %eax, %eax\n"
   "  je    __afl_setup_abort\n"
   "\n"
+#ifdef USEMMAP
+  "  pushl $384        /* shm_open mode 0600 */\n"
+  "  pushl $2          /* flags O_RDWR   */\n"
+  "  pushl %eax        /* SHM file path  */\n"
+  "  call  shm_open\n"
+  "  addl  $12, %esp\n"
+  "\n"
+  "  cmpl $-1, %eax\n"
+  "  je   __afl_setup_abort\n"
+  "\n"
+  "  pushl $0          /* mmap off       */\n"
+  "  pushl %eax        /* shm fd         */\n"
+  "  pushl $1          /* mmap flags     */\n"
+  "  pushl $3          /* mmap prot      */\n"
+  "  pushl $"STRINGIFY(MAP_SIZE)"          /* mmap len       */\n"
+  "  pushl $0          /* mmap addr      */\n"
+  "  call  mmap\n"
+  "  addl  $12, %esp\n"
+  "\n"
+  "  cmpl $-1, %eax\n"
+  "  je   __afl_setup_abort\n"
+  "\n"
+#else
   "  pushl %eax\n"
   "  call  atoi\n"
   "  addl  $4, %esp\n"
@@ -233,6 +257,7 @@ static const u8* main_payload_32 =
   "  cmpl $-1, %eax\n"
   "  je   __afl_setup_abort\n"
   "\n"
+#endif
   "  /* Store the address of the SHM region. */\n"
   "\n"
   "  movl %eax, __afl_area_ptr\n"
@@ -417,6 +442,7 @@ static const u8* main_payload_64 =
   "  orb  $1, (%rdx, %rcx, 1)\n"
 #else
   "  incb (%rdx, %rcx, 1)\n"
+  "  adcb $0, (%rdx, %rcx, 1)\n" // never zero counter implementation. slightly better path discovery and little performance impact
 #endif /* ^SKIP_COUNTS */
   "\n"
   "__afl_return:\n"
@@ -501,6 +527,27 @@ static const u8* main_payload_64 =
   "  testq %rax, %rax\n"
   "  je    __afl_setup_abort\n"
   "\n"
+#ifdef USEMMAP
+  "  movl $384, %edx   /* shm_open mode 0600 */\n"
+  "  movl $2,   %esi   /* flags O_RDWR   */\n"
+  "  movq %rax, %rdi   /* SHM file path  */\n"
+  CALL_L64("shm_open")
+  "\n"
+  "  cmpq $-1, %rax\n"
+  "  je   __afl_setup_abort\n"
+  "\n"
+  "  movl    $0, %r9d\n"
+  "  movl    %eax, %r8d\n"
+  "  movl    $1, %ecx\n"
+  "  movl    $3, %edx\n"
+  "  movl    $"STRINGIFY(MAP_SIZE)", %esi\n"
+  "  movl    $0, %edi\n"
+  CALL_L64("mmap")
+  "\n"
+  "  cmpq $-1, %rax\n"
+  "  je   __afl_setup_abort\n"
+  "\n"
+#else
   "  movq  %rax, %rdi\n"
   CALL_L64("atoi")
   "\n"
@@ -512,6 +559,7 @@ static const u8* main_payload_64 =
   "  cmpq $-1, %rax\n"
   "  je   __afl_setup_abort\n"
   "\n"
+#endif
   "  /* Store the address of the SHM region. */\n"
   "\n"
   "  movq %rax, %rdx\n"
diff --git a/afl-cmin b/afl-cmin
index 9206bd70..a9ec4082 100755
--- a/afl-cmin
+++ b/afl-cmin
@@ -49,9 +49,9 @@ MEM_LIMIT=100
 TIMEOUT=none
 
 unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
-  AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE
+  AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE
 
-while getopts "+i:o:f:m:t:eQC" opt; do
+while getopts "+i:o:f:m:t:eQUC" opt; do
 
   case "$opt" in 
 
@@ -83,6 +83,11 @@ while getopts "+i:o:f:m:t:eQC" opt; do
          test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
          QEMU_MODE=1
          ;;
+    "U")
+         EXTRA_PAR="$EXTRA_PAR -U"
+         test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
+         UNICORN_MODE=1
+         ;;    
     "?")
          exit 1
          ;;
@@ -111,7 +116,8 @@ Execution control settings:
   -m megs       - memory limit for child process ($MEM_LIMIT MB)
   -t msec       - run time limit for child process (none)
   -Q            - use binary-only instrumentation (QEMU mode)
-
+  -U            - use unicorn-based instrumentation (Unicorn mode)
+  
 Minimization settings:
 
   -C            - keep crashing inputs, reject everything else
@@ -196,7 +202,7 @@ if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then
 
 fi
 
-if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" ]; then
+if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then
 
   if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
     echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2
diff --git a/afl-common.c b/afl-common.c
new file mode 100644
index 00000000..1c5e5bfe
--- /dev/null
+++ b/afl-common.c
@@ -0,0 +1,69 @@
+/*
+ gather some functions common to multiple executables
+
+ detect_file_args
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>   /* strstr() */
+
+#include "debug.h"
+#include "alloc-inl.h"
+
+/* Detect @@ in args. */
+/* getcwd() and pathconf() are declared in unistd.h, needed on glibc
+   and non-glibc systems alike, so include it unconditionally. */
+#include <unistd.h>
+void detect_file_args(char** argv, u8* prog_in) {
+
+  u32 i = 0;
+#ifdef __GLIBC__
+  u8* cwd = getcwd(NULL, 0); /* non portable glibc extension */
+#else
+  u8* cwd;
+  char *buf;
+  long size = pathconf(".", _PC_PATH_MAX);
+  if ((buf = (char *)malloc((size_t)size)) != NULL) {
+    cwd = getcwd(buf, (size_t)size); /* portable version */
+  } else {
+    PFATAL("getcwd() failed");
+  }
+#endif
+
+  if (!cwd) PFATAL("getcwd() failed");
+
+  while (argv[i]) {
+
+    u8* aa_loc = strstr(argv[i], "@@");
+
+    if (aa_loc) {
+
+      u8 *aa_subst, *n_arg;
+
+      if (!prog_in) FATAL("@@ syntax is not supported by this tool.");
+
+      /* Be sure that we're always using fully-qualified paths. */
+
+      if (prog_in[0] == '/') aa_subst = prog_in;
+      else aa_subst = alloc_printf("%s/%s", cwd, prog_in);
+
+      /* Construct a replacement argv value. */
+
+      *aa_loc = 0;
+      n_arg = alloc_printf("%s%s%s", argv[i], aa_subst, aa_loc + 2);
+      argv[i] = n_arg;
+      *aa_loc = '@';
+
+      if (prog_in[0] != '/') ck_free(aa_subst);
+
+    }
+
+    i++;
+
+  }
+
+  free(cwd); /* not tracked */
+
+}
+
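detect_file_args() is what makes the @@ placeholder work across the tools that now share this file; conceptually, an invocation like the first line below has its @@ replaced with the fully-qualified prog_in path before the target runs (all names here are illustrative only):

```shell
./afl-tmin -i crash_case -o minimized -- ./target --parse @@
# is rewritten internally to something like:
#   ./target --parse /current/dir/<prog_in temp file>
```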
diff --git a/afl-common.h b/afl-common.h
new file mode 100644
index 00000000..07afb75d
--- /dev/null
+++ b/afl-common.h
@@ -0,0 +1,5 @@
+#ifndef __AFLCOMMON_H
+#define __AFLCOMMON_H
+
+void detect_file_args(char **argv, u8 *prog_in);
+#endif
diff --git a/afl-fuzz.c b/afl-fuzz.c
index 42970e39..a1388a55 100644
--- a/afl-fuzz.c
+++ b/afl-fuzz.c
@@ -23,7 +23,9 @@
 #define AFL_MAIN
 #define MESSAGES_TO_STDOUT
 
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
 #define _FILE_OFFSET_BITS 64
 
 #include "config.h"
@@ -31,6 +33,8 @@
 #include "debug.h"
 #include "alloc-inl.h"
 #include "hash.h"
+#include "sharedmem.h"
+#include "afl-common.h"
 
 #include <stdio.h>
 #include <unistd.h>
@@ -76,9 +80,68 @@
 #  define EXP_ST static
 #endif /* ^AFL_LIB */
 
-/* Lots of globals, but mostly for the status UI and other things where it
+/* MOpt:
+   Lots of globals, but mostly for the status UI and other things where it
    really makes no sense to haul them around as function parameters. */
-
+EXP_ST u64 limit_time_puppet = 0;
+u64 orig_hit_cnt_puppet = 0;
+u64 last_limit_time_start = 0;
+u64 tmp_pilot_time = 0;
+u64 total_pacemaker_time = 0;
+u64 total_puppet_find = 0;
+u64 temp_puppet_find = 0;
+u64 most_time_key = 0;
+u64 most_time = 0;
+u64 most_execs_key = 0;
+u64 most_execs = 0;
+u64 old_hit_count = 0;
+int SPLICE_CYCLES_puppet;
+int limit_time_sig = 0;
+int key_puppet = 0;
+int key_module = 0;
+double w_init = 0.9;
+double w_end = 0.3;
+double w_now;
+int g_now = 0;
+int g_max = 5000;
+#define operator_num 16
+#define swarm_num 5
+#define period_core  500000
+u64 tmp_core_time = 0;
+int swarm_now = 0 ;
+double x_now[swarm_num][operator_num],
+       L_best[swarm_num][operator_num],
+       eff_best[swarm_num][operator_num],
+       G_best[operator_num],
+       v_now[swarm_num][operator_num],
+       probability_now[swarm_num][operator_num],
+       swarm_fitness[swarm_num];
+
+ static u64 stage_finds_puppet[swarm_num][operator_num],           /* Patterns found per fuzz stage    */
+            stage_finds_puppet_v2[swarm_num][operator_num],
+            stage_cycles_puppet_v2[swarm_num][operator_num],
+            stage_cycles_puppet_v3[swarm_num][operator_num],
+            stage_cycles_puppet[swarm_num][operator_num],
+            operator_finds_puppet[operator_num],
+            core_operator_finds_puppet[operator_num],
+            core_operator_finds_puppet_v2[operator_num],
+            core_operator_cycles_puppet[operator_num],
+            core_operator_cycles_puppet_v2[operator_num],
+            core_operator_cycles_puppet_v3[operator_num];          /* Execs per fuzz stage             */
+
+#define RAND_C (rand()%1000*0.001)
+#define v_max 1
+#define v_min 0.05
+#define limit_time_bound 1.1
+#define SPLICE_CYCLES_puppet_up 25
+#define SPLICE_CYCLES_puppet_low 5
+#define STAGE_RANDOMBYTE 12
+#define STAGE_DELETEBYTE 13
+#define STAGE_Clone75 14
+#define STAGE_OverWrite75 15
+#define period_pilot 50000
+double period_pilot_tmp = 5000.0;
+int key_lv = 0;
 
 EXP_ST u8 *in_dir,                    /* Input directory with test cases  */
           *out_file,                  /* File to fuzz, if any             */
@@ -125,6 +188,7 @@ char *power_names[] = {
 };
 
 static u8 schedule = EXPLORE;         /* Power schedule (default: EXPLORE)*/
+static u8 havoc_max_mult = HAVOC_MAX_MULT;
 
 EXP_ST u8  skip_deterministic,        /* Skip deterministic stages?       */
            force_deterministic,       /* Force deterministic stages?      */
@@ -146,6 +210,7 @@ EXP_ST u8  skip_deterministic,        /* Skip deterministic stages?       */
            shuffle_queue,             /* Shuffle input queue?             */
            bitmap_changed = 1,        /* Time to update bitmap?           */
            qemu_mode,                 /* Running in QEMU mode?            */
+           unicorn_mode,              /* Running in Unicorn mode?         */
            skip_requested,            /* Skip request, via SIGUSR1        */
            run_over10m,               /* Run time over 10 minutes?        */
            persistent_mode,           /* Running in persistent mode?      */
@@ -163,7 +228,7 @@ static s32 forksrv_pid,               /* PID of the fork server           */
            child_pid = -1,            /* PID of the fuzzed program        */
            out_dir_fd = -1;           /* FD of the lock file              */
 
-EXP_ST u8* trace_bits;                /* SHM with instrumentation bitmap  */
+       u8* trace_bits;                /* SHM with instrumentation bitmap  */
 
 EXP_ST u8  virgin_bits[MAP_SIZE],     /* Regions yet untouched by fuzzing */
            virgin_tmout[MAP_SIZE],    /* Bits we haven't seen in tmouts   */
@@ -171,8 +236,6 @@ EXP_ST u8  virgin_bits[MAP_SIZE],     /* Regions yet untouched by fuzzing */
 
 static u8  var_bytes[MAP_SIZE];       /* Bytes that appear to be variable */
 
-static s32 shm_id;                    /* ID of the SHM region             */
-
 static volatile u8 stop_soon,         /* Ctrl-C pressed?                  */
                    clear_screen = 1,  /* Window resized?                  */
                    child_timed_out;   /* Traced process timed out?        */
@@ -259,6 +322,7 @@ struct queue_entry {
 
   u8  cal_failed,                     /* Calibration failed?              */
       trim_done,                      /* Trimmed?                         */
+      was_fuzzed,                     /* historical, but needed for MOpt  */
       passed_det,                     /* Deterministic stages passed?     */
       has_new_cov,                    /* Triggers new coverage?           */
       var_behavior,                   /* Variable behavior?               */
@@ -574,6 +638,34 @@ static void trim_py(char** ret, size_t* retlen) {
 #endif /* USE_PYTHON */
 
 
+int select_algorithm(void) {
+
+  int i_puppet, j_puppet;
+  u32 seed[2];
+
+  if (!fixed_seed) {
+    ck_read(dev_urandom_fd, &seed, sizeof(seed), "/dev/urandom");
+    srandom(seed[0]);
+  }
+
+  double sele = ((double)(random()%10000)*0.0001);
+  j_puppet = 0;
+  for (i_puppet = 0; i_puppet < operator_num; i_puppet++) {
+      if (unlikely(i_puppet == 0)) {
+          if (sele < probability_now[swarm_now][i_puppet])
+            break;
+      } else {
+          if (sele < probability_now[swarm_now][i_puppet]) {
+              j_puppet =1;
+              break;
+          }
+      }
+  }
+  if (j_puppet ==1 && sele < probability_now[swarm_now][i_puppet-1])
+    FATAL("error select_algorithm");
+  return i_puppet;
+}
+
 
 /* Get unix time in milliseconds */
 
@@ -609,18 +701,14 @@ static u64 get_cur_time_us(void) {
 static inline u32 UR(u32 limit) {
 
   if (!fixed_seed && unlikely(!rand_cnt--)) {
-
     u32 seed[2];
 
     ck_read(dev_urandom_fd, &seed, sizeof(seed), "/dev/urandom");
-
     srandom(seed[0]);
     rand_cnt = (RESEED_RNG / 2) + (seed[1] % RESEED_RNG);
-
   }
 
   return random() % limit;
-
 }
 
 
@@ -1450,15 +1538,6 @@ static inline void classify_counts(u32* mem) {
 #endif /* ^__x86_64__ */
 
 
-/* Get rid of shared memory (atexit handler). */
-
-static void remove_shm(void) {
-
-  shmctl(shm_id, IPC_RMID, NULL);
-
-}
-
-
 /* Compact trace bytes into a smaller bitmap. We effectively just drop the
    count information here. This is called only sporadically, for some
    new paths. */
@@ -1477,6 +1556,7 @@ static void minimize_bits(u8* dst, u8* src) {
 }
 
 
+
 /* Find first power of two greater or equal to val (assuming val under
    2^63). */
 
@@ -1499,6 +1579,7 @@ static u64 next_p2(u64 val) {
    for every byte in the bitmap. We win that slot if there is no previous
    contender, or if the contender has a more favorable speed x size factor. */
 
+
 static void update_bitmap_score(struct queue_entry* q) {
 
   u32 i;
@@ -1514,6 +1595,7 @@ static void update_bitmap_score(struct queue_entry* q) {
 
        if (top_rated[i]) {
 
+         /* Faster-executing or smaller test cases are favored. */
          u64 top_rated_fuzz_p2    = next_p2 (top_rated[i]->n_fuzz);
          u64 top_rated_fav_factor = top_rated[i]->exec_us * top_rated[i]->len;
 
@@ -1598,7 +1680,7 @@ static void cull_queue(void) {
       top_rated[i]->favored = 1;
       queued_favored++;
 
-      if (top_rated[i]->fuzz_level == 0) pending_favored++;
+      if (top_rated[i]->fuzz_level == 0 || !top_rated[i]->was_fuzzed) pending_favored++;
 
     }
 
@@ -1612,41 +1694,6 @@ static void cull_queue(void) {
 }
 
 
-/* Configure shared memory and virgin_bits. This is called at startup. */
-
-EXP_ST void setup_shm(void) {
-
-  u8* shm_str;
-
-  if (!in_bitmap) memset(virgin_bits, 255, MAP_SIZE);
-
-  memset(virgin_tmout, 255, MAP_SIZE);
-  memset(virgin_crash, 255, MAP_SIZE);
-
-  shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);
-
-  if (shm_id < 0) PFATAL("shmget() failed");
-
-  atexit(remove_shm);
-
-  shm_str = alloc_printf("%d", shm_id);
-
-  /* If somebody is asking us to fuzz instrumented binaries in dumb mode,
-     we don't want them to detect instrumentation, since we won't be sending
-     fork server commands. This should be replaced with better auto-detection
-     later on, perhaps? */
-
-  if (!dumb_mode) setenv(SHM_ENV_VAR, shm_str, 1);
-
-  ck_free(shm_str);
-
-  trace_bits = shmat(shm_id, NULL, 0);
-  
-  if (!trace_bits) PFATAL("shmat() failed");
-
-}
-
-
 /* Load postprocessor, if available. */
 
 static void setup_post(void) {
@@ -2285,6 +2332,8 @@ EXP_ST void init_forkserver(char** argv) {
 
   if (!forksrv_pid) {
 
+    /* CHILD PROCESS */
+
     struct rlimit r;
 
     /* Umpf. On OpenBSD, the default fd limit for root users is set to
@@ -2392,6 +2441,8 @@ EXP_ST void init_forkserver(char** argv) {
 
   }
 
+  /* PARENT PROCESS */
+
   /* Close the unneeded endpoints. */
 
   close(ctl_pipe[0]);
@@ -2456,7 +2507,7 @@ EXP_ST void init_forkserver(char** argv) {
 #endif /* __APPLE__ */
 
            "    - Less likely, there is a horrible bug in the fuzzer. If other options\n"
-           "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n");
+           "      fail, poke <afl-users@googlegroups.com> for troubleshooting tips.\n");
 
     } else {
 
@@ -2490,7 +2541,7 @@ EXP_ST void init_forkserver(char** argv) {
 #endif /* __APPLE__ */
 
            "    - Less likely, there is a horrible bug in the fuzzer. If other options\n"
-           "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n",
+           "      fail, poke <afl-users@googlegroups.com> for troubleshooting tips.\n",
            DMS(mem_limit << 20), mem_limit - 1);
 
     }
@@ -2515,7 +2566,7 @@ EXP_ST void init_forkserver(char** argv) {
     SAYF("\n" cLRD "[-] " cRST
          "Hmm, looks like the target binary terminated before we could complete a\n"
          "    handshake with the injected code. Perhaps there is a horrible bug in the\n"
-         "    fuzzer. Poke <lcamtuf@coredump.cx> for troubleshooting tips.\n");
+         "    fuzzer. Poke <afl-users@googlegroups.com> for troubleshooting tips.\n");
 
   } else {
 
@@ -2538,7 +2589,7 @@ EXP_ST void init_forkserver(char** argv) {
          "      estimate the required amount of virtual memory for the binary.\n\n"
 
          "    - Less likely, there is a horrible bug in the fuzzer. If other options\n"
-         "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n",
+         "      fail, poke <afl-users@googlegroups.com> for troubleshooting tips.\n",
          getenv(DEFER_ENV_VAR) ? "three" : "two",
          getenv(DEFER_ENV_VAR) ?
          "    - You are using deferred forkserver, but __AFL_INIT() is never\n"
@@ -3126,7 +3177,7 @@ static void perform_dry_run(char** argv) {
 #endif /* __APPLE__ */
 
                "    - Least likely, there is a horrible bug in the fuzzer. If other options\n"
-               "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n",
+               "      fail, poke <afl-users@googlegroups.com> for troubleshooting tips.\n",
                DMS(mem_limit << 20), mem_limit - 1, doc_path);
 
         } else {
@@ -3148,7 +3199,7 @@ static void perform_dry_run(char** argv) {
 #endif /* __APPLE__ */
 
                "    - Least likely, there is a horrible bug in the fuzzer. If other options\n"
-               "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n");
+               "      fail, poke <afl-users@googlegroups.com> for troubleshooting tips.\n");
 
         }
 
@@ -3339,6 +3390,8 @@ static u8* describe_op(u8 hnb) {
 
     sprintf(ret, "src:%06u", current_entry);
 
+    sprintf(ret + strlen(ret), ",time:%llu", get_cur_time() - start_time);
+
     if (splicing_with >= 0)
       sprintf(ret + strlen(ret), "+%06u", splicing_with);
 
@@ -3399,12 +3452,9 @@ static void write_crash_readme(void) {
              "them to a vendor? Check out the afl-tmin that comes with the fuzzer!\n\n"
 
              "Found any cool bugs in open-source tools using afl-fuzz? If yes, please drop\n"
-             "me a mail at <lcamtuf@coredump.cx> once the issues are fixed - I'd love to\n"
-             "add your finds to the gallery at:\n\n"
-
-             "  http://lcamtuf.coredump.cx/afl/\n\n"
+             "us a mail at <afl-users@googlegroups.com> once the issues are fixed:\n\n"
 
-             "Thanks :-)\n",
+             "  https://github.com/vanhauser-thc/AFLplusplus\n\n",
 
              orig_cmdline, DMS(mem_limit << 20)); /* ignore errors */
 
@@ -3419,6 +3469,8 @@ static void write_crash_readme(void) {
 
 static u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) {
 
+  if (len == 0) return 0;
+
   u8  *fn = "";
   u8  hnb;
   s32 fd;
@@ -3744,7 +3796,7 @@ static void write_stats_file(double bitmap_cvg, double stability, double eps) {
              "exec_timeout      : %u\n"
              "afl_banner        : %s\n"
              "afl_version       : " VERSION "\n"
-             "target_mode       : %s%s%s%s%s%s%s\n"
+             "target_mode       : %s%s%s%s%s%s%s%s\n"
              "command_line      : %s\n",
              start_time / 1000, get_cur_time() / 1000, getpid(),
              queue_cycle ? (queue_cycle - 1) : 0, total_execs, eps,
@@ -3754,10 +3806,10 @@ static void write_stats_file(double bitmap_cvg, double stability, double eps) {
              unique_hangs, last_path_time / 1000, last_crash_time / 1000,
              last_hang_time / 1000, total_execs - last_crash_execs,
              exec_tmout, use_banner,
-             qemu_mode ? "qemu " : "", dumb_mode ? " dumb " : "",
+             unicorn_mode ? "unicorn " : "", qemu_mode ? "qemu " : "", dumb_mode ? " dumb " : "",
              no_forkserver ? "no_forksrv " : "", crash_mode ? "crash " : "",
              persistent_mode ? "persistent " : "", deferred_mode ? "deferred " : "",
-             (qemu_mode || dumb_mode || no_forkserver || crash_mode ||
+             (unicorn_mode || qemu_mode || dumb_mode || no_forkserver || crash_mode ||
               persistent_mode || deferred_mode) ? "" : "default",
              orig_cmdline);
              /* ignore errors */
@@ -4136,7 +4188,6 @@ static void maybe_delete_out_dir(void) {
 
   /* And now, for some finishing touches. */
 
-  //fn = alloc_printf("%s/.cur_input", tmp_dir);
   if (file_extension) {
     fn = alloc_printf("%s/.cur_input.%s", out_dir, file_extension);
   } else {
@@ -4313,13 +4364,19 @@ static void show_stats(void) {
 
   /* Let's start by drawing a centered banner. */
 
-  banner_len = (crash_mode ? 24 : 22) + strlen(VERSION) + strlen(use_banner) + strlen(power_name) + 3;
+  banner_len = (crash_mode ? 24 : 22) + strlen(VERSION) + strlen(use_banner) + strlen(power_name) + 3 + 5; /* +5: room for the " {nn}" CPU core suffix */
   banner_pad = (79 - banner_len) / 2;
   memset(tmp, ' ', banner_pad);
 
+#ifdef HAVE_AFFINITY
+  sprintf(tmp + banner_pad, "%s " cLCY VERSION cLGN
+          " (%s) " cPIN "[%s]" cBLU " {%d}",  crash_mode ? cPIN "peruvian were-rabbit" :
+          cYEL "american fuzzy lop", use_banner, power_name, cpu_aff);
+#else
   sprintf(tmp + banner_pad, "%s " cLCY VERSION cLGN
           " (%s) " cPIN "[%s]",  crash_mode ? cPIN "peruvian were-rabbit" :
           cYEL "american fuzzy lop", use_banner, power_name);
+#endif /* HAVE_AFFINITY */
 
   SAYF("\n%s\n", tmp);
 
@@ -4697,7 +4754,7 @@ static void show_init_stats(void) {
 
   SAYF("\n");
 
-  if (avg_us > (qemu_mode ? 50000 : 10000)) 
+  if (avg_us > ((qemu_mode || unicorn_mode) ? 50000 : 10000))
     WARNF(cLRD "The target binary is pretty slow! See %s/perf_tips.txt.",
           doc_path);
 
@@ -4774,6 +4831,7 @@ static void show_init_stats(void) {
 
 }
 
+
 #ifdef USE_PYTHON
 static u8 trim_case_python(char** argv, struct queue_entry* q, u8* in_buf) {
 
@@ -5229,10 +5287,13 @@ static u32 calculate_score(struct queue_entry* q) {
 
   perf_score *= factor / POWER_BETA;
 
+  // MOpt mode: when -L is in use (limit_time_sig != 0), double the energy of
+  // test cases close to the current maximum depth.
+  if (limit_time_sig != 0 && max_depth - q->depth < 3) perf_score *= 2;
+  else if (perf_score < 1) perf_score = 1; // Add a lower bound to AFLFast's energy assignment strategies
 
   /* Make sure that we don't go over limit. */
 
-  if (perf_score > HAVOC_MAX_MULT * 100) perf_score = HAVOC_MAX_MULT * 100;
+  if (perf_score > havoc_max_mult * 100) perf_score = havoc_max_mult * 100;
 
   return perf_score;
 
@@ -5429,7 +5490,7 @@ static u8 could_be_interest(u32 old_val, u32 new_val, u8 blen, u8 check_le) {
    function is a tad too long... returns 0 if fuzzed successfully, 1 if
    skipped or bailed out. */
 
-static u8 fuzz_one(char** argv) {
+static u8 fuzz_one_original(char** argv) {
 
   s32 len, fd, temp_len, i, j;
   u8  *in_buf, *out_buf, *orig_in, *ex_tmp, *eff_map = 0;
@@ -5456,7 +5517,7 @@ static u8 fuzz_one(char** argv) {
        possibly skip to them at the expense of already-fuzzed or non-favored
        cases. */
 
-    if ((queue_cur->fuzz_level > 0 || !queue_cur->favored) &&
+    if (((queue_cur->was_fuzzed > 0 || queue_cur->fuzz_level > 0) || !queue_cur->favored) &&
         UR(100) < SKIP_TO_NEW_PROB) return 1;
 
   } else if (!dumb_mode && !queue_cur->favored && queued_paths > 10) {
@@ -5465,7 +5526,7 @@ static u8 fuzz_one(char** argv) {
        The odds of skipping stuff are higher for already-fuzzed inputs and
        lower for never-fuzzed entries. */
 
-    if (queue_cycle > 1 && queue_cur->fuzz_level == 0) {
+    if (queue_cycle > 1 && (queue_cur->fuzz_level == 0 || queue_cur->was_fuzzed)) {
 
       if (UR(100) < SKIP_NFAV_NEW_PROB) return 1;
 
@@ -5607,9 +5668,9 @@ static u8 fuzz_one(char** argv) {
   if (skip_deterministic
      || ((!queue_cur->passed_det)
         && perf_score < (
-              queue_cur->depth * 30 <= HAVOC_MAX_MULT * 100
+              queue_cur->depth * 30 <= havoc_max_mult * 100
               ? queue_cur->depth * 30
-              : HAVOC_MAX_MULT * 100))
+              : havoc_max_mult * 100))
      || queue_cur->passed_det)
 #ifdef USE_PYTHON
     goto python_stage;
@@ -6655,7 +6716,7 @@ retry_external_pick:
          permitting. */
 
       if (queued_paths != havoc_queued) {
-        if (perf_score <= HAVOC_MAX_MULT * 100) {
+        if (perf_score <= havoc_max_mult * 100) {
           stage_max  *= 2;
           perf_score *= 2;
         }
@@ -7066,7 +7127,7 @@ havoc_stage:
               /* Inserted part */
               memcpy(new_buf + insert_at, a_extras[use_extra].data, extra_len);
 
-            } else {
+	    } else {
 
               use_extra = UR(extras_cnt);
               extra_len = extras[use_extra].len;
@@ -7114,7 +7175,7 @@ havoc_stage:
 
     if (queued_paths != havoc_queued) {
 
-      if (perf_score <= HAVOC_MAX_MULT * 100) {
+      if (perf_score <= havoc_max_mult * 100) {
         stage_max  *= 2;
         perf_score *= 2;
       }
@@ -7240,8 +7301,9 @@ abandon_entry:
   /* Update pending_not_fuzzed count if we made it through the calibration
      cycle and have not seen this entry before. */
 
-  if (!stop_soon && !queue_cur->cal_failed && queue_cur->fuzz_level == 0) {
+  if (!stop_soon && !queue_cur->cal_failed && (queue_cur->was_fuzzed == 0 || queue_cur->fuzz_level == 0)) {
     pending_not_fuzzed--;
+    queue_cur->was_fuzzed = 1;
     if (queue_cur->favored) pending_favored--;
   }
 
@@ -7259,6 +7321,3577 @@ abandon_entry:
 
 }
 
+/* MOpt mode: the pilot fuzzing module. It drives each swarm in turn through
+   the usual deterministic and havoc stages while recording how many new
+   paths and crashes every mutation operator contributes. */
+static u8 pilot_fuzzing(char** argv) {
+
+	s32 len, fd, temp_len, i, j;
+	u8  *in_buf, *out_buf, *orig_in, *ex_tmp, *eff_map = 0;
+	u64 havoc_queued, orig_hit_cnt, new_hit_cnt, cur_ms_lv;
+	u32 splice_cycle = 0, perf_score = 100, orig_perf, prev_cksum, eff_cnt = 1;
+
+	u8  ret_val = 1, doing_det = 0;
+
+	u8  a_collect[MAX_AUTO_EXTRA];
+	u32 a_len = 0;
+
+#ifdef IGNORE_FINDS
+
+	/* In IGNORE_FINDS mode, skip any entries that weren't in the
+	   initial data set. */
+
+	if (queue_cur->depth > 1) return 1;
+
+#else
+
+	if (pending_favored) {
+
+		/* If we have any favored, non-fuzzed new arrivals in the queue,
+		   possibly skip to them at the expense of already-fuzzed or non-favored
+		   cases. */
+
+		if ((queue_cur->was_fuzzed || !queue_cur->favored) &&
+			UR(100) < SKIP_TO_NEW_PROB) return 1;
+
+	}
+	else if (!dumb_mode && !queue_cur->favored && queued_paths > 10) {
+
+		/* Otherwise, still possibly skip non-favored cases, albeit less often.
+		   The odds of skipping stuff are higher for already-fuzzed inputs and
+		   lower for never-fuzzed entries. */
+
+		if (queue_cycle > 1 && !queue_cur->was_fuzzed) {
+
+			if (UR(100) < SKIP_NFAV_NEW_PROB) return 1;
+
+		}
+		else {
+
+			if (UR(100) < SKIP_NFAV_OLD_PROB) return 1;
+
+		}
+
+	}
+
+#endif /* ^IGNORE_FINDS */
+
+	if (not_on_tty) {
+		ACTF("Fuzzing test case #%u (%u total, %llu uniq crashes found)...",
+			current_entry, queued_paths, unique_crashes);
+		fflush(stdout);
+	}
+
+	/* Map the test case into memory. */
+
+	fd = open(queue_cur->fname, O_RDONLY);
+
+	if (fd < 0) PFATAL("Unable to open '%s'", queue_cur->fname);
+
+	len = queue_cur->len;
+
+	orig_in = in_buf = mmap(0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+
+	if (orig_in == MAP_FAILED) PFATAL("Unable to mmap '%s'", queue_cur->fname);
+
+	close(fd);
+
+	/* We could mmap() out_buf as MAP_PRIVATE, but we end up clobbering every
+	   single byte anyway, so it wouldn't give us any performance or memory usage
+	   benefits. */
+
+	out_buf = ck_alloc_nozero(len);
+
+	subseq_tmouts = 0;
+
+	cur_depth = queue_cur->depth;
+
+	/*******************************************
+	 * CALIBRATION (only if failed earlier on) *
+	 *******************************************/
+
+	if (queue_cur->cal_failed) {
+
+		u8 res = FAULT_TMOUT;
+
+		if (queue_cur->cal_failed < CAL_CHANCES) {
+
+			res = calibrate_case(argv, queue_cur, in_buf, queue_cycle - 1, 0);
+
+			if (res == FAULT_ERROR)
+				FATAL("Unable to execute target application");
+
+		}
+
+		if (stop_soon || res != crash_mode) {
+			cur_skipped_paths++;
+			goto abandon_entry;
+		}
+
+	}
+
+	/************
+	 * TRIMMING *
+	 ************/
+
+	if (!dumb_mode && !queue_cur->trim_done) {
+
+		u8 res = trim_case(argv, queue_cur, in_buf);
+
+		if (res == FAULT_ERROR)
+			FATAL("Unable to execute target application");
+
+		if (stop_soon) {
+			cur_skipped_paths++;
+			goto abandon_entry;
+		}
+
+		/* Don't retry trimming, even if it failed. */
+
+		queue_cur->trim_done = 1;
+
+		if (len != queue_cur->len) len = queue_cur->len;
+
+	}
+
+	memcpy(out_buf, in_buf, len);
+
+	/*********************
+	 * PERFORMANCE SCORE *
+	 *********************/
+
+	orig_perf = perf_score = calculate_score(queue_cur);
+
+	/* Skip right away if -d is given, if we have done deterministic fuzzing on
+	   this entry ourselves (was_fuzzed), or if it has gone through deterministic
+	   testing in earlier, resumed runs (passed_det). */
+
+	if (skip_deterministic || queue_cur->was_fuzzed || queue_cur->passed_det)
+		goto havoc_stage;
+
+	/* Skip deterministic fuzzing if exec path checksum puts this out of scope
+	   for this master instance. */
+
+	if (master_max && (queue_cur->exec_cksum % master_max) != master_id - 1)
+		goto havoc_stage;
+
+
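+	/* MOpt pacemaker check: once limit_time_puppet ms have passed without a
+	   new path or crash, set key_puppet and jump straight to the havoc and
+	   splicing stages. (Descriptive comment added by the editor.) */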
+	cur_ms_lv = get_cur_time();
+	if (!(key_puppet == 0 && ((cur_ms_lv - last_path_time < limit_time_puppet) ||
+		(last_crash_time != 0 && cur_ms_lv - last_crash_time < limit_time_puppet) || last_path_time == 0)))
+	{
+		key_puppet = 1;
+		goto pacemaker_fuzzing;
+	}
+
+	doing_det = 1;
+
+		/*********************************************
+		 * SIMPLE BITFLIP (+dictionary construction) *
+		 *********************************************/
+
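+/* Toggle bit _b of buffer _ar; bit 0 is the most significant bit of byte 0. */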
+#define FLIP_BIT(_ar, _b) do { \
+    u8* _arf = (u8*)(_ar); \
+    u32 _bf = (_b); \
+    _arf[(_bf) >> 3] ^= (128 >> ((_bf) & 7)); \
+  } while (0)
+
+		/* Single walking bit. */
+
+		stage_short = "flip1";
+		stage_max = len << 3;
+		stage_name = "bitflip 1/1";
+
+		stage_val_type = STAGE_VAL_NONE;
+
+		orig_hit_cnt = queued_paths + unique_crashes;
+
+		prev_cksum = queue_cur->exec_cksum;
+
+		for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
+
+			stage_cur_byte = stage_cur >> 3;
+
+			FLIP_BIT(out_buf, stage_cur);
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+			FLIP_BIT(out_buf, stage_cur);
+
+			/* While flipping the least significant bit in every byte, pull off an extra
+			   trick to detect possible syntax tokens. In essence, the idea is that if
+			   you have a binary blob like this:
+
+			   xxxxxxxxIHDRxxxxxxxx
+
+			   ...and changing the leading and trailing bytes causes variable or no
+			   changes in program flow, but touching any character in the "IHDR" string
+			   always produces the same, distinctive path, it's highly likely that
+			   "IHDR" is an atomically-checked magic value of special significance to
+			   the fuzzed format.
+
+			   We do this here, rather than as a separate stage, because it's a nice
+			   way to keep the operation approximately "free" (i.e., no extra execs).
+
+			   Empirically, performing the check when flipping the least significant bit
+			   is advantageous, compared to doing it at the time of more disruptive
+			   changes, where the program flow may be affected in more violent ways.
+
+			   The caveat is that we won't generate dictionaries in the -d mode or -S
+			   mode - but that's probably a fair trade-off.
+
+			   This won't work particularly well with paths that exhibit variable
+			   behavior, but fails gracefully, so we'll carry out the checks anyway.
+
+			  */
+
+			if (!dumb_mode && (stage_cur & 7) == 7) {
+
+				u32 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
+
+				if (stage_cur == stage_max - 1 && cksum == prev_cksum) {
+
+					/* If at end of file and we are still collecting a string, grab the
+					   final character and force output. */
+
+					if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3];
+					a_len++;
+
+					if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
+						maybe_add_auto(a_collect, a_len);
+
+				}
+				else if (cksum != prev_cksum) {
+
+					/* Otherwise, if the checksum has changed, see if we have something
+					   worthwhile queued up, and collect that if the answer is yes. */
+
+					if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
+						maybe_add_auto(a_collect, a_len);
+
+					a_len = 0;
+					prev_cksum = cksum;
+
+				}
+
+				/* Continue collecting string, but only if the bit flip actually made
+				   any difference - we don't want no-op tokens. */
+
+				if (cksum != queue_cur->exec_cksum) {
+
+					if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3];
+					a_len++;
+
+				}
+
+			}
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP1] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP1] += stage_max;
+
+		/* Two walking bits. */
+
+		stage_name = "bitflip 2/1";
+		stage_short = "flip2";
+		stage_max = (len << 3) - 1;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
+
+			stage_cur_byte = stage_cur >> 3;
+
+			FLIP_BIT(out_buf, stage_cur);
+			FLIP_BIT(out_buf, stage_cur + 1);
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+			FLIP_BIT(out_buf, stage_cur);
+			FLIP_BIT(out_buf, stage_cur + 1);
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP2] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP2] += stage_max;
+
+		/* Four walking bits. */
+
+		stage_name = "bitflip 4/1";
+		stage_short = "flip4";
+		stage_max = (len << 3) - 3;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
+
+			stage_cur_byte = stage_cur >> 3;
+
+			FLIP_BIT(out_buf, stage_cur);
+			FLIP_BIT(out_buf, stage_cur + 1);
+			FLIP_BIT(out_buf, stage_cur + 2);
+			FLIP_BIT(out_buf, stage_cur + 3);
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+			FLIP_BIT(out_buf, stage_cur);
+			FLIP_BIT(out_buf, stage_cur + 1);
+			FLIP_BIT(out_buf, stage_cur + 2);
+			FLIP_BIT(out_buf, stage_cur + 3);
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP4] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP4] += stage_max;
+
+		/* Effector map setup. These macros calculate:
+
+		   EFF_APOS      - position of a particular file offset in the map.
+		   EFF_ALEN      - length of a map with a particular number of bytes.
+		   EFF_SPAN_ALEN - map span for a sequence of bytes.
+
+		 */
+
+#define EFF_APOS(_p)          ((_p) >> EFF_MAP_SCALE2)
+#define EFF_REM(_x)           ((_x) & ((1 << EFF_MAP_SCALE2) - 1))
+#define EFF_ALEN(_l)          (EFF_APOS(_l) + !!EFF_REM(_l))
+#define EFF_SPAN_ALEN(_p, _l) (EFF_APOS((_p) + (_l) - 1) - EFF_APOS(_p) + 1)
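+
+/* With the default EFF_MAP_SCALE2 of 3 (see config.h), one map entry covers
+   eight file bytes; e.g., EFF_APOS(17) == 2 and EFF_ALEN(17) == 3. */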
+
+		/* Initialize effector map for the next step (see comments below). Always
+		   flag first and last byte as doing something. */
+
+		eff_map = ck_alloc(EFF_ALEN(len));
+		eff_map[0] = 1;
+
+		if (EFF_APOS(len - 1) != 0) {
+			eff_map[EFF_APOS(len - 1)] = 1;
+			eff_cnt++;
+		}
+
+		/* Walking byte. */
+
+		stage_name = "bitflip 8/8";
+		stage_short = "flip8";
+		stage_max = len;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
+
+			stage_cur_byte = stage_cur;
+
+			out_buf[stage_cur] ^= 0xFF;
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+			/* We also use this stage to pull off a simple trick: we identify
+			   bytes that seem to have no effect on the current execution path
+			   even when fully flipped - and we skip them during more expensive
+			   deterministic stages, such as arithmetics or known ints. */
+
+			if (!eff_map[EFF_APOS(stage_cur)]) {
+
+				u32 cksum;
+
+				/* If in dumb mode or if the file is very short, just flag everything
+				   without wasting time on checksums. */
+
+				if (!dumb_mode && len >= EFF_MIN_LEN)
+					cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
+				else
+					cksum = ~queue_cur->exec_cksum;
+
+				if (cksum != queue_cur->exec_cksum) {
+					eff_map[EFF_APOS(stage_cur)] = 1;
+					eff_cnt++;
+				}
+
+			}
+
+			out_buf[stage_cur] ^= 0xFF;
+
+		}
+
+		/* If the effector map is more than EFF_MAX_PERC dense, just flag the
+		   whole thing as worth fuzzing, since we wouldn't be saving much time
+		   anyway. */
+
+		if (eff_cnt != EFF_ALEN(len) &&
+			eff_cnt * 100 / EFF_ALEN(len) > EFF_MAX_PERC) {
+
+			memset(eff_map, 1, EFF_ALEN(len));
+
+			blocks_eff_select += EFF_ALEN(len);
+
+		}
+		else {
+
+			blocks_eff_select += eff_cnt;
+
+		}
+
+		blocks_eff_total += EFF_ALEN(len);
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP8] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP8] += stage_max;
+
+		/* Two walking bytes. */
+
+		if (len < 2) goto skip_bitflip;
+
+		stage_name = "bitflip 16/8";
+		stage_short = "flip16";
+		stage_cur = 0;
+		stage_max = len - 1;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 1; i++) {
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
+				stage_max--;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			*(u16*)(out_buf + i) ^= 0xFFFF;
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+			stage_cur++;
+
+			*(u16*)(out_buf + i) ^= 0xFFFF;
+
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP16] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP16] += stage_max;
+
+		if (len < 4) goto skip_bitflip;
+
+		/* Four walking bytes. */
+
+		stage_name = "bitflip 32/8";
+		stage_short = "flip32";
+		stage_cur = 0;
+		stage_max = len - 3;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 3; i++) {
+
+			/* Let's consult the effector map... */
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
+				!eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) {
+				stage_max--;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			*(u32*)(out_buf + i) ^= 0xFFFFFFFF;
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+			stage_cur++;
+
+			*(u32*)(out_buf + i) ^= 0xFFFFFFFF;
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP32] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP32] += stage_max;
+
+	skip_bitflip:
+
+		if (no_arith) goto skip_arith;
+
+		/**********************
+		 * ARITHMETIC INC/DEC *
+		 **********************/
+
+		/* 8-bit arithmetics. */
+
+		stage_name = "arith 8/8";
+		stage_short = "arith8";
+		stage_cur = 0;
+		stage_max = 2 * len * ARITH_MAX;
+
+		stage_val_type = STAGE_VAL_LE;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len; i++) {
+
+			u8 orig = out_buf[i];
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)]) {
+				stage_max -= 2 * ARITH_MAX;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 1; j <= ARITH_MAX; j++) {
+
+				u8 r = orig ^ (orig + j);
+
+				/* Do arithmetic operations only if the result couldn't be a product
+				   of a bitflip. */
+
+				if (!could_be_bitflip(r)) {
+
+					stage_cur_val = j;
+					out_buf[i] = orig + j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				r = orig ^ (orig - j);
+
+				if (!could_be_bitflip(r)) {
+
+					stage_cur_val = -j;
+					out_buf[i] = orig - j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				out_buf[i] = orig;
+
+			}
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_ARITH8] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_ARITH8] += stage_max;
+
+		/* 16-bit arithmetics, both endians. */
+
+		if (len < 2) goto skip_arith;
+
+		stage_name = "arith 16/8";
+		stage_short = "arith16";
+		stage_cur = 0;
+		stage_max = 4 * (len - 1) * ARITH_MAX;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 1; i++) {
+
+			u16 orig = *(u16*)(out_buf + i);
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
+				stage_max -= 4 * ARITH_MAX;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 1; j <= ARITH_MAX; j++) {
+
+				u16 r1 = orig ^ (orig + j),
+					r2 = orig ^ (orig - j),
+					r3 = orig ^ SWAP16(SWAP16(orig) + j),
+					r4 = orig ^ SWAP16(SWAP16(orig) - j);
+
+				/* Try little endian addition and subtraction first. Do it only
+				   if the operation would affect more than one byte (hence the
+				   & 0xff overflow checks) and if it couldn't be a product of
+				   a bitflip. */
+
+				stage_val_type = STAGE_VAL_LE;
+
+				if ((orig & 0xff) + j > 0xff && !could_be_bitflip(r1)) {
+
+					stage_cur_val = j;
+					*(u16*)(out_buf + i) = orig + j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((orig & 0xff) < j && !could_be_bitflip(r2)) {
+
+					stage_cur_val = -j;
+					*(u16*)(out_buf + i) = orig - j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				/* Big endian comes next. Same deal. */
+
+				stage_val_type = STAGE_VAL_BE;
+
+
+				if ((orig >> 8) + j > 0xff && !could_be_bitflip(r3)) {
+
+					stage_cur_val = j;
+					*(u16*)(out_buf + i) = SWAP16(SWAP16(orig) + j);
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((orig >> 8) < j && !could_be_bitflip(r4)) {
+
+					stage_cur_val = -j;
+					*(u16*)(out_buf + i) = SWAP16(SWAP16(orig) - j);
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				*(u16*)(out_buf + i) = orig;
+
+			}
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_ARITH16] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_ARITH16] += stage_max;
+
+		/* 32-bit arithmetics, both endians. */
+
+		if (len < 4) goto skip_arith;
+
+		stage_name = "arith 32/8";
+		stage_short = "arith32";
+		stage_cur = 0;
+		stage_max = 4 * (len - 3) * ARITH_MAX;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 3; i++) {
+
+			u32 orig = *(u32*)(out_buf + i);
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
+				!eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) {
+				stage_max -= 4 * ARITH_MAX;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 1; j <= ARITH_MAX; j++) {
+
+				u32 r1 = orig ^ (orig + j),
+					r2 = orig ^ (orig - j),
+					r3 = orig ^ SWAP32(SWAP32(orig) + j),
+					r4 = orig ^ SWAP32(SWAP32(orig) - j);
+
+				/* Little endian first. Same deal as with 16-bit: we only want to
+				   try if the operation would have effect on more than two bytes. */
+
+				stage_val_type = STAGE_VAL_LE;
+
+				if ((orig & 0xffff) + j > 0xffff && !could_be_bitflip(r1)) {
+
+					stage_cur_val = j;
+					*(u32*)(out_buf + i) = orig + j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((orig & 0xffff) < j && !could_be_bitflip(r2)) {
+
+					stage_cur_val = -j;
+					*(u32*)(out_buf + i) = orig - j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				/* Big endian next. */
+
+				stage_val_type = STAGE_VAL_BE;
+
+				if ((SWAP32(orig) & 0xffff) + j > 0xffff && !could_be_bitflip(r3)) {
+
+					stage_cur_val = j;
+					*(u32*)(out_buf + i) = SWAP32(SWAP32(orig) + j);
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((SWAP32(orig) & 0xffff) < j && !could_be_bitflip(r4)) {
+
+					stage_cur_val = -j;
+					*(u32*)(out_buf + i) = SWAP32(SWAP32(orig) - j);
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				*(u32*)(out_buf + i) = orig;
+
+			}
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_ARITH32] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_ARITH32] += stage_max;
+
+	skip_arith:
+
+		/**********************
+		 * INTERESTING VALUES *
+		 **********************/
+
+		stage_name = "interest 8/8";
+		stage_short = "int8";
+		stage_cur = 0;
+		stage_max = len * sizeof(interesting_8);
+
+		stage_val_type = STAGE_VAL_LE;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		/* Setting 8-bit integers. */
+
+		for (i = 0; i < len; i++) {
+
+			u8 orig = out_buf[i];
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)]) {
+				stage_max -= sizeof(interesting_8);
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 0; j < sizeof(interesting_8); j++) {
+
+				/* Skip if the value could be a product of bitflips or arithmetics. */
+
+				if (could_be_bitflip(orig ^ (u8)interesting_8[j]) ||
+					could_be_arith(orig, (u8)interesting_8[j], 1)) {
+					stage_max--;
+					continue;
+				}
+
+				stage_cur_val = interesting_8[j];
+				out_buf[i] = interesting_8[j];
+
+				if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+				out_buf[i] = orig;
+				stage_cur++;
+
+			}
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_INTEREST8] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_INTEREST8] += stage_max;
+
+		/* Setting 16-bit integers, both endians. */
+
+		if (no_arith || len < 2) goto skip_interest;
+
+		stage_name = "interest 16/8";
+		stage_short = "int16";
+		stage_cur = 0;
+		stage_max = 2 * (len - 1) * (sizeof(interesting_16) >> 1);
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 1; i++) {
+
+			u16 orig = *(u16*)(out_buf + i);
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
+				stage_max -= sizeof(interesting_16);
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 0; j < sizeof(interesting_16) / 2; j++) {
+
+				stage_cur_val = interesting_16[j];
+
+				/* Skip if this could be a product of a bitflip, arithmetics,
+				   or single-byte interesting value insertion. */
+
+				if (!could_be_bitflip(orig ^ (u16)interesting_16[j]) &&
+					!could_be_arith(orig, (u16)interesting_16[j], 2) &&
+					!could_be_interest(orig, (u16)interesting_16[j], 2, 0)) {
+
+					stage_val_type = STAGE_VAL_LE;
+
+					*(u16*)(out_buf + i) = interesting_16[j];
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((u16)interesting_16[j] != SWAP16(interesting_16[j]) &&
+					!could_be_bitflip(orig ^ SWAP16(interesting_16[j])) &&
+					!could_be_arith(orig, SWAP16(interesting_16[j]), 2) &&
+					!could_be_interest(orig, SWAP16(interesting_16[j]), 2, 1)) {
+
+					stage_val_type = STAGE_VAL_BE;
+
+					*(u16*)(out_buf + i) = SWAP16(interesting_16[j]);
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+			}
+
+			*(u16*)(out_buf + i) = orig;
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_INTEREST16] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_INTEREST16] += stage_max;
+
+		if (len < 4) goto skip_interest;
+
+		/* Setting 32-bit integers, both endians. */
+
+		stage_name = "interest 32/8";
+		stage_short = "int32";
+		stage_cur = 0;
+		stage_max = 2 * (len - 3) * (sizeof(interesting_32) >> 2);
+
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 3; i++) {
+
+			u32 orig = *(u32*)(out_buf + i);
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
+				!eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) {
+				stage_max -= sizeof(interesting_32) >> 1;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 0; j < sizeof(interesting_32) / 4; j++) {
+
+				stage_cur_val = interesting_32[j];
+
+				/* Skip if this could be a product of a bitflip, arithmetics,
+				   or word interesting value insertion. */
+
+				if (!could_be_bitflip(orig ^ (u32)interesting_32[j]) &&
+					!could_be_arith(orig, interesting_32[j], 4) &&
+					!could_be_interest(orig, interesting_32[j], 4, 0)) {
+
+					stage_val_type = STAGE_VAL_LE;
+
+					*(u32*)(out_buf + i) = interesting_32[j];
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((u32)interesting_32[j] != SWAP32(interesting_32[j]) &&
+					!could_be_bitflip(orig ^ SWAP32(interesting_32[j])) &&
+					!could_be_arith(orig, SWAP32(interesting_32[j]), 4) &&
+					!could_be_interest(orig, SWAP32(interesting_32[j]), 4, 1)) {
+
+					stage_val_type = STAGE_VAL_BE;
+
+					*(u32*)(out_buf + i) = SWAP32(interesting_32[j]);
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+			}
+
+			*(u32*)(out_buf + i) = orig;
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_INTEREST32] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_INTEREST32] += stage_max;
+
+	skip_interest:
+
+		/********************
+		 * DICTIONARY STUFF *
+		 ********************/
+
+		if (!extras_cnt) goto skip_user_extras;
+
+		/* Overwrite with user-supplied extras. */
+
+		stage_name = "user extras (over)";
+		stage_short = "ext_UO";
+		stage_cur = 0;
+		stage_max = extras_cnt * len;
+
+		stage_val_type = STAGE_VAL_NONE;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len; i++) {
+
+			u32 last_len = 0;
+
+			stage_cur_byte = i;
+
+			/* Extras are sorted by size, from smallest to largest. This means
+			   that we don't have to worry about restoring the buffer in
+			   between writes at a particular offset determined by the outer
+			   loop. */
+
+			for (j = 0; j < extras_cnt; j++) {
+
+				/* Skip extras probabilistically if extras_cnt > MAX_DET_EXTRAS. Also
+				   skip them if there's no room to insert the payload, if the token
+				   is redundant, or if its entire span has no bytes set in the effector
+				   map. */
+
+				if ((extras_cnt > MAX_DET_EXTRAS && UR(extras_cnt) >= MAX_DET_EXTRAS) ||
+					extras[j].len > len - i ||
+					!memcmp(extras[j].data, out_buf + i, extras[j].len) ||
+					!memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, extras[j].len))) {
+
+					stage_max--;
+					continue;
+
+				}
+
+				last_len = extras[j].len;
+				memcpy(out_buf + i, extras[j].data, last_len);
+
+				if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+				stage_cur++;
+
+			}
+
+			/* Restore all the clobbered memory. */
+			memcpy(out_buf + i, in_buf + i, last_len);
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_EXTRAS_UO] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_EXTRAS_UO] += stage_max;
+
+		/* Insertion of user-supplied extras. */
+
+		stage_name = "user extras (insert)";
+		stage_short = "ext_UI";
+		stage_cur = 0;
+		stage_max = extras_cnt * len;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		ex_tmp = ck_alloc(len + MAX_DICT_FILE);
+
+		for (i = 0; i <= len; i++) {
+
+			stage_cur_byte = i;
+
+			for (j = 0; j < extras_cnt; j++) {
+
+				if (len + extras[j].len > MAX_FILE) {
+					stage_max--;
+					continue;
+				}
+
+				/* Insert token */
+				memcpy(ex_tmp + i, extras[j].data, extras[j].len);
+
+				/* Copy tail */
+				memcpy(ex_tmp + i + extras[j].len, out_buf + i, len - i);
+
+				if (common_fuzz_stuff(argv, ex_tmp, len + extras[j].len)) {
+					ck_free(ex_tmp);
+					goto abandon_entry;
+				}
+
+				stage_cur++;
+
+			}
+
+			/* Copy head */
+			ex_tmp[i] = out_buf[i];
+
+		}
+
+		ck_free(ex_tmp);
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_EXTRAS_UI] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_EXTRAS_UI] += stage_max;
+
+	skip_user_extras:
+
+		if (!a_extras_cnt) goto skip_extras;
+
+		stage_name = "auto extras (over)";
+		stage_short = "ext_AO";
+		stage_cur = 0;
+		stage_max = MIN(a_extras_cnt, USE_AUTO_EXTRAS) * len;
+
+
+		stage_val_type = STAGE_VAL_NONE;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len; i++) {
+
+			u32 last_len = 0;
+
+			stage_cur_byte = i;
+
+			for (j = 0; j < MIN(a_extras_cnt, USE_AUTO_EXTRAS); j++) {
+
+				/* See the comment in the earlier code; extras are sorted by size. */
+
+				if (a_extras[j].len > len - i ||
+					!memcmp(a_extras[j].data, out_buf + i, a_extras[j].len) ||
+					!memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, a_extras[j].len))) {
+
+					stage_max--;
+					continue;
+
+				}
+
+				last_len = a_extras[j].len;
+				memcpy(out_buf + i, a_extras[j].data, last_len);
+
+				if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+				stage_cur++;
+
+			}
+
+			/* Restore all the clobbered memory. */
+			memcpy(out_buf + i, in_buf + i, last_len);
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_EXTRAS_AO] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_EXTRAS_AO] += stage_max;
+
+	skip_extras:
+
+		/* If we made this to here without jumping to havoc_stage or abandon_entry,
+		   we're properly done with deterministic steps and can mark it as such
+		   in the .state/ directory. */
+
+		if (!queue_cur->passed_det) mark_as_det_done(queue_cur);
+
+		/****************
+		 * RANDOM HAVOC *
+		 ****************/
+
+	havoc_stage:
+	pacemaker_fuzzing:
+
+
+		stage_cur_byte = -1;
+
+		/* The havoc stage mutation code is also invoked when splicing files; if the
+		   splice_cycle variable is set, generate different descriptions and such. */
+
+		if (!splice_cycle) {
+
+			stage_name = "MOpt-havoc";
+			stage_short = "MOpt_havoc";
+			stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) *
+				perf_score / havoc_div / 100;
+
+		}
+		else {
+
+			static u8 tmp[32];
+
+			perf_score = orig_perf;
+
+			sprintf(tmp, "MOpt-splice %u", splice_cycle);
+			stage_name = tmp;
+			stage_short = "MOpt_splice";
+			stage_max = SPLICE_HAVOC * perf_score / havoc_div / 100;
+
+		}
+
+		s32 temp_len_puppet;
+		cur_ms_lv = get_cur_time();
+
+		{
+
+
+			if (key_puppet == 1)
+			{
+				if (unlikely(orig_hit_cnt_puppet == 0))
+				{
+					orig_hit_cnt_puppet = queued_paths + unique_crashes;
+					last_limit_time_start = get_cur_time();
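+					/* Draw this run's splice-cycle budget uniformly from
+					   [SPLICE_CYCLES_puppet_low, SPLICE_CYCLES_puppet_up].
+					   (Descriptive comment added by the editor.) */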
+					SPLICE_CYCLES_puppet = (UR(SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) + SPLICE_CYCLES_puppet_low);
+				}
+			}
+
+
+			{
+			havoc_stage_puppet:
+
+				stage_cur_byte = -1;
+
+				/* The havoc stage mutation code is also invoked when splicing files; if the
+				   splice_cycle variable is set, generate different descriptions and such. */
+
+				if (!splice_cycle) {
+
+					stage_name = "MOpt-havoc";
+					stage_short = "MOpt_havoc";
+					stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) *
+						perf_score / havoc_div / 100;
+
+				}
+				else {
+					static u8 tmp[32];
+					perf_score = orig_perf;
+					sprintf(tmp, "MOpt-splice %u", splice_cycle);
+					stage_name = tmp;
+					stage_short = "MOpt_splice";
+					stage_max = SPLICE_HAVOC * perf_score / havoc_div / 100;
+				}
+
+				if (stage_max < HAVOC_MIN) stage_max = HAVOC_MIN;
+
+				temp_len = len;
+
+				orig_hit_cnt = queued_paths + unique_crashes;
+
+				havoc_queued = queued_paths;
+
+				for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
+
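+					/* Stack a power-of-two number of mutations per candidate: 2..128
+					   with the default HAVOC_STACK_POW2 of 7 (see config.h). */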
+					u32 use_stacking = 1 << (1 + UR(HAVOC_STACK_POW2));
+
+					stage_cur_val = use_stacking;
+
+
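+					/* Snapshot each operator's cycle counter; any operator whose counter
+					   advances during this run gets credit for the resulting finds. */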
+					for (i = 0; i < operator_num; i++)
+					{
+						stage_cycles_puppet_v3[swarm_now][i] = stage_cycles_puppet_v2[swarm_now][i];
+					}
+
+
+					for (i = 0; i < use_stacking; i++) {
+
+						switch (select_algorithm()) {
+
+						case 0:
+							/* Flip a single bit somewhere. Spooky! */
+							FLIP_BIT(out_buf, UR(temp_len << 3));
+							stage_cycles_puppet_v2[swarm_now][STAGE_FLIP1] += 1;
+							break;
+
+
+						case 1:
+							if (temp_len < 2) break;
+							temp_len_puppet = UR(temp_len << 3);
+							FLIP_BIT(out_buf, temp_len_puppet);
+							FLIP_BIT(out_buf, temp_len_puppet + 1);
+							stage_cycles_puppet_v2[swarm_now][STAGE_FLIP2] += 1;
+							break;
+
+						case 2:
+							if (temp_len < 2) break;
+							temp_len_puppet = UR(temp_len << 3);
+							FLIP_BIT(out_buf, temp_len_puppet);
+							FLIP_BIT(out_buf, temp_len_puppet + 1);
+							FLIP_BIT(out_buf, temp_len_puppet + 2);
+							FLIP_BIT(out_buf, temp_len_puppet + 3);
+							stage_cycles_puppet_v2[swarm_now][STAGE_FLIP4] += 1;
+							break;
+
+						case 3:
+							if (temp_len < 4) break;
+							out_buf[UR(temp_len)] ^= 0xFF;
+							stage_cycles_puppet_v2[swarm_now][STAGE_FLIP8] += 1;
+							break;
+
+						case 4:
+							if (temp_len < 8) break;
+							*(u16*)(out_buf + UR(temp_len - 1)) ^= 0xFFFF;
+							stage_cycles_puppet_v2[swarm_now][STAGE_FLIP16] += 1;
+							break;
+
+						case 5:
+							if (temp_len < 8) break;
+							*(u32*)(out_buf + UR(temp_len - 3)) ^= 0xFFFFFFFF;
+							stage_cycles_puppet_v2[swarm_now][STAGE_FLIP32] += 1;
+							break;
+
+						case 6:
+							out_buf[UR(temp_len)] -= 1 + UR(ARITH_MAX);
+							out_buf[UR(temp_len)] += 1 + UR(ARITH_MAX);
+							stage_cycles_puppet_v2[swarm_now][STAGE_ARITH8] += 1;
+							break;
+
+						case 7:
+							/* Randomly subtract from word, random endian. */
+							if (temp_len < 8) break;
+							if (UR(2)) {
+								u32 pos = UR(temp_len - 1);
+								*(u16*)(out_buf + pos) -= 1 + UR(ARITH_MAX);
+							}
+							else {
+								u32 pos = UR(temp_len - 1);
+								u16 num = 1 + UR(ARITH_MAX);
+								*(u16*)(out_buf + pos) =
+									SWAP16(SWAP16(*(u16*)(out_buf + pos)) - num);
+							}
+							/* Randomly add to word, random endian. */
+							if (UR(2)) {
+								u32 pos = UR(temp_len - 1);
+								*(u16*)(out_buf + pos) += 1 + UR(ARITH_MAX);
+							}
+							else {
+								u32 pos = UR(temp_len - 1);
+								u16 num = 1 + UR(ARITH_MAX);
+								*(u16*)(out_buf + pos) =
+									SWAP16(SWAP16(*(u16*)(out_buf + pos)) + num);
+							}
+							stage_cycles_puppet_v2[swarm_now][STAGE_ARITH16] += 1;
+							break;
+
+
+						case 8:
+							/* Randomly subtract from dword, random endian. */
+							if (temp_len < 8) break;
+							if (UR(2)) {
+								u32 pos = UR(temp_len - 3);
+								*(u32*)(out_buf + pos) -= 1 + UR(ARITH_MAX);
+							}
+							else {
+								u32 pos = UR(temp_len - 3);
+								u32 num = 1 + UR(ARITH_MAX);
+								*(u32*)(out_buf + pos) =
+									SWAP32(SWAP32(*(u32*)(out_buf + pos)) - num);
+							}
+							/* Randomly add to dword, random endian. */
+							//if (temp_len < 4) break;
+							if (UR(2)) {
+								u32 pos = UR(temp_len - 3);
+								*(u32*)(out_buf + pos) += 1 + UR(ARITH_MAX);
+							}
+							else {
+								u32 pos = UR(temp_len - 3);
+								u32 num = 1 + UR(ARITH_MAX);
+								*(u32*)(out_buf + pos) =
+									SWAP32(SWAP32(*(u32*)(out_buf + pos)) + num);
+							}
+							stage_cycles_puppet_v2[swarm_now][STAGE_ARITH32] += 1;
+							break;
+
+
+						case 9:
+							/* Set byte to interesting value. */
+							if (temp_len < 4) break;
+							out_buf[UR(temp_len)] = interesting_8[UR(sizeof(interesting_8))];
+							stage_cycles_puppet_v2[swarm_now][STAGE_INTEREST8] += 1;
+							break;
+
+						case 10:
+							/* Set word to interesting value, randomly choosing endian. */
+							if (temp_len < 8) break;
+							if (UR(2)) {
+								*(u16*)(out_buf + UR(temp_len - 1)) =
+									interesting_16[UR(sizeof(interesting_16) >> 1)];
+							}
+							else {
+								*(u16*)(out_buf + UR(temp_len - 1)) = SWAP16(
+									interesting_16[UR(sizeof(interesting_16) >> 1)]);
+							}
+							stage_cycles_puppet_v2[swarm_now][STAGE_INTEREST16] += 1;
+							break;
+
+
+						case 11:
+							/* Set dword to interesting value, randomly choosing endian. */
+
+							if (temp_len < 8) break;
+
+							if (UR(2)) {
+								*(u32*)(out_buf + UR(temp_len - 3)) =
+									interesting_32[UR(sizeof(interesting_32) >> 2)];
+							}
+							else {
+								*(u32*)(out_buf + UR(temp_len - 3)) = SWAP32(
+									interesting_32[UR(sizeof(interesting_32) >> 2)]);
+							}
+							stage_cycles_puppet_v2[swarm_now][STAGE_INTEREST32] += 1;
+							break;
+
+
+						case 12:
+
+							/* Just set a random byte to a random value. Because,
+							   why not. We use XOR with 1-255 to eliminate the
+							   possibility of a no-op. */
+
+							out_buf[UR(temp_len)] ^= 1 + UR(255);
+							stage_cycles_puppet_v2[swarm_now][STAGE_RANDOMBYTE] += 1;
+							break;
+
+
+
+						case 13: {
+
+							/* Delete bytes. We're making this a bit more likely
+							   than insertion (the next option) in hopes of keeping
+							   files reasonably small. */
+
+							u32 del_from, del_len;
+
+							if (temp_len < 2) break;
+
+							/* Don't delete too much. */
+
+							del_len = choose_block_len(temp_len - 1);
+
+							del_from = UR(temp_len - del_len + 1);
+
+							memmove(out_buf + del_from, out_buf + del_from + del_len,
+								temp_len - del_from - del_len);
+
+							temp_len -= del_len;
+							stage_cycles_puppet_v2[swarm_now][STAGE_DELETEBYTE] += 1;
+							break;
+
+						}
+
+						case 14:
+
+							if (temp_len + HAVOC_BLK_XL < MAX_FILE) {
+
+								/* Clone bytes (75%) or insert a block of constant bytes (25%). */
+
+								u8  actually_clone = UR(4);
+								u32 clone_from, clone_to, clone_len;
+								u8* new_buf;
+
+								if (actually_clone) {
+
+									clone_len = choose_block_len(temp_len);
+									clone_from = UR(temp_len - clone_len + 1);
+
+								}
+								else {
+
+									clone_len = choose_block_len(HAVOC_BLK_XL);
+									clone_from = 0;
+
+								}
+
+								clone_to = UR(temp_len);
+
+								new_buf = ck_alloc_nozero(temp_len + clone_len);
+
+								/* Head */
+
+								memcpy(new_buf, out_buf, clone_to);
+
+								/* Inserted part */
+
+								if (actually_clone)
+									memcpy(new_buf + clone_to, out_buf + clone_from, clone_len);
+								else
+									memset(new_buf + clone_to,
+										UR(2) ? UR(256) : out_buf[UR(temp_len)], clone_len);
+
+								/* Tail */
+								memcpy(new_buf + clone_to + clone_len, out_buf + clone_to,
+									temp_len - clone_to);
+
+								ck_free(out_buf);
+								out_buf = new_buf;
+								temp_len += clone_len;
+								stage_cycles_puppet_v2[swarm_now][STAGE_Clone75] += 1;
+							}
+
+							break;
+
+						case 15: {
+
+							/* Overwrite bytes with a randomly selected chunk (75%) or fixed
+							   bytes (25%). */
+
+							u32 copy_from, copy_to, copy_len;
+
+							if (temp_len < 2) break;
+
+							copy_len = choose_block_len(temp_len - 1);
+
+							copy_from = UR(temp_len - copy_len + 1);
+							copy_to = UR(temp_len - copy_len + 1);
+
+							if (UR(4)) {
+
+								if (copy_from != copy_to)
+									memmove(out_buf + copy_to, out_buf + copy_from, copy_len);
+
+							}
+							else memset(out_buf + copy_to,
+								UR(2) ? UR(256) : out_buf[UR(temp_len)], copy_len);
+							stage_cycles_puppet_v2[swarm_now][STAGE_OverWrite75] += 1;
+							break;
+
+						}
+
+
+						}
+
+					}
+
+
+					tmp_pilot_time += 1;
+
+					u64 temp_total_found = queued_paths + unique_crashes;
+
+					if (common_fuzz_stuff(argv, out_buf, temp_len))
+						goto abandon_entry_puppet;
+
+					/* out_buf might have been mangled a bit, so let's restore it to its
+					   original size and shape. */
+
+					if (temp_len < len) out_buf = ck_realloc(out_buf, len);
+					temp_len = len;
+					memcpy(out_buf, in_buf, len);
+
+					/* If we're finding new stuff, let's run for a bit longer, limits
+					   permitting. */
+
+					if (queued_paths != havoc_queued) {
+
+						if (perf_score <= havoc_max_mult * 100) {
+							stage_max *= 2;
+							perf_score *= 2;
+						}
+
+						havoc_queued = queued_paths;
+
+					}
+
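+					/* Credit this execution's new paths and crashes to every operator
+					   that ran in the stacked mutation above. */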
+					if (unlikely(queued_paths + unique_crashes > temp_total_found))
+					{
+						u64 temp_temp_puppet = queued_paths + unique_crashes - temp_total_found;
+						total_puppet_find = total_puppet_find + temp_temp_puppet;
+						for (i = 0; i < operator_num; i++)
+						{
+							if (stage_cycles_puppet_v2[swarm_now][i] > stage_cycles_puppet_v3[swarm_now][i])
+								stage_finds_puppet_v2[swarm_now][i] += temp_temp_puppet;
+						}
+					}
+
+				}
+				new_hit_cnt = queued_paths + unique_crashes;
+
+				if (!splice_cycle) {
+					stage_finds[STAGE_HAVOC]  += new_hit_cnt - orig_hit_cnt;
+					stage_cycles[STAGE_HAVOC] += stage_max;
+				} else {
+					stage_finds[STAGE_SPLICE]  += new_hit_cnt - orig_hit_cnt;
+					stage_cycles[STAGE_SPLICE] += stage_max;
+				}
+
+#ifndef IGNORE_FINDS
+
+				/************
+				 * SPLICING *
+				 ************/
+
+
+			retry_splicing_puppet:
+
+				if (use_splicing && splice_cycle++ < SPLICE_CYCLES_puppet &&
+					queued_paths > 1 && queue_cur->len > 1) {
+
+					struct queue_entry* target;
+					u32 tid, split_at;
+					u8* new_buf;
+					s32 f_diff, l_diff;
+
+					/* First of all, if we've modified in_buf for havoc, let's clean that
+					   up... */
+
+					if (in_buf != orig_in) {
+						ck_free(in_buf);
+						in_buf = orig_in;
+						len = queue_cur->len;
+					}
+
+					/* Pick a random queue entry and seek to it. Don't splice with yourself. */
+
+					do { tid = UR(queued_paths); } while (tid == current_entry);
+
+					splicing_with = tid;
+					target = queue;
+
+					while (tid >= 100) { target = target->next_100; tid -= 100; }
+					while (tid--) target = target->next;
+
+					/* Make sure that the target has a reasonable length. */
+
+					while (target && (target->len < 2 || target == queue_cur)) {
+						target = target->next;
+						splicing_with++;
+					}
+
+					if (!target) goto retry_splicing_puppet;
+
+					/* Read the testcase into a new buffer. */
+
+					fd = open(target->fname, O_RDONLY);
+
+					if (fd < 0) PFATAL("Unable to open '%s'", target->fname);
+
+					new_buf = ck_alloc_nozero(target->len);
+
+					ck_read(fd, new_buf, target->len, target->fname);
+
+					close(fd);
+
+					/* Find a suitable splicing location, somewhere between the first and
+					   the last differing byte. Bail out if the difference is just a single
+					   byte or so. */
+
+					locate_diffs(in_buf, new_buf, MIN(len, target->len), &f_diff, &l_diff);
+
+					if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) {
+						ck_free(new_buf);
+						goto retry_splicing_puppet;
+					}
+
+					/* Split somewhere between the first and last differing byte. */
+
+					split_at = f_diff + UR(l_diff - f_diff);
+
+					/* Do the thing. */
+
+					len = target->len;
+					memcpy(new_buf, in_buf, split_at);
+					in_buf = new_buf;
+					ck_free(out_buf);
+					out_buf = ck_alloc_nozero(len);
+					memcpy(out_buf, in_buf, len);
+					goto havoc_stage_puppet;
+
+				}
+
+#endif /* !IGNORE_FINDS */
+
+				ret_val = 0;
+
+			abandon_entry:
+			abandon_entry_puppet:
+
+				if (splice_cycle >= SPLICE_CYCLES_puppet)
+					SPLICE_CYCLES_puppet = (UR(SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) + SPLICE_CYCLES_puppet_low);
+
+
+				splicing_with = -1;
+
+				/* Update pending_not_fuzzed count if we made it through the calibration
+				   cycle and have not seen this entry before. */
+
+				// if (!stop_soon && !queue_cur->cal_failed && !queue_cur->was_fuzzed) {
+				//   queue_cur->was_fuzzed = 1;
+				//   pending_not_fuzzed--;
+				//   if (queue_cur->favored) pending_favored--;
+				// }
+
+				munmap(orig_in, queue_cur->len);
+
+				if (in_buf != orig_in) ck_free(in_buf);
+				ck_free(out_buf);
+				ck_free(eff_map);
+
+
+				if (key_puppet == 1) {
+					if (unlikely(queued_paths + unique_crashes > ((queued_paths + unique_crashes) * limit_time_bound + orig_hit_cnt_puppet))) {
+						key_puppet = 0;
+						cur_ms_lv = get_cur_time();
+						new_hit_cnt = queued_paths + unique_crashes;
+						orig_hit_cnt_puppet = 0;
+						last_limit_time_start = 0;
+					}
+				}
+
+
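+				/* A pilot period has elapsed: score this swarm's fitness as new finds
+				   per normalized unit of pacemaker time, then rotate swarms. */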
+				if (unlikely(tmp_pilot_time > period_pilot)) {
+					total_pacemaker_time += tmp_pilot_time;
+					new_hit_cnt = queued_paths + unique_crashes;
+					swarm_fitness[swarm_now] = (double)(total_puppet_find - temp_puppet_find) / ((double)(tmp_pilot_time) / period_pilot_tmp);
+					tmp_pilot_time = 0;
+					temp_puppet_find = total_puppet_find;
+
+					u64 temp_stage_finds_puppet = 0;
+					for (i = 0; i < operator_num; i++) {
+						double temp_eff = 0.0;
+
+						if (stage_cycles_puppet_v2[swarm_now][i] > stage_cycles_puppet[swarm_now][i])
+							temp_eff = (double)(stage_finds_puppet_v2[swarm_now][i] - stage_finds_puppet[swarm_now][i]) /
+							(double)(stage_cycles_puppet_v2[swarm_now][i] - stage_cycles_puppet[swarm_now][i]);
+
+						if (eff_best[swarm_now][i] < temp_eff) {
+							eff_best[swarm_now][i] = temp_eff;
+							L_best[swarm_now][i] = x_now[swarm_now][i];
+						}
+
+						stage_finds_puppet[swarm_now][i] = stage_finds_puppet_v2[swarm_now][i];
+						stage_cycles_puppet[swarm_now][i] = stage_cycles_puppet_v2[swarm_now][i];
+						temp_stage_finds_puppet += stage_finds_puppet[swarm_now][i];
+					}
+
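+					/* Move on to the next swarm; once every swarm has been piloted, let
+					   the core module take over (key_module == 1) and resume with the
+					   fittest swarm. */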
+					swarm_now = swarm_now + 1;
+					if (swarm_now == swarm_num) {
+						key_module = 1;
+						for (i = 0; i < operator_num; i++) {
+							core_operator_cycles_puppet_v2[i] = core_operator_cycles_puppet[i];
+							core_operator_cycles_puppet_v3[i] = core_operator_cycles_puppet[i];
+							core_operator_finds_puppet_v2[i] = core_operator_finds_puppet[i];
+						}
+
+						double swarm_eff = 0.0;
+						swarm_now = 0;
+						for (i = 0; i < swarm_num; i++) {
+							if (swarm_fitness[i] > swarm_eff) {
+								swarm_eff = swarm_fitness[i];
+								swarm_now = i;
+							}
+						}
+
+						if (swarm_now < 0 || swarm_now > swarm_num - 1)
+							FATAL("swarm_now out of range: %d", swarm_now);
+
+					}
+				}
+				return ret_val;
+			}
+		}
+
+
+#undef FLIP_BIT
+
+}
+
+
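+/* MOpt core module: fuzzes using the operator distribution of the fittest
+   swarm found by the pilot module, gathering the per-operator statistics
+   that feed the next PSO update. (Descriptive comment added by the editor.) */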
+static u8 core_fuzzing(char** argv) {
+	int i;
+
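+	/* With a single swarm there is nothing to compare; hand control straight
+	   to the PSO update step (key_module == 2). */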
+	if (swarm_num == 1) {
+		key_module = 2;
+		return 0;
+	}
+
+
+		s32 len, fd, temp_len, j;
+		u8  *in_buf, *out_buf, *orig_in, *ex_tmp, *eff_map = 0;
+		u64 havoc_queued, orig_hit_cnt, new_hit_cnt, cur_ms_lv;
+		u32 splice_cycle = 0, perf_score = 100, orig_perf, prev_cksum, eff_cnt = 1;
+
+		u8  ret_val = 1, doing_det = 0;
+
+		u8  a_collect[MAX_AUTO_EXTRA];
+		u32 a_len = 0;
+
+#ifdef IGNORE_FINDS
+
+		/* In IGNORE_FINDS mode, skip any entries that weren't in the
+		   initial data set. */
+
+		if (queue_cur->depth > 1) return 1;
+
+#else
+
+		if (pending_favored) {
+
+			/* If we have any favored, non-fuzzed new arrivals in the queue,
+			   possibly skip to them at the expense of already-fuzzed or non-favored
+			   cases. */
+
+			if ((queue_cur->was_fuzzed || !queue_cur->favored) &&
+				UR(100) < SKIP_TO_NEW_PROB) return 1;
+
+		} else if (!dumb_mode && !queue_cur->favored && queued_paths > 10) {
+
+			/* Otherwise, still possibly skip non-favored cases, albeit less often.
+			   The odds of skipping stuff are higher for already-fuzzed inputs and
+			   lower for never-fuzzed entries. */
+
+			if (queue_cycle > 1 && !queue_cur->was_fuzzed) {
+
+				if (UR(100) < SKIP_NFAV_NEW_PROB) return 1;
+
+			} else {
+
+				if (UR(100) < SKIP_NFAV_OLD_PROB) return 1;
+
+			}
+
+		}
+
+#endif /* ^IGNORE_FINDS */
+
+		if (not_on_tty) {
+			ACTF("Fuzzing test case #%u (%u total, %llu uniq crashes found)...",
+				current_entry, queued_paths, unique_crashes);
+			fflush(stdout);
+		}
+
+		/* Map the test case into memory. */
+
+		fd = open(queue_cur->fname, O_RDONLY);
+
+		if (fd < 0) PFATAL("Unable to open '%s'", queue_cur->fname);
+
+		len = queue_cur->len;
+
+		orig_in = in_buf = mmap(0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+
+		if (orig_in == MAP_FAILED) PFATAL("Unable to mmap '%s'", queue_cur->fname);
+
+		close(fd);
+
+		/* We could mmap() out_buf as MAP_PRIVATE, but we end up clobbering every
+		   single byte anyway, so it wouldn't give us any performance or memory usage
+		   benefits. */
+
+		out_buf = ck_alloc_nozero(len);
+
+		subseq_tmouts = 0;
+
+		cur_depth = queue_cur->depth;
+
+		/*******************************************
+		 * CALIBRATION (only if failed earlier on) *
+		 *******************************************/
+
+		if (queue_cur->cal_failed) {
+
+			u8 res = FAULT_TMOUT;
+
+			if (queue_cur->cal_failed < CAL_CHANCES) {
+
+				res = calibrate_case(argv, queue_cur, in_buf, queue_cycle - 1, 0);
+
+				if (res == FAULT_ERROR)
+					FATAL("Unable to execute target application");
+
+			}
+
+			if (stop_soon || res != crash_mode) {
+				cur_skipped_paths++;
+				goto abandon_entry;
+			}
+
+		}
+
+		/************
+		 * TRIMMING *
+		 ************/
+
+		if (!dumb_mode && !queue_cur->trim_done) {
+
+			u8 res = trim_case(argv, queue_cur, in_buf);
+
+			if (res == FAULT_ERROR)
+				FATAL("Unable to execute target application");
+
+			if (stop_soon) {
+				cur_skipped_paths++;
+				goto abandon_entry;
+			}
+
+			/* Don't retry trimming, even if it failed. */
+
+			queue_cur->trim_done = 1;
+
+			if (len != queue_cur->len) len = queue_cur->len;
+
+		}
+
+		memcpy(out_buf, in_buf, len);
+
+		/*********************
+		 * PERFORMANCE SCORE *
+		 *********************/
+
+		orig_perf = perf_score = calculate_score(queue_cur);
+
+		/* Skip right away if -d is given, if we have done deterministic fuzzing on
+		   this entry ourselves (was_fuzzed), or if it has gone through deterministic
+		   testing in earlier, resumed runs (passed_det). */
+
+		if (skip_deterministic || queue_cur->was_fuzzed || queue_cur->passed_det)
+			goto havoc_stage;
+
+		/* Skip deterministic fuzzing if exec path checksum puts this out of scope
+		   for this master instance. */
+
+		if (master_max && (queue_cur->exec_cksum % master_max) != master_id - 1)
+			goto havoc_stage;
+
+
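+		/* Enter pacemaker mode if neither a new path nor a crash has been
+		   seen within the last limit_time_puppet milliseconds. */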
+		cur_ms_lv = get_cur_time();
+		if (!(key_puppet == 0 && ((cur_ms_lv - last_path_time < limit_time_puppet) ||
+			(last_crash_time != 0 && cur_ms_lv - last_crash_time < limit_time_puppet) || last_path_time == 0)))
+		{
+			key_puppet = 1;
+			goto pacemaker_fuzzing;
+		}
+
+		doing_det = 1;
+
+		/*********************************************
+		 * SIMPLE BITFLIP (+dictionary construction) *
+		 *********************************************/
+
+#define FLIP_BIT(_ar, _b) do { \
+    u8* _arf = (u8*)(_ar); \
+    u32 _bf = (_b); \
+    _arf[(_bf) >> 3] ^= (128 >> ((_bf) & 7)); \
+  } while (0)
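+
+		 /* E.g. FLIP_BIT(buf, 10) XORs byte 10 >> 3 == 1 with the mask
+		    128 >> (10 & 7) == 0x20, toggling a single bit in place. */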
+
+		 /* Single walking bit. */
+
+		stage_short = "flip1";
+		stage_max = len << 3;
+		stage_name = "bitflip 1/1";
+
+		stage_val_type = STAGE_VAL_NONE;
+
+		orig_hit_cnt = queued_paths + unique_crashes;
+
+		prev_cksum = queue_cur->exec_cksum;
+
+		for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
+
+			stage_cur_byte = stage_cur >> 3;
+
+			FLIP_BIT(out_buf, stage_cur);
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+			FLIP_BIT(out_buf, stage_cur);
+
+			/* While flipping the least significant bit in every byte, pull off an extra
+			   trick to detect possible syntax tokens. In essence, the idea is that if
+			   you have a binary blob like this:
+
+			   xxxxxxxxIHDRxxxxxxxx
+
+			   ...and changing the leading and trailing bytes causes variable or no
+			   changes in program flow, but touching any character in the "IHDR" string
+			   always produces the same, distinctive path, it's highly likely that
+			   "IHDR" is an atomically-checked magic value of special significance to
+			   the fuzzed format.
+
+			   We do this here, rather than as a separate stage, because it's a nice
+			   way to keep the operation approximately "free" (i.e., no extra execs).
+
+			   Empirically, performing the check when flipping the least significant bit
+			   is advantageous, compared to doing it at the time of more disruptive
+			   changes, where the program flow may be affected in more violent ways.
+
+			   The caveat is that we won't generate dictionaries in the -d mode or -S
+			   mode - but that's probably a fair trade-off.
+
+			   This won't work particularly well with paths that exhibit variable
+			   behavior, but fails gracefully, so we'll carry out the checks anyway.
+
+			  */
+
+			if (!dumb_mode && (stage_cur & 7) == 7) {
+
+				u32 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
+
+				if (stage_cur == stage_max - 1 && cksum == prev_cksum) {
+
+					/* If at end of file and we are still collecting a string, grab the
+					   final character and force output. */
+
+					if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3];
+					a_len++;
+
+					if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
+						maybe_add_auto(a_collect, a_len);
+
+				}
+				else if (cksum != prev_cksum) {
+
+					/* Otherwise, if the checksum has changed, see if we have something
+					   worthwhile queued up, and collect that if the answer is yes. */
+
+					if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
+						maybe_add_auto(a_collect, a_len);
+
+					a_len = 0;
+					prev_cksum = cksum;
+
+				}
+
+				/* Continue collecting string, but only if the bit flip actually made
+				   any difference - we don't want no-op tokens. */
+
+				if (cksum != queue_cur->exec_cksum) {
+
+					if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3];
+					a_len++;
+
+				}
+
+			}
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP1] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP1] += stage_max;
+
+
+
+		/* Two walking bits. */
+
+		stage_name = "bitflip 2/1";
+		stage_short = "flip2";
+		stage_max = (len << 3) - 1;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
+
+			stage_cur_byte = stage_cur >> 3;
+
+			FLIP_BIT(out_buf, stage_cur);
+			FLIP_BIT(out_buf, stage_cur + 1);
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+			FLIP_BIT(out_buf, stage_cur);
+			FLIP_BIT(out_buf, stage_cur + 1);
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP2] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP2] += stage_max;
+
+
+		/* Four walking bits. */
+
+		stage_name = "bitflip 4/1";
+		stage_short = "flip4";
+		stage_max = (len << 3) - 3;
+
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
+
+			stage_cur_byte = stage_cur >> 3;
+
+			FLIP_BIT(out_buf, stage_cur);
+			FLIP_BIT(out_buf, stage_cur + 1);
+			FLIP_BIT(out_buf, stage_cur + 2);
+			FLIP_BIT(out_buf, stage_cur + 3);
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+			FLIP_BIT(out_buf, stage_cur);
+			FLIP_BIT(out_buf, stage_cur + 1);
+			FLIP_BIT(out_buf, stage_cur + 2);
+			FLIP_BIT(out_buf, stage_cur + 3);
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP4] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP4] += stage_max;
+
+
+		/* Effector map setup. These macros calculate:
+
+		   EFF_APOS      - position of a particular file offset in the map.
+		   EFF_ALEN      - length of a map with a particular number of bytes.
+		   EFF_SPAN_ALEN - map span for a sequence of bytes.
+
+		 */
+
+#define EFF_APOS(_p)          ((_p) >> EFF_MAP_SCALE2)
+#define EFF_REM(_x)           ((_x) & ((1 << EFF_MAP_SCALE2) - 1))
+#define EFF_ALEN(_l)          (EFF_APOS(_l) + !!EFF_REM(_l))
+#define EFF_SPAN_ALEN(_p, _l) (EFF_APOS((_p) + (_l) - 1) - EFF_APOS(_p) + 1)
+
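+		 /* With the default EFF_MAP_SCALE2 of 3 (config.h), one map entry covers
+		    an 8-byte block: EFF_APOS(11) == 1, EFF_ALEN(17) == 3, and
+		    EFF_SPAN_ALEN(6, 4) == 2, since that span crosses one block
+		    boundary. */
+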
+		 /* Initialize effector map for the next step (see comments below). Always
+			flag first and last byte as doing something. */
+
+		eff_map = ck_alloc(EFF_ALEN(len));
+		eff_map[0] = 1;
+
+		if (EFF_APOS(len - 1) != 0) {
+			eff_map[EFF_APOS(len - 1)] = 1;
+			eff_cnt++;
+		}
+
+		/* Walking byte. */
+
+		stage_name = "bitflip 8/8";
+		stage_short = "flip8";
+		stage_max = len;
+
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
+
+			stage_cur_byte = stage_cur;
+
+			out_buf[stage_cur] ^= 0xFF;
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+			/* We also use this stage to pull off a simple trick: we identify
+			   bytes that seem to have no effect on the current execution path
+			   even when fully flipped - and we skip them during more expensive
+			   deterministic stages, such as arithmetics or known ints. */
+
+			if (!eff_map[EFF_APOS(stage_cur)]) {
+
+				u32 cksum;
+
+				/* If in dumb mode or if the file is very short, just flag everything
+				   without wasting time on checksums. */
+
+				if (!dumb_mode && len >= EFF_MIN_LEN)
+					cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
+				else
+					cksum = ~queue_cur->exec_cksum;
+
+				if (cksum != queue_cur->exec_cksum) {
+					eff_map[EFF_APOS(stage_cur)] = 1;
+					eff_cnt++;
+				}
+
+			}
+
+			out_buf[stage_cur] ^= 0xFF;
+
+		}
+
+		/* If the effector map is more than EFF_MAX_PERC dense, just flag the
+		   whole thing as worth fuzzing, since we wouldn't be saving much time
+		   anyway. */
+
+		if (eff_cnt != EFF_ALEN(len) &&
+			eff_cnt * 100 / EFF_ALEN(len) > EFF_MAX_PERC) {
+
+			memset(eff_map, 1, EFF_ALEN(len));
+
+			blocks_eff_select += EFF_ALEN(len);
+
+		}
+		else {
+
+			blocks_eff_select += eff_cnt;
+
+		}
+
+		blocks_eff_total += EFF_ALEN(len);
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP8] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP8] += stage_max;
+
+
+
+		/* Two walking bytes. */
+
+		if (len < 2) goto skip_bitflip;
+
+		stage_name = "bitflip 16/8";
+		stage_short = "flip16";
+		stage_cur = 0;
+		stage_max = len - 1;
+
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 1; i++) {
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
+				stage_max--;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			*(u16*)(out_buf + i) ^= 0xFFFF;
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+			stage_cur++;
+
+			*(u16*)(out_buf + i) ^= 0xFFFF;
+
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP16] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP16] += stage_max;
+
+
+
+		if (len < 4) goto skip_bitflip;
+
+		/* Four walking bytes. */
+
+		stage_name = "bitflip 32/8";
+		stage_short = "flip32";
+		stage_cur = 0;
+		stage_max = len - 3;
+
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 3; i++) {
+
+			/* Let's consult the effector map... */
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
+				!eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) {
+				stage_max--;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			*(u32*)(out_buf + i) ^= 0xFFFFFFFF;
+
+			if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+			stage_cur++;
+
+			*(u32*)(out_buf + i) ^= 0xFFFFFFFF;
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_FLIP32] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_FLIP32] += stage_max;
+
+
+
+
+	skip_bitflip:
+
+		if (no_arith) goto skip_arith;
+
+		/**********************
+		 * ARITHMETIC INC/DEC *
+		 **********************/
+
+		 /* 8-bit arithmetics. */
+
+		stage_name = "arith 8/8";
+		stage_short = "arith8";
+		stage_cur = 0;
+		stage_max = 2 * len * ARITH_MAX;
+
+
+		stage_val_type = STAGE_VAL_LE;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len; i++) {
+
+			u8 orig = out_buf[i];
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)]) {
+				stage_max -= 2 * ARITH_MAX;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 1; j <= ARITH_MAX; j++) {
+
+				u8 r = orig ^ (orig + j);
+
+				/* Do arithmetic operations only if the result couldn't be a product
+				   of a bitflip. */
+
+				if (!could_be_bitflip(r)) {
+
+					stage_cur_val = j;
+					out_buf[i] = orig + j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				r = orig ^ (orig - j);
+
+				if (!could_be_bitflip(r)) {
+
+					stage_cur_val = -j;
+					out_buf[i] = orig - j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				out_buf[i] = orig;
+
+			}
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_ARITH8] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_ARITH8] += stage_max;
+
+
+
+
+		/* 16-bit arithmetics, both endians. */
+
+		if (len < 2) goto skip_arith;
+
+		stage_name = "arith 16/8";
+		stage_short = "arith16";
+		stage_cur = 0;
+		stage_max = 4 * (len - 1) * ARITH_MAX;
+
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 1; i++) {
+
+			u16 orig = *(u16*)(out_buf + i);
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
+				stage_max -= 4 * ARITH_MAX;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 1; j <= ARITH_MAX; j++) {
+
+				u16 r1 = orig ^ (orig + j),
+					r2 = orig ^ (orig - j),
+					r3 = orig ^ SWAP16(SWAP16(orig) + j),
+					r4 = orig ^ SWAP16(SWAP16(orig) - j);
+
+				/* Try little endian addition and subtraction first. Do it only
+				   if the operation would affect more than one byte (hence the
+				   & 0xff overflow checks) and if it couldn't be a product of
+				   a bitflip. */
+
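+				/* E.g. with orig == 0x00ff and j == 1, orig + j flips nine bits
+				   at once (something no bitflip stage could have produced), so
+				   the case is tried; 0x0010 + 1 touches only the low byte and
+				   flips a single bit, so it is skipped. */
+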
+				stage_val_type = STAGE_VAL_LE;
+
+				if ((orig & 0xff) + j > 0xff && !could_be_bitflip(r1)) {
+
+					stage_cur_val = j;
+					*(u16*)(out_buf + i) = orig + j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((orig & 0xff) < j && !could_be_bitflip(r2)) {
+
+					stage_cur_val = -j;
+					*(u16*)(out_buf + i) = orig - j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				/* Big endian comes next. Same deal. */
+
+				stage_val_type = STAGE_VAL_BE;
+
+
+				if ((orig >> 8) + j > 0xff && !could_be_bitflip(r3)) {
+
+					stage_cur_val = j;
+					*(u16*)(out_buf + i) = SWAP16(SWAP16(orig) + j);
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((orig >> 8) < j && !could_be_bitflip(r4)) {
+
+					stage_cur_val = -j;
+					*(u16*)(out_buf + i) = SWAP16(SWAP16(orig) - j);
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				*(u16*)(out_buf + i) = orig;
+
+			}
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_ARITH16] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_ARITH16] += stage_max;
+
+
+
+		/* 32-bit arithmetics, both endians. */
+
+		if (len < 4) goto skip_arith;
+
+		stage_name = "arith 32/8";
+		stage_short = "arith32";
+		stage_cur = 0;
+		stage_max = 4 * (len - 3) * ARITH_MAX;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 3; i++) {
+
+			u32 orig = *(u32*)(out_buf + i);
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
+				!eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) {
+				stage_max -= 4 * ARITH_MAX;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 1; j <= ARITH_MAX; j++) {
+
+				u32 r1 = orig ^ (orig + j),
+					r2 = orig ^ (orig - j),
+					r3 = orig ^ SWAP32(SWAP32(orig) + j),
+					r4 = orig ^ SWAP32(SWAP32(orig) - j);
+
+				/* Little endian first. Same deal as with 16-bit: we only want to
+				   try if the operation would have effect on more than two bytes. */
+
+				stage_val_type = STAGE_VAL_LE;
+
+				if ((orig & 0xffff) + j > 0xffff && !could_be_bitflip(r1)) {
+
+					stage_cur_val = j;
+					*(u32*)(out_buf + i) = orig + j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((orig & 0xffff) < j && !could_be_bitflip(r2)) {
+
+					stage_cur_val = -j;
+					*(u32*)(out_buf + i) = orig - j;
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				/* Big endian next. */
+
+				stage_val_type = STAGE_VAL_BE;
+
+				if ((SWAP32(orig) & 0xffff) + j > 0xffff && !could_be_bitflip(r3)) {
+
+					stage_cur_val = j;
+					*(u32*)(out_buf + i) = SWAP32(SWAP32(orig) + j);
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((SWAP32(orig) & 0xffff) < j && !could_be_bitflip(r4)) {
+
+					stage_cur_val = -j;
+					*(u32*)(out_buf + i) = SWAP32(SWAP32(orig) - j);
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				*(u32*)(out_buf + i) = orig;
+
+			}
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_ARITH32] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_ARITH32] += stage_max;
+
+
+
+	skip_arith:
+
+		/**********************
+		 * INTERESTING VALUES *
+		 **********************/
+
+		stage_name = "interest 8/8";
+		stage_short = "int8";
+		stage_cur = 0;
+		stage_max = len * sizeof(interesting_8);
+
+
+
+		stage_val_type = STAGE_VAL_LE;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		/* Setting 8-bit integers. */
+
+		for (i = 0; i < len; i++) {
+
+			u8 orig = out_buf[i];
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)]) {
+				stage_max -= sizeof(interesting_8);
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 0; j < sizeof(interesting_8); j++) {
+
+				/* Skip if the value could be a product of bitflips or arithmetics. */
+
+				if (could_be_bitflip(orig ^ (u8)interesting_8[j]) ||
+					could_be_arith(orig, (u8)interesting_8[j], 1)) {
+					stage_max--;
+					continue;
+				}
+
+				stage_cur_val = interesting_8[j];
+				out_buf[i] = interesting_8[j];
+
+				if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+				out_buf[i] = orig;
+				stage_cur++;
+
+			}
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_INTEREST8] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_INTEREST8] += stage_max;
+
+
+
+		/* Setting 16-bit integers, both endians. */
+
+		if (no_arith || len < 2) goto skip_interest;
+
+		stage_name = "interest 16/8";
+		stage_short = "int16";
+		stage_cur = 0;
+		stage_max = 2 * (len - 1) * (sizeof(interesting_16) >> 1);
+
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 1; i++) {
+
+			u16 orig = *(u16*)(out_buf + i);
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
+				stage_max -= sizeof(interesting_16);
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 0; j < sizeof(interesting_16) / 2; j++) {
+
+				stage_cur_val = interesting_16[j];
+
+				/* Skip if this could be a product of a bitflip, arithmetics,
+				   or single-byte interesting value insertion. */
+
+				if (!could_be_bitflip(orig ^ (u16)interesting_16[j]) &&
+					!could_be_arith(orig, (u16)interesting_16[j], 2) &&
+					!could_be_interest(orig, (u16)interesting_16[j], 2, 0)) {
+
+					stage_val_type = STAGE_VAL_LE;
+
+					*(u16*)(out_buf + i) = interesting_16[j];
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((u16)interesting_16[j] != SWAP16(interesting_16[j]) &&
+					!could_be_bitflip(orig ^ SWAP16(interesting_16[j])) &&
+					!could_be_arith(orig, SWAP16(interesting_16[j]), 2) &&
+					!could_be_interest(orig, SWAP16(interesting_16[j]), 2, 1)) {
+
+					stage_val_type = STAGE_VAL_BE;
+
+					*(u16*)(out_buf + i) = SWAP16(interesting_16[j]);
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+			}
+
+			*(u16*)(out_buf + i) = orig;
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_INTEREST16] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_INTEREST16] += stage_max;
+
+
+
+
+		if (len < 4) goto skip_interest;
+
+		/* Setting 32-bit integers, both endians. */
+
+		stage_name = "interest 32/8";
+		stage_short = "int32";
+		stage_cur = 0;
+		stage_max = 2 * (len - 3) * (sizeof(interesting_32) >> 2);
+
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len - 3; i++) {
+
+			u32 orig = *(u32*)(out_buf + i);
+
+			/* Let's consult the effector map... */
+
+			if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
+				!eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) {
+				stage_max -= sizeof(interesting_32) >> 1;
+				continue;
+			}
+
+			stage_cur_byte = i;
+
+			for (j = 0; j < sizeof(interesting_32) / 4; j++) {
+
+				stage_cur_val = interesting_32[j];
+
+				/* Skip if this could be a product of a bitflip, arithmetics,
+				   or word interesting value insertion. */
+
+				if (!could_be_bitflip(orig ^ (u32)interesting_32[j]) &&
+					!could_be_arith(orig, interesting_32[j], 4) &&
+					!could_be_interest(orig, interesting_32[j], 4, 0)) {
+
+					stage_val_type = STAGE_VAL_LE;
+
+					*(u32*)(out_buf + i) = interesting_32[j];
+
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+				if ((u32)interesting_32[j] != SWAP32(interesting_32[j]) &&
+					!could_be_bitflip(orig ^ SWAP32(interesting_32[j])) &&
+					!could_be_arith(orig, SWAP32(interesting_32[j]), 4) &&
+					!could_be_interest(orig, SWAP32(interesting_32[j]), 4, 1)) {
+
+					stage_val_type = STAGE_VAL_BE;
+
+					*(u32*)(out_buf + i) = SWAP32(interesting_32[j]);
+					if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+					stage_cur++;
+
+				} else stage_max--;
+
+			}
+
+			*(u32*)(out_buf + i) = orig;
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_INTEREST32] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_INTEREST32] += stage_max;
+
+
+
+	skip_interest:
+
+		/********************
+		 * DICTIONARY STUFF *
+		 ********************/
+
+		if (!extras_cnt) goto skip_user_extras;
+
+		/* Overwrite with user-supplied extras. */
+
+		stage_name = "user extras (over)";
+		stage_short = "ext_UO";
+		stage_cur = 0;
+		stage_max = extras_cnt * len;
+
+
+		stage_val_type = STAGE_VAL_NONE;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len; i++) {
+
+			u32 last_len = 0;
+
+			stage_cur_byte = i;
+
+			/* Extras are sorted by size, from smallest to largest. This means
+			   that we don't have to worry about restoring the buffer in
+			   between writes at a particular offset determined by the outer
+			   loop. */
+
+			for (j = 0; j < extras_cnt; j++) {
+
+				/* Skip extras probabilistically if extras_cnt > MAX_DET_EXTRAS. Also
+				   skip them if there's no room to insert the payload, if the token
+				   is redundant, or if its entire span has no bytes set in the effector
+				   map. */
+
+				if ((extras_cnt > MAX_DET_EXTRAS && UR(extras_cnt) >= MAX_DET_EXTRAS) ||
+					extras[j].len > len - i ||
+					!memcmp(extras[j].data, out_buf + i, extras[j].len) ||
+					!memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, extras[j].len))) {
+
+					stage_max--;
+					continue;
+
+				}
+
+				last_len = extras[j].len;
+				memcpy(out_buf + i, extras[j].data, last_len);
+
+				if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+				stage_cur++;
+
+			}
+
+			/* Restore all the clobbered memory. */
+			memcpy(out_buf + i, in_buf + i, last_len);
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_EXTRAS_UO] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_EXTRAS_UO] += stage_max;
+
+		/* Insertion of user-supplied extras. */
+
+		stage_name = "user extras (insert)";
+		stage_short = "ext_UI";
+		stage_cur = 0;
+		stage_max = extras_cnt * len;
+
+
+
+
+		orig_hit_cnt = new_hit_cnt;
+
+		ex_tmp = ck_alloc(len + MAX_DICT_FILE);
+
+		for (i = 0; i <= len; i++) {
+
+			stage_cur_byte = i;
+
+			for (j = 0; j < extras_cnt; j++) {
+
+				if (len + extras[j].len > MAX_FILE) {
+					stage_max--;
+					continue;
+				}
+
+				/* Insert token */
+				memcpy(ex_tmp + i, extras[j].data, extras[j].len);
+
+				/* Copy tail */
+				memcpy(ex_tmp + i + extras[j].len, out_buf + i, len - i);
+
+				if (common_fuzz_stuff(argv, ex_tmp, len + extras[j].len)) {
+					ck_free(ex_tmp);
+					goto abandon_entry;
+				}
+
+				stage_cur++;
+
+			}
+
+			/* Copy head */
+			ex_tmp[i] = out_buf[i];
+
+		}
+
+		ck_free(ex_tmp);
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_EXTRAS_UI] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_EXTRAS_UI] += stage_max;
+
+	skip_user_extras:
+
+		if (!a_extras_cnt) goto skip_extras;
+
+		stage_name = "auto extras (over)";
+		stage_short = "ext_AO";
+		stage_cur = 0;
+		stage_max = MIN(a_extras_cnt, USE_AUTO_EXTRAS) * len;
+
+
+		stage_val_type = STAGE_VAL_NONE;
+
+		orig_hit_cnt = new_hit_cnt;
+
+		for (i = 0; i < len; i++) {
+
+			u32 last_len = 0;
+
+			stage_cur_byte = i;
+
+			for (j = 0; j < MIN(a_extras_cnt, USE_AUTO_EXTRAS); j++) {
+
+				/* See the comment in the earlier code; extras are sorted by size. */
+
+				if (a_extras[j].len > len - i ||
+					!memcmp(a_extras[j].data, out_buf + i, a_extras[j].len) ||
+					!memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, a_extras[j].len))) {
+
+					stage_max--;
+					continue;
+
+				}
+
+				last_len = a_extras[j].len;
+				memcpy(out_buf + i, a_extras[j].data, last_len);
+
+				if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
+
+				stage_cur++;
+
+			}
+
+			/* Restore all the clobbered memory. */
+			memcpy(out_buf + i, in_buf + i, last_len);
+
+		}
+
+		new_hit_cnt = queued_paths + unique_crashes;
+
+		stage_finds[STAGE_EXTRAS_AO] += new_hit_cnt - orig_hit_cnt;
+		stage_cycles[STAGE_EXTRAS_AO] += stage_max;
+
+	skip_extras:
+
+		/* If we made it here without jumping to havoc_stage or abandon_entry,
+		   we're properly done with deterministic steps and can mark it as such
+		   in the .state/ directory. */
+
+		if (!queue_cur->passed_det) mark_as_det_done(queue_cur);
+
+		/****************
+		 * RANDOM HAVOC *
+		 ****************/
+
+	havoc_stage:
+	pacemaker_fuzzing:
+
+
+		stage_cur_byte = -1;
+
+		/* The havoc stage mutation code is also invoked when splicing files; if the
+		   splice_cycle variable is set, generate different descriptions and such. */
+
+		if (!splice_cycle) {
+
+			stage_name = "MOpt-havoc";
+			stage_short = "MOpt_havoc";
+			stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) *
+				perf_score / havoc_div / 100;
+
+		} else {
+
+			static u8 tmp[32];
+
+			perf_score = orig_perf;
+
+			sprintf(tmp, "MOpt-core-splice %u", splice_cycle);
+			stage_name = tmp;
+			stage_short = "MOpt_core_splice";
+			stage_max = SPLICE_HAVOC * perf_score / havoc_div / 100;
+
+		}
+
+		s32 temp_len_puppet;
+		cur_ms_lv = get_cur_time();
+
+		//for (; swarm_now < swarm_num; swarm_now++)
+		{
+			if (key_puppet == 1) {
+				if (unlikely(orig_hit_cnt_puppet == 0)) {
+					orig_hit_cnt_puppet = queued_paths + unique_crashes;
+					last_limit_time_start = get_cur_time();
+					SPLICE_CYCLES_puppet = (UR(SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) + SPLICE_CYCLES_puppet_low);
+				}
+			}
+			{
+			havoc_stage_puppet:
+
+				stage_cur_byte = -1;
+
+				/* The havoc stage mutation code is also invoked when splicing files; if the
+				   splice_cycle variable is set, generate different descriptions and such. */
+
+				if (!splice_cycle) {
+					stage_name = "MOpt core havoc";
+					stage_short = "MOpt_core_havoc";
+					stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) *
+						perf_score / havoc_div / 100;
+				} else {
+					static u8 tmp[32];
+					perf_score = orig_perf;
+					sprintf(tmp, "MOpt core splice %u", splice_cycle);
+					stage_name = tmp;
+					stage_short = "MOpt_core_splice";
+					stage_max = SPLICE_HAVOC * perf_score / havoc_div / 100;
+				}
+
+				if (stage_max < HAVOC_MIN) stage_max = HAVOC_MIN;
+				temp_len = len;
+				orig_hit_cnt = queued_paths + unique_crashes;
+				havoc_queued = queued_paths;
+
+				for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {
+
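+					/* Stack between 2 and 128 random mutations per havoc round
+					   (a power of two; HAVOC_STACK_POW2 is 7 by default). */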
+					u32 use_stacking = 1 << (1 + UR(HAVOC_STACK_POW2));
+					stage_cur_val = use_stacking;
+
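+					/* Snapshot the per-operator cycle counters so that finds from
+					   this stacked round can be credited to the operators that
+					   actually ran (see the bookkeeping after common_fuzz_stuff). */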
+					for (i = 0; i < operator_num; i++) {
+						core_operator_cycles_puppet_v3[i] = core_operator_cycles_puppet_v2[i];
+					}
+
+					for (i = 0; i < use_stacking; i++) {
+
+						switch (select_algorithm()) {
+
+						case 0:
+							/* Flip a single bit somewhere. Spooky! */
+							FLIP_BIT(out_buf, UR(temp_len << 3));
+							core_operator_cycles_puppet_v2[STAGE_FLIP1] += 1;
+							break;
+
+
+						case 1:
+							if (temp_len < 2) break;
+							temp_len_puppet = UR(temp_len << 3);
+							FLIP_BIT(out_buf, temp_len_puppet);
+							FLIP_BIT(out_buf, temp_len_puppet + 1);
+							core_operator_cycles_puppet_v2[STAGE_FLIP2] += 1;
+							break;
+
+						case 2:
+							if (temp_len < 2) break;
+							temp_len_puppet = UR(temp_len << 3);
+							FLIP_BIT(out_buf, temp_len_puppet);
+							FLIP_BIT(out_buf, temp_len_puppet + 1);
+							FLIP_BIT(out_buf, temp_len_puppet + 2);
+							FLIP_BIT(out_buf, temp_len_puppet + 3);
+							core_operator_cycles_puppet_v2[STAGE_FLIP4] += 1;
+							break;
+
+						case 3:
+							if (temp_len < 4) break;
+							out_buf[UR(temp_len)] ^= 0xFF;
+							core_operator_cycles_puppet_v2[STAGE_FLIP8] += 1;
+							break;
+
+						case 4:
+							if (temp_len < 8) break;
+							*(u16*)(out_buf + UR(temp_len - 1)) ^= 0xFFFF;
+							core_operator_cycles_puppet_v2[STAGE_FLIP16] += 1;
+							break;
+
+						case 5:
+							if (temp_len < 8) break;
+							*(u32*)(out_buf + UR(temp_len - 3)) ^= 0xFFFFFFFF;
+							core_operator_cycles_puppet_v2[STAGE_FLIP32] += 1;
+							break;
+
+						case 6:
+							out_buf[UR(temp_len)] -= 1 + UR(ARITH_MAX);
+							out_buf[UR(temp_len)] += 1 + UR(ARITH_MAX);
+							core_operator_cycles_puppet_v2[STAGE_ARITH8] += 1;
+							break;
+
+						case 7:
+							/* Randomly subtract from word, random endian. */
+							if (temp_len < 8) break;
+							if (UR(2)) {
+								u32 pos = UR(temp_len - 1);
+								*(u16*)(out_buf + pos) -= 1 + UR(ARITH_MAX);
+							} else {
+								u32 pos = UR(temp_len - 1);
+								u16 num = 1 + UR(ARITH_MAX);
+								*(u16*)(out_buf + pos) =
+									SWAP16(SWAP16(*(u16*)(out_buf + pos)) - num);
+							}
+							/* Randomly add to word, random endian. */
+							if (UR(2)) {
+								u32 pos = UR(temp_len - 1);
+								*(u16*)(out_buf + pos) += 1 + UR(ARITH_MAX);
+							} else {
+								u32 pos = UR(temp_len - 1);
+								u16 num = 1 + UR(ARITH_MAX);
+								*(u16*)(out_buf + pos) =
+									SWAP16(SWAP16(*(u16*)(out_buf + pos)) + num);
+							}
+							core_operator_cycles_puppet_v2[STAGE_ARITH16] += 1;
+							break;
+
+
+						case 8:
+							/* Randomly subtract from dword, random endian. */
+							if (temp_len < 8) break;
+							if (UR(2)) {
+								u32 pos = UR(temp_len - 3);
+								*(u32*)(out_buf + pos) -= 1 + UR(ARITH_MAX);
+							} else {
+								u32 pos = UR(temp_len - 3);
+								u32 num = 1 + UR(ARITH_MAX);
+								*(u32*)(out_buf + pos) =
+									SWAP32(SWAP32(*(u32*)(out_buf + pos)) - num);
+							}
+							/* Randomly add to dword, random endian. */
+							if (UR(2)) {
+								u32 pos = UR(temp_len - 3);
+								*(u32*)(out_buf + pos) += 1 + UR(ARITH_MAX);
+							} else {
+								u32 pos = UR(temp_len - 3);
+								u32 num = 1 + UR(ARITH_MAX);
+								*(u32*)(out_buf + pos) =
+									SWAP32(SWAP32(*(u32*)(out_buf + pos)) + num);
+							}
+							core_operator_cycles_puppet_v2[STAGE_ARITH32] += 1;
+							break;
+
+
+						case 9:
+							/* Set byte to interesting value. */
+							if (temp_len < 4) break;
+							out_buf[UR(temp_len)] = interesting_8[UR(sizeof(interesting_8))];
+							core_operator_cycles_puppet_v2[STAGE_INTEREST8] += 1;
+							break;
+
+						case 10:
+							/* Set word to interesting value, randomly choosing endian. */
+							if (temp_len < 8) break;
+							if (UR(2)) {
+								*(u16*)(out_buf + UR(temp_len - 1)) =
+									interesting_16[UR(sizeof(interesting_16) >> 1)];
+							} else {
+								*(u16*)(out_buf + UR(temp_len - 1)) = SWAP16(
+									interesting_16[UR(sizeof(interesting_16) >> 1)]);
+							}
+							core_operator_cycles_puppet_v2[STAGE_INTEREST16] += 1;
+							break;
+
+
+						case 11:
+							/* Set dword to interesting value, randomly choosing endian. */
+
+							if (temp_len < 8) break;
+
+							if (UR(2)) {
+								*(u32*)(out_buf + UR(temp_len - 3)) =
+									interesting_32[UR(sizeof(interesting_32) >> 2)];
+							} else {
+								*(u32*)(out_buf + UR(temp_len - 3)) = SWAP32(
+									interesting_32[UR(sizeof(interesting_32) >> 2)]);
+							}
+							core_operator_cycles_puppet_v2[STAGE_INTEREST32] += 1;
+							break;
+
+
+						case 12:
+
+							/* Just set a random byte to a random value. Because,
+							   why not. We use XOR with 1-255 to eliminate the
+							   possibility of a no-op. */
+
+							out_buf[UR(temp_len)] ^= 1 + UR(255);
+							core_operator_cycles_puppet_v2[STAGE_RANDOMBYTE] += 1;
+							break;
+
+
+						case 13: {
+
+							/* Delete bytes. We're making this a bit more likely
+							   than insertion (the next option) in hopes of keeping
+							   files reasonably small. */
+
+							u32 del_from, del_len;
+
+							if (temp_len < 2) break;
+
+							/* Don't delete too much. */
+
+							del_len = choose_block_len(temp_len - 1);
+
+							del_from = UR(temp_len - del_len + 1);
+
+							memmove(out_buf + del_from, out_buf + del_from + del_len,
+								temp_len - del_from - del_len);
+
+							temp_len -= del_len;
+							core_operator_cycles_puppet_v2[STAGE_DELETEBYTE] += 1;
+							break;
+
+						}
+
+						case 14:
+
+							if (temp_len + HAVOC_BLK_XL < MAX_FILE) {
+
+								/* Clone bytes (75%) or insert a block of constant bytes (25%). */
+
+								u8  actually_clone = UR(4);
+								u32 clone_from, clone_to, clone_len;
+								u8* new_buf;
+
+								if (actually_clone) {
+
+									clone_len = choose_block_len(temp_len);
+									clone_from = UR(temp_len - clone_len + 1);
+
+								} else {
+
+									clone_len = choose_block_len(HAVOC_BLK_XL);
+									clone_from = 0;
+
+								}
+
+								clone_to = UR(temp_len);
+
+								new_buf = ck_alloc_nozero(temp_len + clone_len);
+
+								/* Head */
+
+								memcpy(new_buf, out_buf, clone_to);
+
+								/* Inserted part */
+
+								if (actually_clone)
+									memcpy(new_buf + clone_to, out_buf + clone_from, clone_len);
+								else
+									memset(new_buf + clone_to,
+										UR(2) ? UR(256) : out_buf[UR(temp_len)], clone_len);
+
+								/* Tail */
+								memcpy(new_buf + clone_to + clone_len, out_buf + clone_to,
+									temp_len - clone_to);
+
+								ck_free(out_buf);
+								out_buf = new_buf;
+								temp_len += clone_len;
+								core_operator_cycles_puppet_v2[STAGE_Clone75] += 1;
+							}
+
+							break;
+
+						case 15: {
+
+							/* Overwrite bytes with a randomly selected chunk (75%) or fixed
+							   bytes (25%). */
+
+							u32 copy_from, copy_to, copy_len;
+
+							if (temp_len < 2) break;
+
+							copy_len = choose_block_len(temp_len - 1);
+
+							copy_from = UR(temp_len - copy_len + 1);
+							copy_to = UR(temp_len - copy_len + 1);
+
+							if (UR(4)) {
+
+								if (copy_from != copy_to)
+									memmove(out_buf + copy_to, out_buf + copy_from, copy_len);
+
+							}
+							else memset(out_buf + copy_to,
+								UR(2) ? UR(256) : out_buf[UR(temp_len)], copy_len);
+							core_operator_cycles_puppet_v2[STAGE_OverWrite75] += 1;
+							break;
+
+						}
+
+
+						}
+
+					}
+
+					tmp_core_time += 1;
+
+					u64 temp_total_found = queued_paths + unique_crashes;
+
+					if (common_fuzz_stuff(argv, out_buf, temp_len))
+						goto abandon_entry_puppet;
+
+					/* out_buf might have been mangled a bit, so let's restore it to its
+					   original size and shape. */
+
+					if (temp_len < len) out_buf = ck_realloc(out_buf, len);
+					temp_len = len;
+					memcpy(out_buf, in_buf, len);
+
+					/* If we're finding new stuff, let's run for a bit longer, limits
+					   permitting. */
+
+					if (queued_paths != havoc_queued) {
+
+						if (perf_score <= havoc_max_mult * 100) {
+							stage_max *= 2;
+							perf_score *= 2;
+						}
+
+						havoc_queued = queued_paths;
+
+					}
+
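+					/* Credit any new finds from this round to every operator whose
+					   cycle counter advanced past the snapshot taken above. */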
+					if (unlikely(queued_paths + unique_crashes > temp_total_found))
+					{
+						u64 temp_temp_puppet = queued_paths + unique_crashes - temp_total_found;
+						total_puppet_find = total_puppet_find + temp_temp_puppet;
+						for (i = 0; i < 16; i++)
+						{
+							if (core_operator_cycles_puppet_v2[i] > core_operator_cycles_puppet_v3[i])
+								core_operator_finds_puppet_v2[i] += temp_temp_puppet;
+						}
+					}
+
+				}
+
+				new_hit_cnt = queued_paths + unique_crashes;
+
+
+#ifndef IGNORE_FINDS
+
+				/************
+				 * SPLICING *
+				 ************/
+
+
+			retry_splicing_puppet:
+
+
+
+				if (use_splicing && splice_cycle++ < SPLICE_CYCLES_puppet &&
+					queued_paths > 1 && queue_cur->len > 1) {
+
+					struct queue_entry* target;
+					u32 tid, split_at;
+					u8* new_buf;
+					s32 f_diff, l_diff;
+
+					/* First of all, if we've modified in_buf for havoc, let's clean that
+					   up... */
+
+					if (in_buf != orig_in) {
+						ck_free(in_buf);
+						in_buf = orig_in;
+						len = queue_cur->len;
+					}
+
+					/* Pick a random queue entry and seek to it. Don't splice with yourself. */
+
+					do { tid = UR(queued_paths); } while (tid == current_entry);
+
+					splicing_with = tid;
+					target = queue;
+
+					while (tid >= 100) { target = target->next_100; tid -= 100; }
+					while (tid--) target = target->next;
+
+					/* Make sure that the target has a reasonable length. */
+
+					while (target && (target->len < 2 || target == queue_cur)) {
+						target = target->next;
+						splicing_with++;
+					}
+
+					if (!target) goto retry_splicing_puppet;
+
+					/* Read the testcase into a new buffer. */
+
+					fd = open(target->fname, O_RDONLY);
+
+					if (fd < 0) PFATAL("Unable to open '%s'", target->fname);
+
+					new_buf = ck_alloc_nozero(target->len);
+
+					ck_read(fd, new_buf, target->len, target->fname);
+
+					close(fd);
+
+					/* Find a suitable splicing location, somewhere between the first and
+					   the last differing byte. Bail out if the difference is just a single
+					   byte or so. */
+
+					locate_diffs(in_buf, new_buf, MIN(len, target->len), &f_diff, &l_diff);
+
+					if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) {
+						ck_free(new_buf);
+						goto retry_splicing_puppet;
+					}
+
+					/* Split somewhere between the first and last differing byte. */
+
+					split_at = f_diff + UR(l_diff - f_diff);
+
+					/* Do the thing. */
+
+					len = target->len;
+					memcpy(new_buf, in_buf, split_at);
+					in_buf = new_buf;
+					ck_free(out_buf);
+					out_buf = ck_alloc_nozero(len);
+					memcpy(out_buf, in_buf, len);
+
+					goto havoc_stage_puppet;
+
+				}
+
+#endif /* !IGNORE_FINDS */
+
+				ret_val = 0;
+			abandon_entry:
+			abandon_entry_puppet:
+
+				if (splice_cycle >= SPLICE_CYCLES_puppet)
+					SPLICE_CYCLES_puppet = (UR(SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) + SPLICE_CYCLES_puppet_low);
+
+
+				splicing_with = -1;
+
+
+				munmap(orig_in, queue_cur->len);
+
+				if (in_buf != orig_in) ck_free(in_buf);
+				ck_free(out_buf);
+				ck_free(eff_map);
+
+
+				if (key_puppet == 1)
+				{
+					if (unlikely(queued_paths + unique_crashes > ((queued_paths + unique_crashes)*limit_time_bound + orig_hit_cnt_puppet)))
+					{
+						key_puppet = 0;
+						cur_ms_lv = get_cur_time();
+						new_hit_cnt = queued_paths + unique_crashes;
+						orig_hit_cnt_puppet = 0;
+						last_limit_time_start = 0;
+					}
+				}
+
+
+				if (unlikely(tmp_core_time > period_core))
+				{
+					total_pacemaker_time += tmp_core_time;
+					tmp_core_time = 0;
+					temp_puppet_find = total_puppet_find;
+					new_hit_cnt = queued_paths + unique_crashes;
+
+					u64 temp_stage_finds_puppet = 0;
+					for (i = 0; i < operator_num; i++)
+					{
+
+						core_operator_finds_puppet[i] = core_operator_finds_puppet_v2[i];
+						core_operator_cycles_puppet[i] = core_operator_cycles_puppet_v2[i];
+						temp_stage_finds_puppet += core_operator_finds_puppet[i];
+					}
+
+					key_module = 2;
+
+					old_hit_count = new_hit_cnt;
+				}
+				return ret_val;
+			}
+		}
+
+
+#undef FLIP_BIT
+
+}
+
+
+void pso_updating(void) {
+
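+	/* One PSO step: recompute the global best G_best from the accumulated
+	   per-operator find counts, update every swarm's velocity and position,
+	   and rebuild the cumulative probabilities used by select_algorithm(). */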
+	g_now += 1;
+	if (g_now > g_max) g_now = 0;
+	w_now = (w_init - w_end) * (g_max - g_now) / g_max + w_end;
+	int tmp_swarm, i, j;
+	u64 temp_operator_finds_puppet = 0;
+	for (i = 0; i < operator_num; i++)
+	{
+		operator_finds_puppet[i] = core_operator_finds_puppet[i];
+
+		for (j = 0; j < swarm_num; j++)
+		{
+			operator_finds_puppet[i] = operator_finds_puppet[i] + stage_finds_puppet[j][i];
+		}
+		temp_operator_finds_puppet = temp_operator_finds_puppet + operator_finds_puppet[i];
+	}
+
+	for (i = 0; i < operator_num; i++)
+	{
+		if (operator_finds_puppet[i])
+			G_best[i] = (double)((double)(operator_finds_puppet[i]) / (double)(temp_operator_finds_puppet));
+	}
+
+	for (tmp_swarm = 0; tmp_swarm < swarm_num; tmp_swarm++)
+	{
+		double x_temp = 0.0;
+		for (i = 0; i < operator_num; i++)
+		{
+			probability_now[tmp_swarm][i] = 0.0;
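+			/* Classic PSO update: v <- w*v + c*(L_best - x) + c*(G_best - x),
+			   where RAND_C is assumed to fold the usual random factor into the
+			   acceleration coefficient. */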
+			v_now[tmp_swarm][i] = w_now * v_now[tmp_swarm][i] + RAND_C * (L_best[tmp_swarm][i] - x_now[tmp_swarm][i]) + RAND_C * (G_best[i] - x_now[tmp_swarm][i]);
+			x_now[tmp_swarm][i] += v_now[tmp_swarm][i];
+			if (x_now[tmp_swarm][i] > v_max)
+				x_now[tmp_swarm][i] = v_max;
+			else if (x_now[tmp_swarm][i] < v_min)
+				x_now[tmp_swarm][i] = v_min;
+			x_temp += x_now[tmp_swarm][i];
+		}
+
+		for (i = 0; i < operator_num; i++)
+		{
+			x_now[tmp_swarm][i] = x_now[tmp_swarm][i] / x_temp;
+			if (likely(i != 0))
+				probability_now[tmp_swarm][i] = probability_now[tmp_swarm][i - 1] + x_now[tmp_swarm][i];
+			else
+				probability_now[tmp_swarm][i] = x_now[tmp_swarm][i];
+		}
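+
+		/* The cumulative distribution must end at ~1.0; allow a little
+		   floating-point drift either way. */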
+		if (probability_now[tmp_swarm][operator_num - 1] < 0.99 || probability_now[tmp_swarm][operator_num - 1] > 1.01) FATAL("ERROR probability");
+	}
+	swarm_now = 0;
+	key_module = 0;
+}
+
+
+/* Larger change for the MOpt implementation: the original fuzz_one() was
+   renamed to fuzz_one_original(). All documentation references to fuzz_one()
+   therefore refer to fuzz_one_original(). */
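+/* key_module selects the MOpt stage: 0 = pilot fuzzing, 1 = core fuzzing,
+   2 = PSO parameter update; limit_time_sig == 0 means -L was not given and
+   the original fuzz_one() runs. */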
+static u8 fuzz_one(char** argv) {
+	int key_val_lv = 0;
+	if (limit_time_sig == 0) {
+		key_val_lv = fuzz_one_original(argv);
+	} else {
+		if (key_module == 0)
+			key_val_lv = pilot_fuzzing(argv);
+		else if (key_module == 1)
+			key_val_lv = core_fuzzing(argv);
+		else if (key_module == 2)
+			pso_updating();
+	}
+
+	return key_val_lv;
+}
+
 
 /* Grab interesting test cases from other fuzzers. */
 
@@ -7535,12 +11168,14 @@ EXP_ST void check_binary(u8* fname) {
 
 #else
 
+#if !defined(__arm__) && !defined(__arm64__)
   if (f_data[0] != 0xCF || f_data[1] != 0xFA || f_data[2] != 0xED)
     FATAL("Program '%s' is not a 64-bit Mach-O binary", target_path);
+#endif
 
 #endif /* ^!__APPLE__ */
 
-  if (!qemu_mode && !dumb_mode &&
+  if (!qemu_mode && !unicorn_mode && !dumb_mode &&
       !memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) {
 
     SAYF("\n" cLRD "[-] " cRST
@@ -7560,15 +11195,15 @@ EXP_ST void check_binary(u8* fname) {
 
   }
 
-  if (qemu_mode &&
+  if ((qemu_mode || unicorn_mode) &&
       memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) {
 
     SAYF("\n" cLRD "[-] " cRST
          "This program appears to be instrumented with afl-gcc, but is being run in\n"
-         "    QEMU mode (-Q). This is probably not what you want - this setup will be\n"
-         "    slow and offer no practical benefits.\n");
+         "    QEMU or Unicorn mode (-Q or -U). This is probably not what you want -\n"
+         "    this setup will be slow and offer no practical benefits.\n");
 
-    FATAL("Instrumentation found in -Q mode");
+    FATAL("Instrumentation found in -Q or -U mode");
 
   }
 
@@ -7694,24 +11329,33 @@ static void usage(u8* argv0) {
        "  -f file       - location read by the fuzzed program (stdin)\n"
        "  -t msec       - timeout for each run (auto-scaled, 50-%u ms)\n"
        "  -m megs       - memory limit for child process (%u MB)\n"
-       "  -Q            - use binary-only instrumentation (QEMU mode)\n\n"     
+       "  -Q            - use binary-only instrumentation (QEMU mode)\n"
+       "  -U            - use Unicorn-based instrumentation (Unicorn mode)\n\n"
+       "  -L minutes    - use MOpt(imize) mode and set the limit time for entering the\n"
+       "                  pacemaker mode (minutes of no new paths, 0 = immediately).\n"
+       "                  A recommended value is 10-60; see docs/README.MOpt\n\n"
  
        "Fuzzing behavior settings:\n"
        "  -d            - quick & dirty mode (skips deterministic steps)\n"
        "  -n            - fuzz without instrumentation (dumb mode)\n"
        "  -x dir        - optional fuzzer dictionary (see README)\n\n"
 
+       "Testing settings:\n"
+       "  -s seed       - use a fixed seed for the RNG\n"
+       "  -V seconds    - fuzz for a maximum total time of seconds then terminate\n"
+       "  -E execs      - fuzz for a maximum number of total executions then terminate\n\n"
+
        "Other stuff:\n"
        "  -T text       - text banner to show on the screen\n"
        "  -M / -S id    - distributed mode (see parallel_fuzzing.txt)\n"
+       "  -B bitmap.txt - mutate a specific test case, use the out/fuzz_bitmap file\n"
        "  -C            - crash exploration mode (the peruvian rabbit thing)\n"
-       "  -s seed       - use a fixed seed for the rng - important to testing\n"
        "  -e ext        - File extension for the temporarily generated test case\n\n"
 
 #ifdef USE_PYTHON
        "Compiled with Python 2.7 module support, see docs/python_mutators.txt\n"
 #endif
-       "For additional tips, please consult %s/README.\n\n",
+       "For additional tips, please consult %s/README\n\n",
 
        argv0, EXEC_TIMEOUT, MEM_LIMIT, doc_path);
 
@@ -7871,7 +11515,6 @@ static void setup_cmdline_file(char** argv) {
 
 EXP_ST void setup_stdio_file(void) {
 
-  //u8* fn = alloc_printf("%s/.cur_input", tmp_dir);
   u8* fn;
   if (file_extension) {
     fn = alloc_printf("%s/.cur_input.%s", out_dir, file_extension);
@@ -7959,15 +11602,29 @@ static void check_crash_handling(void) {
 /* Check CPU governor. */
 
 static void check_cpu_governor(void) {
-
+#ifdef __linux__
   FILE* f;
   u8 tmp[128];
   u64 min = 0, max = 0;
 
   if (getenv("AFL_SKIP_CPUFREQ")) return;
 
+  if (cpu_aff > 0)
+    snprintf(tmp, sizeof(tmp), "%s%d%s", "/sys/devices/system/cpu/cpu", cpu_aff, "/cpufreq/scaling_governor");
+  else
+    snprintf(tmp, sizeof(tmp), "%s", "/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor");
   f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor", "r");
-  if (!f) return;
+  if (!f) {
+    if (cpu_aff > 0)
+      snprintf(tmp, sizeof(tmp), "%s%d%s", "/sys/devices/system/cpu/cpufreq/policy", cpu_aff, "/scaling_governor");
+    else
+      snprintf(tmp, sizeof(tmp), "%s", "/sys/devices/system/cpu/cpufreq/policy0/scaling_governor");
+    f = fopen(tmp, "r");
+  }
+  if (!f) {
+    WARNF("Could not check CPU scaling governor");
+    return;
+  }
 
   ACTF("Checking CPU scaling governor...");
 
@@ -8008,7 +11665,7 @@ static void check_cpu_governor(void) {
        min / 1024, max / 1024);
 
   FATAL("Suboptimal CPU scaling governor");
-
+#endif
 }
 
 
@@ -8179,58 +11836,6 @@ static void check_asan_opts(void) {
 } 
 
 
-/* Detect @@ in args. */
-
-EXP_ST void detect_file_args(char** argv) {
-
-  u32 i = 0;
-  u8* cwd = getcwd(NULL, 0);
-
-  if (!cwd) PFATAL("getcwd() failed");
-
-  while (argv[i]) {
-
-    u8* aa_loc = strstr(argv[i], "@@");
-
-    if (aa_loc) {
-
-      u8 *aa_subst, *n_arg;
-
-      /* If we don't have a file name chosen yet, use a safe default. */
-
-      if (!out_file) {
-        if (file_extension) {
-            out_file = alloc_printf("%s/.cur_input.%s", out_dir, file_extension);
-        } else {
-            out_file = alloc_printf("%s/.cur_input", out_dir);
-        }
-      }
-
-      /* Be sure that we're always using fully-qualified paths. */
-
-      if (out_file[0] == '/') aa_subst = out_file;
-      else aa_subst = alloc_printf("%s/%s", cwd, out_file);
-
-      /* Construct a replacement argv value. */
-
-      *aa_loc = 0;
-      n_arg = alloc_printf("%s%s%s", argv[i], aa_subst, aa_loc + 2);
-      argv[i] = n_arg;
-      *aa_loc = '@';
-
-      if (out_file[0] != '/') ck_free(aa_subst);
-
-    }
-
-    i++;
-
-  }
-
-  free(cwd); /* not tracked */
-
-}
-
-
 /* Set up signal handlers. More complicated that needs to be, because libc on
    Solaris doesn't resume interrupted reads(), sets SA_RESETHAND when you call
    siginterrupt(), and does other stupid things. */
@@ -8344,7 +11949,6 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 
 }
 
-
 /* Make a copy of the current command line. */
 
 static void save_cmdline(u32 argc, char** argv) {
@@ -8399,14 +12003,14 @@ int main(int argc, char** argv) {
   struct timeval tv;
   struct timezone tz;
 
-  SAYF(cCYA "afl-fuzz" VERSION cRST " by <lcamtuf@google.com>, schedules by <marcel.boehme@acm.org>\n");
+  SAYF(cCYA "afl-fuzz" VERSION cRST " based on afl by <lcamtuf@google.com> and a big online community\n");
 
   doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;
 
   gettimeofday(&tv, &tz);
   init_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
 
-  while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:Qe:p:s:")) > 0)
+  while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:QUe:p:s:V:E:L:")) > 0)
 
     switch (opt) {
 
@@ -8607,6 +12211,115 @@ int main(int argc, char** argv) {
 
         break;
 
+      case 'U': /* Unicorn mode */
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        unicorn_mode = 1;
+
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+
+        break;
+
+      case 'V': {
+           most_time_key = 1;
+           if (sscanf(optarg, "%llu", &most_time) < 1 || optarg[0] == '-')
+             FATAL("Bad syntax used for -V");
+        }
+        break;
+
+      case 'E': {
+           most_execs_key = 1;
+           if (sscanf(optarg, "%llu", &most_execs) < 1 || optarg[0] == '-')
+             FATAL("Bad syntax used for -E");
+        }
+        break;
+
+      case 'L': { /* MOpt mode */
+
+              if (limit_time_sig)  FATAL("Multiple -L options not supported");
+              limit_time_sig = 1;
+              havoc_max_mult = HAVOC_MAX_MULT_MOPT;
+
+			if (sscanf(optarg, "%llu", &limit_time_puppet) < 1 ||
+				optarg[0] == '-') FATAL("Bad syntax used for -L");
+
+			u64 limit_time_puppet2 = limit_time_puppet * 60 * 1000;
+
+			if (limit_time_puppet2 < limit_time_puppet) FATAL("limit_time overflow");
+			limit_time_puppet = limit_time_puppet2;
+
+			SAYF("limit_time_puppet %llu\n", limit_time_puppet);
+			swarm_now = 0;
+
+			if (limit_time_puppet == 0)
+				key_puppet = 1;
+
+			int i;
+			int tmp_swarm = 0;
+
+			if (g_now > g_max) g_now = 0;
+			w_now = (w_init - w_end) * (g_max - g_now) / g_max + w_end;
+
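+			/* Seed every swarm with a random, normalized position vector and a
+			   small uniform velocity, then run one PSO step to obtain the
+			   initial per-operator selection probabilities. */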
+			for (tmp_swarm = 0; tmp_swarm < swarm_num; tmp_swarm++) {
+				double total_puppet_temp = 0.0;
+				swarm_fitness[tmp_swarm] = 0.0;
+
+				for (i = 0; i < operator_num; i++) {
+					stage_finds_puppet[tmp_swarm][i] = 0;
+					probability_now[tmp_swarm][i] = 0.0;
+					x_now[tmp_swarm][i] = ((double)(random() % 7000)*0.0001 + 0.1);
+					total_puppet_temp += x_now[tmp_swarm][i];
+					v_now[tmp_swarm][i] = 0.1;
+					L_best[tmp_swarm][i] = 0.5;
+					G_best[i] = 0.5;
+					eff_best[tmp_swarm][i] = 0.0;
+
+				}
+
+				for (i = 0; i < operator_num; i++) {
+					stage_cycles_puppet_v2[tmp_swarm][i] = stage_cycles_puppet[tmp_swarm][i];
+					stage_finds_puppet_v2[tmp_swarm][i] = stage_finds_puppet[tmp_swarm][i];
+					x_now[tmp_swarm][i] = x_now[tmp_swarm][i] / total_puppet_temp;
+				}
+
+				double x_temp = 0.0;
+
+				for (i = 0; i < operator_num; i++) {
+					probability_now[tmp_swarm][i] = 0.0;
+					v_now[tmp_swarm][i] = w_now * v_now[tmp_swarm][i] + RAND_C * (L_best[tmp_swarm][i] - x_now[tmp_swarm][i]) + RAND_C * (G_best[i] - x_now[tmp_swarm][i]);
+
+					x_now[tmp_swarm][i] += v_now[tmp_swarm][i];
+
+					if (x_now[tmp_swarm][i] > v_max)
+						x_now[tmp_swarm][i] = v_max;
+					else if (x_now[tmp_swarm][i] < v_min)
+						x_now[tmp_swarm][i] = v_min;
+
+					x_temp += x_now[tmp_swarm][i];
+				}
+
+				for (i = 0; i < operator_num; i++) {
+					x_now[tmp_swarm][i] = x_now[tmp_swarm][i] / x_temp;
+					if (likely(i != 0))
+						probability_now[tmp_swarm][i] = probability_now[tmp_swarm][i - 1] + x_now[tmp_swarm][i];
+					else
+						probability_now[tmp_swarm][i] = x_now[tmp_swarm][i];
+				}
+				if (probability_now[tmp_swarm][operator_num - 1] < 0.99 || probability_now[tmp_swarm][operator_num - 1] > 1.01)
+                                    FATAL("ERROR probability");
+			}
+
+			for (i = 0; i < operator_num; i++) {
+				core_operator_finds_puppet[i] = 0;
+				core_operator_finds_puppet_v2[i] = 0;
+				core_operator_cycles_puppet[i] = 0;
+				core_operator_cycles_puppet_v2[i] = 0;
+				core_operator_cycles_puppet_v3[i] = 0;
+			}
+
+        }
+        break;
+
       default:
 
         usage(argv[0]);
@@ -8640,9 +12353,20 @@ int main(int argc, char** argv) {
 
     if (crash_mode) FATAL("-C and -n are mutually exclusive");
     if (qemu_mode)  FATAL("-Q and -n are mutually exclusive");
+    if (unicorn_mode) FATAL("-U and -n are mutually exclusive");
 
   }
 
+  if (index(argv[optind], '/') == NULL) WARNF(cLRD "Target binary called without a prefixed path, make sure you are fuzzing the right binary: " cRST "%s", argv[optind]);
+
+  OKF("afl++ is maintained by Marc \"van Hauser\" Heuse, Heiko \"hexcoder\" Eissfeldt and Andrea Fioraldi");
+  OKF("afl++ is open source, get it at https://github.com/vanhauser-thc/AFLplusplus");
+  OKF("Power schedules from github.com/mboehme/aflfast");
+  OKF("Python Mutator and llvm_mode whitelisting from github.com/choller/afl");
+  OKF("afl-tmin fork server patch from github.com/nccgroup/TriforceAFL");
+  OKF("MOpt Mutator from github.com/puppet-meteor/MOpt-AFL");
+  ACTF("Getting to work...");
+
   switch (schedule) {
     case FAST:    OKF ("Using exponential power schedule (FAST)"); break;
     case COE:     OKF ("Using cut-off exponential power schedule (COE)"); break;
@@ -8709,7 +12433,12 @@ int main(int argc, char** argv) {
 
   setup_post();
   setup_custom_mutator();
-  setup_shm();
+  setup_shm(dumb_mode);
+
+  if (!in_bitmap) memset(virgin_bits, 255, MAP_SIZE);
+  memset(virgin_tmout, 255, MAP_SIZE);
+  memset(virgin_crash, 255, MAP_SIZE);
+
   init_count_class16();
 
   setup_dirs_fds();
@@ -8735,7 +12464,28 @@ int main(int argc, char** argv) {
 
   if (!timeout_given) find_timeout();
 
-  detect_file_args(argv + optind + 1);
+  /* If we don't have a file name chosen yet, use a safe default. */
+
+  if (!out_file) {
+    u32 i = optind + 1;
+    while (argv[i]) {
+
+      u8* aa_loc = strstr(argv[i], "@@");
+
+      if (aa_loc && !out_file) {
+        if (file_extension) {
+          out_file = alloc_printf("%s/.cur_input.%s", out_dir, file_extension);
+        } else {
+          out_file = alloc_printf("%s/.cur_input", out_dir);
+        }
+        detect_file_args(argv + optind + 1, out_file);
+        break;
+      }
+
+      i++;
+
+    }
+  }
 
   if (!out_file) setup_stdio_file();
 
@@ -8769,6 +12519,9 @@ int main(int argc, char** argv) {
     if (stop_soon) goto stop_fuzzing;
   }
 
+  // real start time; we reset it here so that -V timing works correctly
+  start_time = get_cur_time();
+
   while (1) {
 
     u8 skipped_fuzz;
@@ -8827,6 +12580,19 @@ int main(int argc, char** argv) {
     queue_cur = queue_cur->next;
     current_entry++;
 
+    if (most_time_key == 1) {
+      u64 cur_ms_lv = get_cur_time();
+      if (most_time * 1000 < cur_ms_lv  - start_time) {
+        most_time_key = 2;
+        break;
+      }
+    }
+    if (most_execs_key == 1) {
+      if (most_execs <= total_execs) {
+        most_execs_key = 2;
+        break;
+      }
+    }
   }
 
   if (queue_cur) show_stats();
@@ -8840,13 +12606,18 @@ stop_fuzzing:
   SAYF(CURSOR_SHOW cLRD "\n\n+++ Testing aborted %s +++\n" cRST,
        stop_soon == 2 ? "programmatically" : "by user");
 
+  if (most_time_key == 2)
+    SAYF(cYEL "[!] " cRST "Time limit was reached\n");
+  if (most_execs_key == 2)
+    SAYF(cYEL "[!] " cRST "Execution limit was reached\n");
+
   /* Running for more than 30 minutes but still doing first cycle? */
 
   if (queue_cycle == 1 && get_cur_time() - start_time > 30 * 60 * 1000) {
 
     SAYF("\n" cYEL "[!] " cRST
            "Stopped during the first cycle, results may be incomplete.\n"
-           "    (For info on resuming, see %s/README.)\n", doc_path);
+           "    (For info on resuming, see %s/README)\n", doc_path);
 
   }
 
diff --git a/afl-gcc.c b/afl-gcc.c
index 8d3988c7..f6ededeb 100644
--- a/afl-gcc.c
+++ b/afl-gcc.c
@@ -252,6 +252,10 @@ static void edit_params(u32 argc, char** argv) {
 
   }
 
+#ifdef USEMMAP
+    cc_params[cc_par_cnt++] = "-lrt";
+#endif
+
   if (!getenv("AFL_DONT_OPTIMIZE")) {
 
 #if defined(__FreeBSD__) && defined(__x86_64__)
@@ -304,6 +308,7 @@ int main(int argc, char** argv) {
   if (isatty(2) && !getenv("AFL_QUIET")) {
 
     SAYF(cCYA "afl-cc" VERSION cRST " by <lcamtuf@google.com>\n");
+    SAYF(cYEL "[!] " cBRI "NOTE: " cRST "afl-gcc is deprecated, llvm_mode is much faster and has more options\n");
 
   } else be_quiet = 1;
 
diff --git a/afl-showmap.c b/afl-showmap.c
index 316490d8..baf8352e 100644
--- a/afl-showmap.c
+++ b/afl-showmap.c
@@ -28,6 +28,8 @@
 #include "debug.h"
 #include "alloc-inl.h"
 #include "hash.h"
+#include "sharedmem.h"
+#include "afl-common.h"
 
 #include <stdio.h>
 #include <unistd.h>
@@ -48,7 +50,7 @@
 
 static s32 child_pid;                 /* PID of the tested program         */
 
-static u8* trace_bits;                /* SHM with instrumentation bitmap   */
+       u8* trace_bits;                /* SHM with instrumentation bitmap   */
 
 static u8 *out_file,                  /* Trace output file                 */
           *doc_path,                  /* Path to docs                      */
@@ -59,8 +61,6 @@ static u32 exec_tmout;                /* Exec timeout (ms)                 */
 
 static u64 mem_limit = MEM_LIMIT;     /* Memory limit (MB)                 */
 
-static s32 shm_id;                    /* ID of the SHM region              */
-
 static u8  quiet_mode,                /* Hide non-essential messages?      */
            edges_only,                /* Ignore hit counts?                */
            cmin_mode,                 /* Generate output in afl-cmin mode? */
@@ -126,39 +126,6 @@ static void classify_counts(u8* mem, const u8* map) {
 }
 
 
-/* Get rid of shared memory (atexit handler). */
-
-static void remove_shm(void) {
-
-  shmctl(shm_id, IPC_RMID, NULL);
-
-}
-
-
-/* Configure shared memory. */
-
-static void setup_shm(void) {
-
-  u8* shm_str;
-
-  shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);
-
-  if (shm_id < 0) PFATAL("shmget() failed");
-
-  atexit(remove_shm);
-
-  shm_str = alloc_printf("%d", shm_id);
-
-  setenv(SHM_ENV_VAR, shm_str, 1);
-
-  ck_free(shm_str);
-
-  trace_bits = shmat(shm_id, NULL, 0);
-  
-  if (!trace_bits) PFATAL("shmat() failed");
-
-}
-
 /* Write results. */
 
 static u32 write_results(void) {
@@ -413,50 +380,6 @@ static void setup_signal_handlers(void) {
 }
 
 
-/* Detect @@ in args. */
-
-static void detect_file_args(char** argv) {
-
-  u32 i = 0;
-  u8* cwd = getcwd(NULL, 0);
-
-  if (!cwd) PFATAL("getcwd() failed");
-
-  while (argv[i]) {
-
-    u8* aa_loc = strstr(argv[i], "@@");
-
-    if (aa_loc) {
-
-      u8 *aa_subst, *n_arg;
-
-      if (!at_file) FATAL("@@ syntax is not supported by this tool.");
-
-      /* Be sure that we're always using fully-qualified paths. */
-
-      if (at_file[0] == '/') aa_subst = at_file;
-      else aa_subst = alloc_printf("%s/%s", cwd, at_file);
-
-      /* Construct a replacement argv value. */
-
-      *aa_loc = 0;
-      n_arg = alloc_printf("%s%s%s", argv[i], aa_subst, aa_loc + 2);
-      argv[i] = n_arg;
-      *aa_loc = '@';
-
-      if (at_file[0] != '/') ck_free(aa_subst);
-
-    }
-
-    i++;
-
-  }
-
-  free(cwd); /* not tracked */
-
-}
-
-
 /* Show banner. */
 
 static void show_banner(void) {
@@ -481,7 +404,9 @@ static void usage(u8* argv0) {
 
        "  -t msec       - timeout for each run (none)\n"
        "  -m megs       - memory limit for child process (%u MB)\n"
-       "  -Q            - use binary-only instrumentation (QEMU mode)\n\n"
+       "  -Q            - use binary-only instrumentation (QEMU mode)\n"
+       "  -U            - use Unicorn-based instrumentation (Unicorn mode)\n"
+       "                  (Not necessary, here for consistency with other afl-* tools)\n\n"  
 
        "Other settings:\n\n"
 
@@ -610,19 +535,18 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 
 }
 
-
 /* Main entry point */
 
 int main(int argc, char** argv) {
 
   s32 opt;
-  u8  mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+  u8  mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
   u32 tcnt;
   char** use_argv;
 
   doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;
 
-  while ((opt = getopt(argc,argv,"+o:m:t:A:eqZQbc")) > 0)
+  while ((opt = getopt(argc,argv,"+o:m:t:A:eqZQUbc")) > 0)
 
     switch (opt) {
 
@@ -719,6 +643,14 @@ int main(int argc, char** argv) {
         qemu_mode = 1;
         break;
 
+      case 'U':
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+
+        unicorn_mode = 1;
+        break;
+
       case 'b':
 
         /* Secret undocumented mode. Writes output in raw binary format
@@ -741,7 +673,7 @@ int main(int argc, char** argv) {
 
   if (optind == argc || !out_file) usage(argv[0]);
 
-  setup_shm();
+  setup_shm(0);
   setup_signal_handlers();
 
   set_up_environment();
@@ -753,7 +685,7 @@ int main(int argc, char** argv) {
     ACTF("Executing '%s'...\n", target_path);
   }
 
-  detect_file_args(argv + optind);
+  detect_file_args(argv + optind, at_file);
 
   if (qemu_mode)
     use_argv = get_qemu_argv(argv[0], argv + optind, argc - optind);
diff --git a/afl-system-config b/afl-system-config
index 7538bc29..366762ef 100755
--- a/afl-system-config
+++ b/afl-system-config
@@ -1,5 +1,9 @@
 #!/bin/sh
 echo This reconfigures the system to have a better fuzzing performance
+if [ '!' "$EUID" = 0 ] && [ '!' `id -u` = 0 ] ; then
+	echo "Error: you need to be root to run this"
+	exit 1
+fi
 sysctl -w kernel.core_pattern=core
 sysctl -w kernel.randomize_va_space=0
 sysctl -w kernel.sched_child_runs_first=1
@@ -7,8 +11,11 @@ sysctl -w kernel.sched_autogroup_enabled=1
 sysctl -w kernel.sched_migration_cost_ns=50000000
 sysctl -w kernel.sched_latency_ns=250000000
 echo never > /sys/kernel/mm/transparent_hugepage/enabled
-echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor > /dev/null
+test -e /sys/devices/system/cpu/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/scaling_governor
+test -e /sys/devices/system/cpu/cpufreq/policy0/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/policy*/scaling_governor
+test -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
 test -e /sys/devices/system/cpu/intel_pstate/no_turbo && echo 0 > /sys/devices/system/cpu/intel_pstate/no_turbo
+test -e /sys/devices/system/cpu/cpufreq/boost && echo 1 > /sys/devices/system/cpu/cpufreq/boost
 echo
 echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this:
 echo '/etc/default/grub:GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"'
diff --git a/afl-tmin.c b/afl-tmin.c
index a42be6e9..94f3bb3f 100644
--- a/afl-tmin.c
+++ b/afl-tmin.c
@@ -26,6 +26,8 @@
 #include "debug.h"
 #include "alloc-inl.h"
 #include "hash.h"
+#include "sharedmem.h"
+#include "afl-common.h"
 
 #include <stdio.h>
 #include <unistd.h>
@@ -50,8 +52,8 @@ static s32 forksrv_pid,               /* PID of the fork server           */
 static s32 fsrv_ctl_fd,               /* Fork server control pipe (write) */
            fsrv_st_fd;                /* Fork server status pipe (read)   */
 
-static u8 *trace_bits,                /* SHM with instrumentation bitmap   */
-          *mask_bitmap;               /* Mask for trace bits (-B)          */
+       u8 *trace_bits;                /* SHM with instrumentation bitmap   */
+static u8 *mask_bitmap;               /* Mask for trace bits (-B)          */
 
 static u8 *in_file,                   /* Minimizer input test case         */
           *out_file,                  /* Minimizer output file             */
@@ -73,8 +75,7 @@ static u32 in_len,                    /* Input data length                 */
 
 static u64 mem_limit = MEM_LIMIT;     /* Memory limit (MB)                 */
 
-static s32 shm_id,                    /* ID of the SHM region              */
-           dev_null_fd = -1;          /* FD to /dev/null                   */
+static s32 dev_null_fd = -1;          /* FD to /dev/null                   */
 
 static u8  crash_mode,                /* Crash-centric mode?               */
            exit_crash,                /* Treat non-zero exit as crash?     */
@@ -159,42 +160,12 @@ static inline u8 anything_set(void) {
 }
 
 
+/* Get rid of temp files (atexit handler). */
 
-/* Get rid of shared memory and temp files (atexit handler). */
-
-static void remove_shm(void) {
-
+static void at_exit_handler(void) {
   if (prog_in) unlink(prog_in); /* Ignore errors */
-  shmctl(shm_id, IPC_RMID, NULL);
-
 }
 
-
-/* Configure shared memory. */
-
-static void setup_shm(void) {
-
-  u8* shm_str;
-
-  shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);
-
-  if (shm_id < 0) PFATAL("shmget() failed");
-
-  atexit(remove_shm);
-
-  shm_str = alloc_printf("%d", shm_id);
-
-  setenv(SHM_ENV_VAR, shm_str, 1);
-
-  ck_free(shm_str);
-
-  trace_bits = shmat(shm_id, NULL, 0);
-  
-  if (!trace_bits) PFATAL("shmat() failed");
-
-}
-
-
 /* Read initial file. */
 
 static void read_initial_file(void) {
@@ -700,7 +671,7 @@ next_del_blksize:
   alpha_del1   = 0;
   syms_removed = 0;
 
-  memset(alpha_map, 0, 256 * sizeof(u32));
+  memset(alpha_map, 0, sizeof(alpha_map));
 
   for (i = 0; i < in_len; i++) {
     if (!alpha_map[in_data[i]]) alpha_size++;
@@ -911,48 +882,6 @@ static void setup_signal_handlers(void) {
 }
 
 
-/* Detect @@ in args. */
-
-static void detect_file_args(char** argv) {
-
-  u32 i = 0;
-  u8* cwd = getcwd(NULL, 0);
-
-  if (!cwd) PFATAL("getcwd() failed");
-
-  while (argv[i]) {
-
-    u8* aa_loc = strstr(argv[i], "@@");
-
-    if (aa_loc) {
-
-      u8 *aa_subst, *n_arg;
-
-      /* Be sure that we're always using fully-qualified paths. */
-
-      if (prog_in[0] == '/') aa_subst = prog_in;
-      else aa_subst = alloc_printf("%s/%s", cwd, prog_in);
-
-      /* Construct a replacement argv value. */
-
-      *aa_loc = 0;
-      n_arg = alloc_printf("%s%s%s", argv[i], aa_subst, aa_loc + 2);
-      argv[i] = n_arg;
-      *aa_loc = '@';
-
-      if (prog_in[0] != '/') ck_free(aa_subst);
-
-    }
-
-    i++;
-
-  }
-
-  free(cwd); /* not tracked */
-
-}
-
-
 /* Display usage hints. */
 
 static void usage(u8* argv0) {
@@ -969,7 +898,9 @@ static void usage(u8* argv0) {
        "  -f file       - input file read by the tested program (stdin)\n"
        "  -t msec       - timeout for each run (%u ms)\n"
        "  -m megs       - memory limit for child process (%u MB)\n"
-       "  -Q            - use binary-only instrumentation (QEMU mode)\n\n"
+       "  -Q            - use binary-only instrumentation (QEMU mode)\n"
+       "  -U            - use Unicorn-based instrumentation (Unicorn mode)\n\n"
+       "                  (Not necessary, here for consistency with other afl-* tools)\n\n"
 
        "Minimization settings:\n\n"
 
@@ -1096,7 +1027,6 @@ static char** get_qemu_argv(u8* own_loc, char** argv, int argc) {
 
 }
 
-
 /* Read mask bitmap from file. This is for the -B option. */
 
 static void read_bitmap(u8* fname) {
@@ -1118,14 +1048,14 @@ static void read_bitmap(u8* fname) {
 int main(int argc, char** argv) {
 
   s32 opt;
-  u8  mem_limit_given = 0, timeout_given = 0, qemu_mode = 0;
+  u8  mem_limit_given = 0, timeout_given = 0, qemu_mode = 0, unicorn_mode = 0;
   char** use_argv;
 
   doc_path = access(DOC_PATH, F_OK) ? "docs" : DOC_PATH;
 
   SAYF(cCYA "afl-tmin" VERSION cRST " by <lcamtuf@google.com>\n");
 
-  while ((opt = getopt(argc,argv,"+i:o:f:m:t:B:xeQ")) > 0)
+  while ((opt = getopt(argc,argv,"+i:o:f:m:t:B:xeQU")) > 0)
 
     switch (opt) {
 
@@ -1217,6 +1147,14 @@ int main(int argc, char** argv) {
         qemu_mode = 1;
         break;
 
+      case 'U':
+
+        if (unicorn_mode) FATAL("Multiple -U options not supported");
+        if (!mem_limit_given) mem_limit = MEM_LIMIT_UNICORN;
+
+        unicorn_mode = 1;
+        break;
+
       case 'B': /* load bitmap */
 
         /* This is a secret undocumented option! It is speculated to be useful
@@ -1245,13 +1183,14 @@ int main(int argc, char** argv) {
 
   if (optind == argc || !in_file || !out_file) usage(argv[0]);
 
-  setup_shm();
+  setup_shm(0);
+  atexit(at_exit_handler);
   setup_signal_handlers();
 
   set_up_environment();
 
   find_binary(argv[optind]);
-  detect_file_args(argv + optind);
+  detect_file_args(argv + optind, prog_in);
 
   if (qemu_mode)
     use_argv = get_qemu_argv(argv[0], argv + optind, argc - optind);
diff --git a/alloc-inl.h b/alloc-inl.h
index d3c125fb..04f56d0d 100644
--- a/alloc-inl.h
+++ b/alloc-inl.h
@@ -83,10 +83,22 @@
           ABORT("Use after free."); \
         else ABORT("Corrupted head alloc canary."); \
       } \
+   } \
+  } while (0)
+
+/*
+#define CHECK_PTR(_p) do { \
+    if (_p) { \
+      if (ALLOC_C1(_p) ^ ALLOC_MAGIC_C1) {\
+        if (ALLOC_C1(_p) == ALLOC_MAGIC_F) \
+          ABORT("Use after free."); \
+        else ABORT("Corrupted head alloc canary."); \
+      } \
       if (ALLOC_C2(_p) ^ ALLOC_MAGIC_C2) \
         ABORT("Corrupted tail alloc canary."); \
     } \
   } while (0)
+*/
 
 #define CHECK_PTR_EXPR(_p) ({ \
     typeof (_p) _tmp = (_p); \
diff --git a/config.h b/config.h
index cebf7c39..37a2a794 100644
--- a/config.h
+++ b/config.h
@@ -21,7 +21,7 @@
 
 /* Version string: */
 
-#define VERSION             "++2.52d"
+#define VERSION             "++2.53d"  // c = release, d = volatile github dev
 
 /******************************************************
  *                                                    *
@@ -59,6 +59,10 @@
 
 #define MEM_LIMIT_QEMU      200
 
+/* Default memory limit when running in Unicorn mode (MB): */
+
+#define MEM_LIMIT_UNICORN   200
+
 /* Number of calibration cycles per every new test case (and for test
    cases that show variable behavior): */
 
@@ -83,6 +87,7 @@
    of 32-bit int overflows): */
 
 #define HAVOC_MAX_MULT      16
+#define HAVOC_MAX_MULT_MOPT 32
 
 /* Absolute minimum number of havoc cycles (after all adjustments): */
 
diff --git a/docs/ChangeLog b/docs/ChangeLog
index 0d730118..dfb2e4e7 100644
--- a/docs/ChangeLog
+++ b/docs/ChangeLog
@@ -13,10 +13,39 @@ Want to stay in the loop on major new features? Join our mailing list by
 sending a mail to <afl-users+subscribe@googlegroups.com>.
 
 
------------------------------
-Version ++2.52d (tbd):
------------------------------
-
+----------------------
+Version ++2.53d (dev):
+----------------------
+
+  - ... your patch? :)
+
+
+
+--------------------------
+Version ++2.53c (release):
+--------------------------
+
+  - README is now README.md
+  - imported the few minor changes from the 2.53b release
+  - unicorn_mode got added - thanks to domenukk for the patch!
+  - fix llvm_mode AFL_TRACE_PC with modern llvm
+  - fix a crash in qemu_mode which also exists in stock afl
+  - added libcompcov, a laf-intel implementation for qemu! :)
+    see qemu_mode/libcompcov/README.libcompcov
+  - afl-fuzz now displays the selected core in the status screen (blue {#})
+  - updated afl-fuzz and afl-system-config for new scaling governor location
+    in modern kernels
+  - using the old ineffective afl-gcc will now show a deprecation warning
+  - all queue, hang and crash files now have their discovery time in their name
+  - if llvm_mode was compiled, afl-clang/afl-clang++ will point to these
+    instead of afl-gcc
+  - added instrim, a much faster llvm_mode instrumentation at the cost of
+    path discovery. See llvm_mode/README.instrim (https://github.com/csienslab/instrim)
+  - added MOpt (github.com/puppet-meteor/MOpt-AFL) mode, see docs/README.MOpt
+  - added code to make it more portable to other platforms than Intel Linux
+  - added never zero counters for afl-gcc and optionally (because of an
+    optimization issue in llvm < 9) for llvm_mode (AFL_LLVM_NOT_ZERO=1)
+  - added a new doc about binary only fuzzing: docs/binaryonly_fuzzing.txt
   - more cpu power for afl-system-config
   - added forkserver patch to afl-tmin, makes it much faster (originally from
     github.com/nccgroup/TriforceAFL)
@@ -27,11 +56,13 @@ Version ++2.52d (tbd):
     see docs/python_mutators.txt (originally by choller@mozilla)
   - added AFL_CAL_FAST for slow applications and AFL_DEBUG_CHILD_OUTPUT for
     debugging
-  - added a  -s seed  switch to allow afl run with a fixed initial
-    seed that is not updated. this is good for performance and path discovery
+  - added -V time and -E execs options for better comparison runs; these run
+    afl-fuzz for a specific time/number of executions.
+  - added a -s seed switch to allow afl run with a fixed initial
+    seed that is not updated. This is good for performance and path discovery
     tests as the random numbers are deterministic then
-  - ... your idea or patch?
-
+  - llvm_mode LAF_... env variables can now be specified as AFL_LLVM_LAF_...,
+    which is longer but in line with other llvm-specific env vars
 
 
 -----------------------------
@@ -41,7 +72,7 @@ Version ++2.52c (2019-06-05):
   - Applied community patches. See docs/PATCHES for the full list.
     LLVM and Qemu modes are now faster.
     Important changes:
-      afl-fuzz: -e EXTENSION  commandline option
+      afl-fuzz: -e EXTENSION commandline option
       llvm_mode: LAF-intel performance (needs activation, see llvm/README.laf-intel)
       a few new environment variables for afl-fuzz, llvm and qemu, see docs/env_variables.txt
   - Added the power schedules of AFLfast by Marcel Boehme, but set the default
diff --git a/docs/PATCHES b/docs/PATCHES
index cb050218..8b188814 100644
--- a/docs/PATCHES
+++ b/docs/PATCHES
@@ -17,6 +17,9 @@ afl-qemu-optimize-entrypoint.diff	by mh(at)mh-sec(dot)de
 afl-qemu-speed.diff			by abiondo on github
 afl-qemu-optimize-map.diff		by mh(at)mh-sec(dot)de
 
++ unicorn_mode (modernized and updated by domenukk)
++ instrim (https://github.com/csienslab/instrim) was integrated
++ MOpt (github.com/puppet-meteor/MOpt-AFL) was imported
 + AFLfast additions (github.com/mboehme/aflfast) were incorporated.
 + Qemu 3.1 upgrade with enhancement patches (github.com/andreafioraldi/afl)
 + Python mutator modules support (github.com/choller/afl)
diff --git a/docs/QuickStartGuide.txt b/docs/QuickStartGuide.txt
index af4fe75f..9190dc98 100644
--- a/docs/QuickStartGuide.txt
+++ b/docs/QuickStartGuide.txt
@@ -2,7 +2,7 @@
 AFL quick start guide
 =====================
 
-You should read docs/README. It's pretty short. If you really can't, here's
+You should read docs/README.md - it's pretty short. If you really can't, here's
 how to hit the ground running:
 
 1) Compile AFL with 'make'. If build fails, see docs/INSTALL for tips.
@@ -17,7 +17,7 @@ how to hit the ground running:
 
    The program must crash properly when a fault is encountered. Watch out for
    custom SIGSEGV or SIGABRT handlers and background processes. For tips on
-   detecting non-crashing flaws, see section 11 in docs/README.
+   detecting non-crashing flaws, see section 11 in docs/README.md.
 
 3) Compile the program / library to be fuzzed using afl-gcc. A common way to
    do this would be:
@@ -48,7 +48,7 @@ how to hit the ground running:
 That's it. Sit back, relax, and - time permitting - try to skim through the
 following files:
 
-  - docs/README               - A general introduction to AFL,
+  - docs/README.md            - A general introduction to AFL,
   - docs/perf_tips.txt        - Simple tips on how to fuzz more quickly,
   - docs/status_screen.txt    - An explanation of the tidbits shown in the UI,
   - docs/parallel_fuzzing.txt - Advice on running AFL on multiple cores.
diff --git a/docs/README.MOpt b/docs/README.MOpt
new file mode 100644
index 00000000..94e63959
--- /dev/null
+++ b/docs/README.MOpt
@@ -0,0 +1,51 @@
+# MOpt(imized) AFL by <puppet@zju.edu.cn>
+
+### 1. Description
+MOpt-AFL is an AFL-based fuzzer that utilizes a customized Particle Swarm
+Optimization (PSO) algorithm to find the optimal selection probability
+distribution of operators with respect to fuzzing effectiveness.
+More details can be found in the technical report.
+
+### 2. Cite Information
+Chenyang Lyu, Shouling Ji, Chao Zhang, Yuwei Li, Wei-Han Lee, Yu Song and
+Raheem Beyah, MOPT: Optimized Mutation Scheduling for Fuzzers,
+USENIX Security 2019. 
+
+### 3. Seed Sets
+We open source all the seed sets used in the paper 
+"MOPT: Optimized Mutation Scheduling for Fuzzers".
+
+### 4. Experiment Results
+The experiment results can be found in 
+https://drive.google.com/drive/folders/184GOzkZGls1H2NuLuUfSp9gfqp1E2-lL?usp=sharing.
+We only open source the crash files since the space is limited. 
+
+### 5. Technical Report
+MOpt_TechReport.pdf is the technical report of the paper 
+"MOPT: Optimized Mutation Scheduling for Fuzzers", which contains more deatails.
+
+### 6. Parameter Introduction
+Most importantly, you must add the parameter `-L` (e.g., `-L 0`) to launch the
+MOpt scheme. 
+
+Option '-L' controls the time to move on to the pacemaker fuzzing mode.
+'-L t': when MOpt-AFL finishes the mutation of one input, if it has not
+discovered any new unique crash or path for more than t minutes, MOpt-AFL will
+enter the pacemaker fuzzing mode. 
+
+Setting t to 0 will enter the pacemaker fuzzing mode right away, which is
+recommended for short time-scale evaluations.
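+
+As a hypothetical example (in_dir, out_dir and ./target are placeholders), a
+run that enables MOpt and enters the pacemaker mode immediately could be:
+
+	$ afl-fuzz -L 0 -i in_dir -o out_dir -- ./target @@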
+
+Other important parameters can be found in afl-fuzz.c, for instance, 
+
+'swarm_num': the number of the PSO swarms used in the fuzzing process.
+'period_pilot': how many times MOpt-AFL will execute the target program
+	in the pilot fuzzing module before it enters the core fuzzing module.
+'period_core': how many times MOpt-AFL will execute the target program in the
+	core fuzzing module before it enters the PSO updating module.
+'limit_time_bound': controls how many interesting test cases need to be found
+	before MOpt-AFL quits the pacemaker fuzzing mode and reuses the deterministic stage.
+	0 < 'limit_time_bound' < 1, MOpt-AFL-tmp.
+	'limit_time_bound' >= 1, MOpt-AFL-ever.
+
+Have fun with MOpt in AFL!
diff --git a/docs/README.md b/docs/README.md
new file mode 120000
index 00000000..32d46ee8
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1 @@
+../README.md
\ No newline at end of file
diff --git a/docs/binaryonly_fuzzing.txt b/docs/binaryonly_fuzzing.txt
new file mode 100644
index 00000000..53361f5f
--- /dev/null
+++ b/docs/binaryonly_fuzzing.txt
@@ -0,0 +1,140 @@
+
+Fuzzing binary-only programs with afl++
+=======================================
+
+afl++, libfuzzer and others are great if you have the source code, as
+they allow for very fast, coverage-guided fuzzing.
+
+However, if only the binary program is available and there is no source code,
+then standard afl++ (dumb mode) is not effective.
+
+The following describes how such binaries can be fuzzed with afl++.
+
+!!!!!
+TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then
+      use afl -Q qemu_mode, or better: use both in parallel.
+!!!!!
+
+
+QEMU
+----
+Qemu is the "native" solution to the program.
+It is available in the ./qemu_mode/ directory and once compiled it can
+be accessed by the afl-fuzz -Q command line option.
+The speed decrease is at about 50%
+It is the easiest to use alternative and even works for cross-platform binaries.
+
+As it is included in afl++ this needs no URL.
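+
+As a sketch (in_dir, out_dir and ./target are placeholders), fuzzing a binary
+with QEMU mode looks like this:
+
+	$ afl-fuzz -Q -i in_dir -o out_dir -- ./target @@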
+
+
+UNICORN
+-------
+Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
+In contrast to QEMU, Unicorn does not offer a full system or even userland emulation.
+Runtime environment and/or loaders have to be written from scratch, if needed.
+On top, block chaining has been removed. This means the speed boost introduced
+into the patched QEMU mode of afl++ cannot simply be ported over to Unicorn.
+For further information, check out ./unicorn_mode.txt.
+
+
+DYNINST
+-------
+Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio
+(see far below). However, whereas Pintool and Dynamorio work at runtime, dyninst
+instruments the target at load time and then lets it run.
+This is great for some things, e.g. fuzzing, and not so effective for others,
+e.g. malware analysis.
+
+So what we can do with dyninst is take every basic block, put afl's
+instrumentation code in there, and then save the binary.
+Afterwards we can just fuzz the newly saved target binary with afl-fuzz.
+Sounds great? It is. The issue, though: it is a non-trivial problem to
+insert instructions that change addresses in the process space in such a way
+that everything still works afterwards. Hence more often than not binaries
+crash when they are run (because of the instrumentation).
+
+The speed decrease is about 15-35%, depending on the optimization options
+used with afl-dyninst.
+
+So if dyninst works, it is the best option available. Otherwise it just doesn't
+work well.
+
+https://github.com/vanhauser-thc/afl-dyninst
+
+
+INTEL-PT
+--------
+If you have a newer Intel CPU, you can make use of Intel's Processor Trace (PT).
+The big issue with Intel's PT is the small buffer size and the complex
+encoding of the debug information collected through PT.
+This makes the decoding very CPU intensive and hence slow.
+As a result, the overall speed decrease is about 70-90% (depending on
+the implementation and other factors).
+
+There are two afl intel-pt implementations:
+
+1. https://github.com/junxzm1990/afl-pt
+ => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel.
+
+2. https://github.com/hunter-ht-2018/ptfuzzer
+ => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must
+    be used. This one is faster than the other.
+
+
+CORESIGHT
+---------
+
+Coresight is ARM's answer to Intel's PT.
+There is no implementation so far which handles coresight, and getting
+it working on ARM Linux is very difficult because building custom kernels
+for embedded systems is hard. Finding a device that has coresight in
+the ARM chip is difficult too.
+My guess is that it is slower than Qemu, but faster than Intel PT.
+If anyone finds any coresight implementation for afl please ping me:
+vh@thc.org
+
+
+PIN & DYNAMORIO
+---------------
+
+Pintool and Dynamorio are dynamic instrumentation engines, and they can be
+used for getting basic block information at runtime.
+Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows
+whereas Dynamorio is additionally available for ARM and AARCH64.
+Dynamorio is also 10x faster than Pintool.
+
+The big issue with Dynamorio (and therefore Pintool too) is speed.
+Dynamorio has a speed decrease of 98-99%.
+Pintool has a speed decrease of 99.5%.
+
+Hence Dynamorio is the option to go for if everything fails, and Pintool
+only if Dynamorio fails too.
+
+Dynamorio solutions:
+  https://github.com/vanhauser-thc/afl-dynamorio
+  https://github.com/mxmssh/drAFL
+  https://github.com/googleprojectzero/winafl/ <= very good but windows only
+
+Pintool solutions:
+  https://github.com/vanhauser-thc/afl-pin
+  https://github.com/mothran/aflpin
+  https://github.com/spinpx/afl_pin_mode  <= only old Pintool version supported
+
+
+Non-AFL solutions
+-----------------
+
+There are many binary-only fuzzing frameworks. Some are great for CTFs but don't
+work with large binaries, others are very slow but have good path discovery,
+and some are very hard to set up...
+
+QSYM: https://github.com/sslab-gatech/qsym
+Manticore: https://github.com/trailofbits/manticore
+S2E: https://github.com/S2E
+<please send me any missing that are good>
+
+
+
+That's it!
+News, corrections, updates?
+Email vh@thc.org
diff --git a/docs/env_variables.txt b/docs/env_variables.txt
index f5db3b4f..36fdc369 100644
--- a/docs/env_variables.txt
+++ b/docs/env_variables.txt
@@ -7,8 +7,8 @@ Environmental variables
   users or for some types of custom fuzzing setups. See README for the general
   instruction manual.
 
-1) Settings for afl-gcc, afl-clang, and afl-as
-----------------------------------------------
+1) Settings for afl-gcc, afl-clang, and afl-as - and gcc_plugin afl-gcc-fast
+----------------------------------------------------------------------------
 
 Because they can't directly accept command-line options, the compile-time
 tools make fairly broad use of environmental variables:
@@ -82,18 +82,22 @@ discussed in section #1, with the exception of:
   - TMPDIR and AFL_KEEP_ASSEMBLY, since no temporary assembly files are
     created.
 
+  - AFL_INST_RATIO, as we switched to instrim instrumentation, which
+    is more effective but does not make much sense together with this option.
+
 Then there are a few specific features that are only available in llvm_mode:
 
   LAF-INTEL
   =========
     This great feature will split compares to series of single byte comparisons
-    to allow afl-fuzz to find otherwise rather impossible paths.
+    to allow afl-fuzz to find otherwise rather impossible paths. It is not
+    restricted to Intel CPUs ;-)
 
-    - Setting LAF_SPLIT_SWITCHES will split switch()es
+    - Setting AFL_LLVM_LAF_SPLIT_SWITCHES will split switch()es
 
-    - Setting LAF_TRANSFORM_COMPARES will split string compare functions
+    - Setting AFL_LLVM_LAF_TRANSFORM_COMPARES will split string compare functions
 
-    - Setting LAF_SPLIT_COMPARES will split > 8 bit CMP instructions
+    - Setting AFL_LLVM_LAF_SPLIT_COMPARES will split > 8 bit CMP instructions
 
     See llvm_mode/README.laf-intel for more information. 
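+
+    A minimal sketch, assuming afl-clang-fast is built and target.c is a
+    placeholder source file:
+
+      $ AFL_LLVM_LAF_SPLIT_COMPARES=1 afl-clang-fast -o target target.c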
 
@@ -102,13 +106,33 @@ Then there are a few specific features that are only available in llvm_mode:
     This feature allows selectively instrumentation of the source
 
     - Setting AFL_LLVM_WHITELIST with a filename will only instrument those
-      files that match these names.
+      files that match the names listed in this file.
 
     See llvm_mode/README.whitelist for more information.
 
-Note that AFL_INST_RATIO will behave a bit differently than for afl-gcc,
-because functions are *not* instrumented unconditionally - so low values
-will have a more striking effect. For this tool, 0 is not a valid choice.
+  INSTRIM
+  =======
+    This feature increases the speed by a whopping 20%, but at the cost of
+    lower path discovery and therefore coverage.
+
+    - Setting AFL_LLVM_INSTRIM activates this mode
+
+    - Setting AFL_LLVM_INSTRIM_LOOPHEAD=1 expands on INSTRIM to optimize loops.
+      afl-fuzz will only be able to see the path the loop took, but not how
+      many times it was called (unless it is a complex loop).
+
+    See llvm_mode/README.instrim
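+
+    A minimal sketch (target.c is a placeholder):
+
+      $ AFL_LLVM_INSTRIM=1 afl-clang-fast -o target target.c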
+
+  NOT_ZERO
+  ========
+
+    - Setting AFL_LLVM_NOT_ZERO=1 during compilation will use counters
+      that skip zero on overflow. This is the default for llvm >= 9,
+      however for llvm versions below that it will incur an unnecessary
+      slowdown due to a performance issue that is only fixed in llvm 9+.
+      This feature increases path discovery by a little bit.
+
+    See llvm_mode/README.neverzero
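+
+    A minimal sketch (target.c is a placeholder):
+
+      $ AFL_LLVM_NOT_ZERO=1 afl-clang-fast -o target target.c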
 
 3) Settings for afl-fuzz
 ------------------------
@@ -220,6 +244,10 @@ The QEMU wrapper used to instrument binary-only code supports several settings:
 
   - Setting AFL_INST_LIBS causes the translator to also instrument the code
     inside any dynamically linked libraries (notably including glibc).
+  
+  - Setting AFL_QEMU_COMPCOV enables CompareCoverage tracing of all cmp
+    and sub instructions on x86 and x86_64. Support for other architectures
+    and comparison functions (mem/strcmp et al.) is planned.
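+
+    A hypothetical invocation (in_dir, out_dir and ./target are placeholders):
+
+      $ AFL_QEMU_COMPCOV=1 afl-fuzz -Q -i in_dir -o out_dir -- ./target @@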
 
   - The underlying QEMU binary will recognize any standard "user space
     emulation" variables (e.g., QEMU_STACK_SIZE), but there should be no
diff --git a/docs/sister_projects.txt b/docs/sister_projects.txt
index 41701e2f..a2eb2a22 100644
--- a/docs/sister_projects.txt
+++ b/docs/sister_projects.txt
@@ -6,6 +6,10 @@ Sister projects
   designed for, or meant to integrate with AFL. See README for the general
   instruction manual.
 
+!!!
+!!! This list is outdated and needs an update, missing: e.g. Angora, FairFuzz
+!!!
+
 -------------------------------------------
 Support for other languages / environments:
 -------------------------------------------
@@ -263,7 +267,7 @@ Static binary-only instrumentation (Aleksandar Nikolich)
   reports better performance compared to QEMU, but occasional translation
   errors with stripped binaries.
 
-  https://github.com/vrtadmin/moflow/tree/master/afl-dyninst
+  https://github.com/vanhauser-thc/afl-dyninst
 
 AFL PIN (Parker Thompson)
 -------------------------
diff --git a/docs/unicorn_mode.txt b/docs/unicorn_mode.txt
new file mode 100644
index 00000000..ae6a2bde
--- /dev/null
+++ b/docs/unicorn_mode.txt
@@ -0,0 +1,107 @@
+=========================================================
+Unicorn-based binary-only instrumentation for afl-fuzz
+=========================================================
+
+1) Introduction
+---------------
+
+The code in ./unicorn_mode allows you to build a standalone feature that
+leverages the Unicorn Engine and allows callers to obtain instrumentation 
+output for black-box, closed-source binary code snippets. This mechanism 
+can then be used by afl-fuzz to stress-test targets that couldn't be built 
+with afl-gcc or used in QEMU mode, or with other extensions such as 
+TriforceAFL.
+
+There is a significant performance penalty compared to native AFL,
+but at least we're able to use AFL on these binaries, right?
+
+The idea and much of the implementation comes from Nathan Voss <njvoss299@gmail.com>.
+
+2) How to use
+-------------
+
+*** Building AFL's Unicorn Mode ***
+
+First, make afl as usual.
+Once that completes successfully you need to build and add in the Unicorn Mode 
+features:
+
+  $ cd unicorn_mode
+  $ ./build_unicorn_support.sh
+
+NOTE: This script downloads a recent Unicorn Engine commit that has been tested 
+and is stable-ish from the Unicorn github page. If you are offline, you'll need 
+to hack up this script a little bit and supply your own copy of Unicorn's latest 
+stable release. It's not very hard, just check out the beginning of the 
+build_unicorn_support.sh script and adjust as necessary.
+
+Building Unicorn will take a little bit (~5-10 minutes). Once it completes 
+it automatically compiles a sample application and verifies that it works.
+
+*** Fuzzing with Unicorn Mode ***
+
+To really use unicorn-mode effectively you need to prepare the following:
+
+	* Relevant binary code to be fuzzed
+	* Knowledge of the memory map and good starting state
+	* Folder containing sample inputs to start fuzzing with
+		- Same ideas as any other AFL inputs
+		- Quality/speed of results will depend greatly on quality of starting 
+		  samples
+		- See AFL's guidance on how to create a sample corpus
+	* Unicorn-based test harness which:
+		- Adds memory map regions
+		- Loads binary code into memory		
+		- Emulates at least one instruction*
+			- Yeah, this is lame. See 'Gotchas' section below for more info		
+		- Loads and verifies data to fuzz from a command-line specified file
+			- AFL will provide mutated inputs by changing the file passed to 
+			  the test harness
+			- Presumably the data to be fuzzed is at a fixed buffer address
+			- If input constraints (size, invalid bytes, etc.) are known they 
+			  should be checked after the file is loaded. If a constraint 
+			  fails, just exit the test harness. AFL will treat the input as 
+			  'uninteresting' and move on.
+		- Sets up registers and memory state for beginning of test
+		- Emulates the code of interest from beginning to end
+		- If a crash is detected, the test harness must 'crash' by 
+		  throwing a signal (SIGSEGV, SIGKILL, SIGABRT, etc.)
+
+Once you have all those things ready to go you just need to run afl-fuzz in
+'unicorn-mode' by passing in the '-U' flag:
+
+	$ afl-fuzz -U -m none -i /path/to/inputs -o /path/to/results -- ./test_harness @@
+
+The normal afl-fuzz command line format applies to everything here. Refer to
+AFL's main documentation for more info about how to use afl-fuzz effectively.
+
+For a much clearer vision of what all of this looks like, please refer to the
+sample provided in the 'unicorn_mode/samples' directory. There is also a blog
+post that goes over the basics at:
+
+https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf
+
+The 'helper_scripts' directory also contains several helper scripts that allow you 
+to dump context from a running process, load it, and hook heap allocations. For details
+on how to use them, check out the follow-up blog post to the one linked above.
+
+An example use of AFL-Unicorn mode is discussed in the paper Unicorefuzz:
+https://www.usenix.org/conference/woot19/presentation/maier
+
+3) Gotchas, feedback, bugs
+--------------------------
+
+To make sure that AFL's fork server starts up correctly the Unicorn test 
+harness script must emulate at least one instruction before loading the
+data that will be fuzzed from the input file. It doesn't matter what the
+instruction is, nor if it is valid. This is an artifact of how the fork-server
+is started and could likely be fixed with some clever re-arranging of the
+patches applied to Unicorn.
+
+Running the build script builds Unicorn and its python bindings and installs 
+them on your system. This installation will supersede any existing Unicorn
+installation with the patched afl-unicorn version.
+
+Refer to the unicorn_mode/samples/arm_example/arm_tester.c for an example
+of how to do this properly! If you don't get this right, AFL will not 
+load any mutated inputs and your fuzzing will be useless!
diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc
new file mode 100644
index 00000000..81cf98c4
--- /dev/null
+++ b/llvm_mode/LLVMInsTrim.so.cc
@@ -0,0 +1,350 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include <unordered_set>
+#include <random>
+#include <list>
+#include <string>
+#include <fstream>
+
+#include "../config.h"
+#include "../debug.h"
+
+#include "MarkNodes.h"
+
+using namespace llvm;
+
+static cl::opt<bool> MarkSetOpt("markset", cl::desc("MarkSet"),
+                                cl::init(false));
+static cl::opt<bool> LoopHeadOpt("loophead", cl::desc("LoopHead"),
+                                 cl::init(false));
+
+namespace {
+  struct InsTrim : public ModulePass {
+
+  protected:
+    std::list<std::string> myWhitelist;
+
+  private:
+    std::mt19937 generator;
+    int total_instr = 0;
+
+    unsigned genLabel() {
+      return generator() % 65536;
+    }
+
+  public:
+    static char ID;
+    InsTrim() : ModulePass(ID), generator(0) {//}
+    
+//    AFLCoverage() : ModulePass(ID) {
+      char* instWhiteListFilename = getenv("AFL_LLVM_WHITELIST");
+      if (instWhiteListFilename) {
+        std::string line;
+        std::ifstream fileStream;
+        fileStream.open(instWhiteListFilename);
+        if (!fileStream)
+          report_fatal_error("Unable to open AFL_LLVM_WHITELIST");
+        getline(fileStream, line);
+        while (fileStream) {
+          myWhitelist.push_back(line);
+          getline(fileStream, line);
+        }
+      }
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<DominatorTreeWrapperPass>();
+    }
+
+#if LLVM_VERSION_MAJOR < 4
+    const char *
+#else
+    StringRef
+#endif
+              getPassName() const override {
+      return "InstTrim Instrumentation";
+    }
+
+    bool runOnModule(Module &M) override {
+      char be_quiet = 0;
+      
+      if (isatty(2) && !getenv("AFL_QUIET")) {
+        SAYF(cCYA "LLVMInsTrim" VERSION cRST " by csienslab\n");
+      } else be_quiet = 1;
+    
+#if LLVM_VERSION_MAJOR < 9
+      char* neverZero_counters_str;
+      if ((neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO")) != NULL)
+        OKF("LLVM neverZero activated (by hexcoder)\n");
+#endif
+    
+      if (getenv("AFL_LLVM_INSTRIM_LOOPHEAD") != NULL || getenv("LOOPHEAD") != NULL) {
+        LoopHeadOpt = true;
+      }
+
+      // this is our default
+      MarkSetOpt = true;
+      
+/*    // I dont think this makes sense to port into LLVMInsTrim
+      char* inst_ratio_str = getenv("AFL_INST_RATIO");
+      unsigned int inst_ratio = 100;
+      if (inst_ratio_str) {
+       if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || inst_ratio > 100)
+         FATAL("Bad value of AFL_INST_RATIO (must be between 1 and 100)");
+      }
+*/
+
+      LLVMContext &C = M.getContext();
+      IntegerType *Int8Ty  = IntegerType::getInt8Ty(C);
+      IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
+
+      GlobalVariable *CovMapPtr = new GlobalVariable(
+        M, PointerType::getUnqual(Int8Ty), false, GlobalValue::ExternalLinkage,
+        nullptr, "__afl_area_ptr");
+
+      GlobalVariable *OldPrev = new GlobalVariable(
+        M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc",
+        0, GlobalVariable::GeneralDynamicTLSModel, 0, false);
+
+      u64 total_rs = 0;
+      u64 total_hs = 0;
+
+      for (Function &F : M) {
+        if (!F.size()) {
+          continue;
+        }
+
+        if (!myWhitelist.empty()) {
+          bool instrumentBlock = false;
+          DebugLoc Loc;
+          StringRef instFilename;
+
+          for (auto &BB : F) {
+            BasicBlock::iterator IP = BB.getFirstInsertionPt();
+            IRBuilder<> IRB(&(*IP));
+            if (!Loc)
+              Loc = IP->getDebugLoc();
+          }
+
+          if ( Loc ) {
+              DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
+
+              unsigned int instLine = cDILoc->getLine();
+              instFilename = cDILoc->getFilename();
+
+              if (instFilename.str().empty()) {
+                  /* If the original location is empty, try using the inlined location */
+                  DILocation *oDILoc = cDILoc->getInlinedAt();
+                  if (oDILoc) {
+                      instFilename = oDILoc->getFilename();
+                      instLine = oDILoc->getLine();
+                  }
+              }
+
+              /* Continue only if we know where we actually are */
+              if (!instFilename.str().empty()) {
+                  for (std::list<std::string>::iterator it = myWhitelist.begin(); it != myWhitelist.end(); ++it) {
+                      if (instFilename.str().length() >= it->length()) {
+                          if (instFilename.str().compare(instFilename.str().length() - it->length(), it->length(), *it) == 0) {
+                              instrumentBlock = true;
+                              break;
+                          }
+                      }
+                  }
+              }
+          }
+
+          /* Either we couldn't figure out our location or the location is
+           * not whitelisted, so we skip instrumentation. */
+          if (!instrumentBlock) {
+            if (!instFilename.str().empty())
+              SAYF(cYEL "[!] " cBRI "Not in whitelist, skipping %s ...\n", instFilename.str().c_str());
+            else
+              SAYF(cYEL "[!] " cBRI "No filename information found, skipping it");
+            continue;
+          }
+        }
+
+        std::unordered_set<BasicBlock *> MS;
+        if (!MarkSetOpt) {
+          for (auto &BB : F) {
+            MS.insert(&BB);
+          }
+          total_rs += F.size();
+        } else {
+          auto Result = markNodes(&F);
+          auto RS = Result.first;
+          auto HS = Result.second;
+
+          MS.insert(RS.begin(), RS.end());
+          if (!LoopHeadOpt) {
+            MS.insert(HS.begin(), HS.end());
+            total_rs += MS.size();
+          } else {
+            DenseSet<std::pair<BasicBlock *, BasicBlock *>> EdgeSet;
+            DominatorTreeWrapperPass *DTWP = &getAnalysis<DominatorTreeWrapperPass>(F);
+            auto DT = &DTWP->getDomTree();
+
+            total_rs += RS.size();
+            total_hs += HS.size();
+
+            for (BasicBlock *BB : HS) {
+              bool Inserted = false;
+              for (auto BI = pred_begin(BB), BE = pred_end(BB);
+                   BI != BE; ++BI
+              ) {
+                auto Edge = BasicBlockEdge(*BI, BB);
+                if (Edge.isSingleEdge() && DT->dominates(Edge, BB)) {
+                  EdgeSet.insert({*BI, BB});
+                  Inserted = true;
+                  break;
+                }
+              }
+              if (!Inserted) {
+                MS.insert(BB);
+                total_rs += 1;
+                total_hs -= 1;
+              }
+            }
+            for (auto I = EdgeSet.begin(), E = EdgeSet.end(); I != E; ++I) {
+              auto PredBB = I->first;
+              auto SuccBB = I->second;
+              auto NewBB = SplitBlockPredecessors(SuccBB, {PredBB}, ".split",
+                                                  DT, nullptr,
+#if LLVM_VERSION_MAJOR >= 8
+                                                  nullptr,
+#endif
+                                                  false);
+              MS.insert(NewBB);
+            }
+          }
+
+          auto *EBB = &F.getEntryBlock();
+          if (succ_begin(EBB) == succ_end(EBB)) {
+            MS.insert(EBB);
+            total_rs += 1;
+          }
+
+          for (BasicBlock &BB : F) {
+            if (MS.find(&BB) == MS.end()) {
+              continue;
+            }
+            IRBuilder<> IRB(&*BB.getFirstInsertionPt());
+            IRB.CreateStore(ConstantInt::get(Int32Ty, genLabel()), OldPrev);
+          }
+        }
+
+        for (BasicBlock &BB : F) {
+          auto PI = pred_begin(&BB);
+          auto PE = pred_end(&BB);
+          if (MarkSetOpt && MS.find(&BB) == MS.end()) {
+            continue;
+          }
+
+          IRBuilder<> IRB(&*BB.getFirstInsertionPt());
+          Value *L = NULL;
+          if (PI == PE) {
+            L = ConstantInt::get(Int32Ty, genLabel());
+          } else {
+            auto *PN = PHINode::Create(Int32Ty, 0, "", &*BB.begin());
+            DenseMap<BasicBlock *, unsigned> PredMap;
+            for (auto PI = pred_begin(&BB), PE = pred_end(&BB);
+                 PI != PE; ++PI
+            ) {
+              BasicBlock *PBB = *PI;
+              auto It = PredMap.insert({PBB, genLabel()});
+              unsigned Label = It.first->second;
+              PN->addIncoming(ConstantInt::get(Int32Ty, Label), PBB);
+            }
+            L = PN;
+          }
+
+          /* Load prev_loc */
+          LoadInst *PrevLoc = IRB.CreateLoad(OldPrev);
+          PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+          Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
+
+          /* Load SHM pointer */
+          LoadInst *MapPtr = IRB.CreateLoad(CovMapPtr);
+          MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+          Value *MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, L));
+
+          /* Update bitmap */
+          LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
+          Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+          
+          Value *Incr = IRB.CreateAdd(Counter, ConstantInt::get(Int8Ty, 1));
+
+#if LLVM_VERSION_MAJOR < 9
+          if (neverZero_counters_str != NULL) { // with llvm 9 we make this the default as the bug in llvm is then fixed
+#else
+  #warning "neverZero implementation needs to be reviewed!"
+#endif
+          /* hexcoder: Realize a counter that skips zero during overflow.
+           * Once this counter reaches its maximum value, it next increments to 1
+           *
+           * Instead of
+           * Counter + 1 -> Counter
+           * we inject now this
+           * Counter + 1 -> {Counter, OverflowFlag}
+           * Counter + OverflowFlag -> Counter
+           */
+            auto cf = IRB.CreateICmpEQ(Incr, ConstantInt::get(Int8Ty, 0));
+            auto carry = IRB.CreateZExt(cf, Int8Ty);
+            Incr = IRB.CreateAdd(Incr, carry);
+#if LLVM_VERSION_MAJOR < 9
+          }
+#endif
+   
+          IRB.CreateStore(Incr, MapPtrIdx)->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+   
+          /* Set prev_loc to cur_loc >> 1 */
+          /*
+          StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc);
+          Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+          */
+
+          total_instr++;
+        }
+      }
+
+      OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n"/*", ratio %u%%)."*/,
+          total_instr, total_rs, total_hs,
+          getenv("AFL_HARDEN") ? "hardened" :
+          ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) ?
+          "ASAN/MSAN" : "non-hardened")/*, inst_ratio*/);
+      return false;
+    }
+  }; // end of struct InsTrim
+}  // end of anonymous namespace
+
+char InsTrim::ID = 0;
+
+static void registerAFLPass(const PassManagerBuilder &,
+                            legacy::PassManagerBase &PM) {
+  PM.add(new InsTrim());
+}
+
+static RegisterStandardPasses RegisterAFLPass(
+    PassManagerBuilder::EP_OptimizerLast, registerAFLPass);
+
+static RegisterStandardPasses RegisterAFLPass0(
+    PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass);
diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile
index 6b277536..2b685ddc 100644
--- a/llvm_mode/Makefile
+++ b/llvm_mode/Makefile
@@ -16,6 +16,9 @@
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 
+# For Heiko:
+#TEST_MMAP=1
+
 PREFIX      ?= /usr/local
 HELPER_PATH  = $(PREFIX)/lib/afl
 BIN_PATH     = $(PREFIX)/bin
@@ -23,17 +26,23 @@ BIN_PATH     = $(PREFIX)/bin
 VERSION     = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2)
 
 LLVM_CONFIG ?= llvm-config
-#LLVM_OK = $(shell $(LLVM_CONFIG) --version | egrep -q '^[5-6]' && echo 0 || echo 1 )
+LLVMVER  = $(shell $(LLVM_CONFIG) --version)
 LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version | egrep -q '^9|3.0' && echo 1 || echo 0 )
+LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version | sed 's/\..*//')
 
 ifeq "$(LLVM_UNSUPPORTED)" "1"
   $(warn llvm_mode only supports versions 3.8.0 up to 8.x )
 endif
 
+# this is not visible yet:
+ifeq "$(LLVM_MAJOR)" "9"
+  $(info llvm_mode detected llvm 9, enabling neverZero implementation)
+endif
+
 CFLAGS      ?= -O3 -funroll-loops
 CFLAGS      += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \
                -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \
-               -DVERSION=\"$(VERSION)\"  
+               -DVERSION=\"$(VERSION)\"
 ifdef AFL_TRACE_PC
   CFLAGS    += -DUSE_TRACE_PC=1
 endif
@@ -45,12 +54,16 @@ CXXFLAGS    += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \
 CLANG_CFL    = `$(LLVM_CONFIG) --cxxflags` -Wl,-znodelete -fno-rtti -fpic $(CXXFLAGS)
 CLANG_LFL    = `$(LLVM_CONFIG) --ldflags` $(LDFLAGS)
 
-# User teor2345 reports that this is required to make things work on MacOS X.
 
+# User teor2345 reports that this is required to make things work on MacOS X.
 ifeq "$(shell uname)" "Darwin"
   CLANG_LFL += -Wl,-flat_namespace -Wl,-undefined,suppress
 endif
 
+ifeq "$(shell uname)" "OpenBSD"
+  CLANG_LFL += `$(LLVM_CONFIG) --libdir`/libLLVM.so.0.0
+endif
+
 # We were using llvm-config --bindir to get the location of clang, but
 # this seems to be busted on some distros, so using the one in $PATH is
 # probably better.
@@ -60,13 +73,53 @@ ifeq "$(origin CC)" "default"
   CXX        = clang++
 endif
 
+# sanity check.
+# Are versions of clang --version and llvm-config --version equal?
+CLANGVER = $(shell $(CC) --version | sed -E -ne '/^.*([0-9]\.[0-9]\.[0-9]).*/s//\1/p')
+
+
+ifeq "$(shell echo '\#include <sys/ipc.h>@\#include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(CC) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 )" "1"
+        SHMAT_OK=1
+else
+        SHMAT_OK=0
+        CFLAGS+=-DUSEMMAP=1
+        LDFLAGS += -lrt
+endif
+
+ifeq "$(TEST_MMAP)" "1"
+        SHMAT_OK=0
+        CFLAGS+=-DUSEMMAP=1
+        LDFLAGS += -lrt
+endif
+
+
 ifndef AFL_TRACE_PC
-  PROGS      = ../afl-clang-fast ../afl-llvm-pass.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so
+  PROGS      = ../afl-clang-fast ../afl-llvm-pass.so ../libLLVMInsTrim.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so
 else
   PROGS      = ../afl-clang-fast ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so
 endif
 
-all: test_deps $(PROGS) test_build all_done
+ifneq "$(CLANGVER)" "$(LLVMVER)"
+  CC = $(shell llvm-config --bindir)/clang
+  CXX = $(shell llvm-config --bindir)/clang++
+endif
+
+all: test_shm test_deps $(PROGS) test_build all_done
+
+
+ifeq "$(SHMAT_OK)" "1"
+
+test_shm:
+	@echo "[+] shmat seems to be working."
+	@rm -f .test2
+
+else
+
+test_shm:
+	@echo "[-] shmat seems not to be working, switching to mmap implementation"
+
+endif
+
 
 test_deps:
 ifndef AFL_TRACE_PC
@@ -77,6 +130,13 @@ else
 endif
 	@echo "[*] Checking for working '$(CC)'..."
 	@which $(CC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(CC)'. Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 )
+	@echo "[*] Checking for matching versions of '$(CC)' and '$(LLVM_CONFIG)'"
+ifneq "$(CLANGVER)" "$(LLVMVER)"
+	@echo "[!] WARNING: we have llvm-config version $(LLVMVER) and a clang version $(CLANGVER)"
+	@echo "[!] Retrying with the clang compiler from llvm: CC=`llvm-config --bindir`/clang"
+else
+	@echo "[*] We have llvm-config version $(LLVMVER) with a clang version $(CLANGVER), good."
+endif
 	@echo "[*] Checking for '../afl-showmap'..."
 	@test -f ../afl-showmap || ( echo "[-] Oops, can't find '../afl-showmap'. Be sure to compile AFL first."; exit 1 )
 	@echo "[+] All set and ready to build."
@@ -85,8 +145,11 @@ endif
 	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
 	ln -sf afl-clang-fast ../afl-clang-fast++
 
+../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps
+	$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=gnu++11 -shared $< MarkNodes.cc -o $@ $(CLANG_LFL)
+
 ../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps
-	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)
+	$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=gnu++11 -shared $< -o $@ $(CLANG_LFL)
 
 # laf
 ../split-switches-pass.so:	split-switches-pass.so.cc | test_deps
@@ -110,7 +173,7 @@ endif
 
 test_build: $(PROGS)
 	@echo "[*] Testing the CC wrapper and instrumentation output..."
-	unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_CC=$(CC) LAF_SPLIT_SWITCHES=1 LAF_TRANSFORM_COMPARES=1 LAF_SPLIT_COMPARES=1 ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS)
+	unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_CC=$(CC) AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS)
 	echo 0 | ../afl-showmap -m none -q -o .test-instr0 ./test-instr
 	echo 1 | ../afl-showmap -m none -q -o .test-instr1 ./test-instr
 	@rm -f test-instr
@@ -123,5 +186,5 @@ all_done: test_build
 .NOTPARALLEL: clean
 
 clean:
-	rm -f *.o *.so *~ a.out core core.[1-9][0-9]* test-instr .test-instr0 .test-instr1 
+	rm -f *.o *.so *~ a.out core core.[1-9][0-9]* .test2 test-instr .test-instr0 .test-instr1 
 	rm -f $(PROGS) ../afl-clang-fast++
diff --git a/llvm_mode/MarkNodes.cc b/llvm_mode/MarkNodes.cc
new file mode 100644
index 00000000..a156fccb
--- /dev/null
+++ b/llvm_mode/MarkNodes.cc
@@ -0,0 +1,355 @@
+#include <algorithm>
+#include <map>
+#include <queue>
+#include <set>
+#include <vector>
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+DenseMap<BasicBlock *, uint32_t> LMap;
+std::vector<BasicBlock *> Blocks;
+std::set<uint32_t> Marked , Markabove;
+std::vector< std::vector<uint32_t> > Succs , Preds;
+
+void reset(){
+  LMap.clear();
+  Blocks.clear();
+  Marked.clear();
+  Markabove.clear();
+}
+
+uint32_t start_point;
+
+void labelEachBlock(Function *F) {
+  // Fake single endpoint;
+  LMap[NULL] = Blocks.size();
+  Blocks.push_back(NULL);
+ 
+  // Assign the unique LabelID to each block;
+  for (auto I = F->begin(), E = F->end(); I != E; ++I) {
+    BasicBlock *BB = &*I;
+    LMap[BB] = Blocks.size();
+    Blocks.push_back(BB);
+  }
+  
+  start_point = LMap[&F->getEntryBlock()];
+}
+
+void buildCFG(Function *F) {
+  Succs.resize( Blocks.size() );
+  Preds.resize( Blocks.size() );
+  for( size_t i = 0 ; i < Succs.size() ; i ++ ){
+    Succs[ i ].clear();
+    Preds[ i ].clear();
+  }
+
+  //uint32_t FakeID = 0;
+  for (auto S = F->begin(), E = F->end(); S != E; ++S) {
+    BasicBlock *BB = &*S;
+    uint32_t MyID = LMap[BB];
+    //if (succ_begin(BB) == succ_end(BB)) {
+      //Succs[MyID].push_back(FakeID);
+      //Marked.insert(MyID);
+    //}
+    for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+      Succs[MyID].push_back(LMap[*I]);
+    }
+  }
+}
+
+std::vector< std::vector<uint32_t> > tSuccs;
+std::vector<bool> tag , indfs;
+
+void DFStree(size_t now_id) {
+  if(tag[now_id]) return;
+  tag[now_id]=true;
+  indfs[now_id]=true;
+  for (auto succ: tSuccs[now_id]) {
+    if(tag[succ] and indfs[succ]) {
+      Marked.insert(succ);
+      Markabove.insert(succ);
+      continue;
+    }
+    Succs[now_id].push_back(succ);
+    Preds[succ].push_back(now_id);
+    DFStree(succ);
+  }
+  indfs[now_id]=false;
+}
+void turnCFGintoDAG(Function *F) {
+  tSuccs = Succs;
+  tag.resize(Blocks.size());
+  indfs.resize(Blocks.size());
+  for (size_t i = 0; i < Blocks.size(); ++ i) {
+    Succs[i].clear();
+    tag[i]=false;
+    indfs[i]=false;
+  }
+  DFStree(start_point);
+  for (size_t i = 0; i < Blocks.size(); ++ i) 
+    if( Succs[i].empty() ){
+      Succs[i].push_back(0);
+      Preds[0].push_back(i);
+    }
+}
+
+uint32_t timeStamp;
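+// Dominator tree construction using semidominators (Lengauer-Tarjan style),
+// with path compression in eval().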
+namespace DominatorTree{
+  std::vector< std::vector<uint32_t> > cov;
+  std::vector<uint32_t> dfn, nfd, par, sdom, idom, mom, mn;
+
+  bool Compare(uint32_t u, uint32_t v) {
+    return dfn[u] < dfn[v];
+  }
+  uint32_t eval(uint32_t u) {
+    if( mom[u] == u ) return u;
+    uint32_t res = eval( mom[u] );
+    if(Compare(sdom[mn[mom[u]]] , sdom[mn[u]])) {
+      mn[u] = mn[mom[u]];
+    }
+    return mom[u] = res;
+  }
+
+  void DFS(uint32_t now) {
+    timeStamp += 1;
+    dfn[now] = timeStamp;
+    nfd[timeStamp - 1] = now;
+    for( auto succ : Succs[now] ) {
+      if( dfn[succ] == 0 ) {
+        par[succ] = now;
+        DFS(succ);
+      }
+    }
+  }
+
+  void DominatorTree(Function *F) {
+    if( Blocks.empty() ) return;
+    uint32_t s = start_point;
+
+    // Initialization
+    mn.resize(Blocks.size());
+    cov.resize(Blocks.size());
+    dfn.resize(Blocks.size());
+    nfd.resize(Blocks.size());
+    par.resize(Blocks.size());
+    mom.resize(Blocks.size());
+    sdom.resize(Blocks.size());
+    idom.resize(Blocks.size());
+
+    for( uint32_t i = 0 ; i < Blocks.size() ; i ++ ) {
+      dfn[i] = 0;
+      nfd[i] = Blocks.size();
+      cov[i].clear();
+      idom[i] = mom[i] = mn[i] = sdom[i] = i;
+    }
+
+    timeStamp = 0;
+    DFS(s);
+
+    for( uint32_t i = Blocks.size() - 1 ; i >= 1u ; i -- ) {
+      uint32_t now = nfd[i];
+      if( now == Blocks.size() ) {
+        continue;
+      }
+      for( uint32_t pre : Preds[ now ] ) {
+        if( dfn[ pre ] ) {
+          eval(pre);
+          if( Compare(sdom[mn[pre]], sdom[now]) ) {
+            sdom[now] = sdom[mn[pre]];
+          }
+        }
+      }
+      cov[sdom[now]].push_back(now);
+      mom[now] = par[now];
+      for( uint32_t x : cov[par[now]] ) {
+        eval(x);
+        if( Compare(sdom[mn[x]], par[now]) ) {
+          idom[x] = mn[x];
+        } else {
+          idom[x] = par[now];
+        }
+      }
+    }
+
+    for( uint32_t i = 1 ; i < Blocks.size() ; i += 1 ) {
+      uint32_t now = nfd[i];
+      if( now == Blocks.size() ) {
+        continue;
+      }
+      if(idom[now] != sdom[now])
+        idom[now] = idom[idom[now]];
+    }
+  }
+}; // End of DominatorTree
+
+std::vector<uint32_t> Visited, InStack;
+std::vector<uint32_t> TopoOrder, InDeg;
+std::vector< std::vector<uint32_t> > t_Succ , t_Pred;
+
+void Go(uint32_t now, uint32_t tt) {
+  if( now == tt ) return;
+  Visited[now] = InStack[now] = timeStamp;
+
+  for(uint32_t nxt : Succs[now]) {
+    if(Visited[nxt] == timeStamp and InStack[nxt] == timeStamp) {
+      Marked.insert(nxt);
+    }
+    t_Succ[now].push_back(nxt);
+    t_Pred[nxt].push_back(now);
+    InDeg[nxt] += 1;
+    if(Visited[nxt] == timeStamp) {
+      continue;
+    }
+    Go(nxt, tt);
+  }
+
+  InStack[now] = 0;
+}
+
+void TopologicalSort(uint32_t ss, uint32_t tt) {
+  timeStamp += 1;
+
+  Go(ss, tt);
+
+  TopoOrder.clear();
+  std::queue<uint32_t> wait;
+  wait.push(ss);
+  while( not wait.empty() ) {
+    uint32_t now = wait.front(); wait.pop();
+    TopoOrder.push_back(now);
+    for(uint32_t nxt : t_Succ[now]) {
+      InDeg[nxt] -= 1;
+      if(InDeg[nxt] == 0u) {
+        wait.push(nxt);
+      }
+    }
+  }
+}
+
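+// NextMarked[v] holds the nearest marked ancestors of v; if two predecessors
+// of a node share one, paths through them cannot be told apart, so the node
+// itself must be marked (instrumented).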
+std::vector< std::set<uint32_t> > NextMarked;
+bool Indistinguish(uint32_t node1, uint32_t node2) {
+  if(NextMarked[node1].size() > NextMarked[node2].size()){
+    uint32_t _swap = node1;
+    node1 = node2;
+    node2 = _swap;
+  }
+  for(uint32_t x : NextMarked[node1]) {
+    if( NextMarked[node2].find(x) != NextMarked[node2].end() ) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void MakeUniq(uint32_t now) {
+  bool StopFlag = false;
+  if (Marked.find(now) == Marked.end()) {
+    for(uint32_t pred1 : t_Pred[now]) {
+      for(uint32_t pred2 : t_Pred[now]) {
+        if(pred1 == pred2) continue;
+        if(Indistinguish(pred1, pred2)) {
+          Marked.insert(now);
+          StopFlag = true;
+          break;
+        }
+      }
+      if (StopFlag) {
+        break;
+      }
+    }
+  }
+  if(Marked.find(now) != Marked.end()) {
+    NextMarked[now].insert(now);
+  } else {
+    for(uint32_t pred : t_Pred[now]) {
+      for(uint32_t x : NextMarked[pred]) {
+        NextMarked[now].insert(x);
+      }
+    }
+  }
+}
+
+void MarkSubGraph(uint32_t ss, uint32_t tt) {
+  TopologicalSort(ss, tt);
+  if(TopoOrder.empty()) return;
+
+  for(uint32_t i : TopoOrder) {
+    NextMarked[i].clear();
+  }
+
+  NextMarked[TopoOrder[0]].insert(TopoOrder[0]);
+  for(uint32_t i = 1 ; i < TopoOrder.size() ; i += 1) {
+    MakeUniq(TopoOrder[i]);
+  }
+}
+
+void MarkVertice(Function *F) {
+  uint32_t s = start_point;
+
+  InDeg.resize(Blocks.size());
+  Visited.resize(Blocks.size());
+  InStack.resize(Blocks.size());
+  t_Succ.resize(Blocks.size());
+  t_Pred.resize(Blocks.size());
+  NextMarked.resize(Blocks.size());
+
+  for( uint32_t i = 0 ; i < Blocks.size() ; i += 1 ) {
+    Visited[i] = InStack[i] = InDeg[i] = 0;
+    t_Succ[i].clear();
+    t_Pred[i].clear();
+  }
+  timeStamp = 0;
+  uint32_t t = 0;
+  //MarkSubGraph(s, t);
+  //return;
+
+  while( s != t ) {
+    MarkSubGraph(DominatorTree::idom[t], t);
+    t = DominatorTree::idom[t];
+  }
+
+}
+
+// Returns {marked nodes, nodes marked above (back-edge targets, i.e. loop heads)}
+std::pair<std::vector<BasicBlock *>,
+          std::vector<BasicBlock *> >markNodes(Function *F) {
+  assert(F->size() > 0 && "Function can not be empty");
+
+  reset();
+  labelEachBlock(F);
+  buildCFG(F);
+  turnCFGintoDAG(F);
+  DominatorTree::DominatorTree(F);
+  MarkVertice(F);
+
+  std::vector<BasicBlock *> Result , ResultAbove;
+  for( uint32_t x : Markabove ) {
+    auto it = Marked.find( x );
+    if( it != Marked.end() )
+      Marked.erase( it );
+    if( x )
+      ResultAbove.push_back(Blocks[x]);
+  }
+  for( uint32_t x : Marked ) {
+    if (x == 0) {
+      continue;
+    } else {
+      Result.push_back(Blocks[x]);
+    }
+  }
+
+  return { Result , ResultAbove };
+}
diff --git a/llvm_mode/MarkNodes.h b/llvm_mode/MarkNodes.h
new file mode 100644
index 00000000..e3bf3ce5
--- /dev/null
+++ b/llvm_mode/MarkNodes.h
@@ -0,0 +1,11 @@
+#ifndef __MARK_NODES__
+#define __MARK_NODES__
+
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include<vector>
+
+std::pair<std::vector<llvm::BasicBlock *>,
+          std::vector<llvm::BasicBlock *>> markNodes(llvm::Function *F);
+
+#endif
diff --git a/llvm_mode/README.instrim b/llvm_mode/README.instrim
new file mode 100644
index 00000000..956a9856
--- /dev/null
+++ b/llvm_mode/README.instrim
@@ -0,0 +1,26 @@
+# InsTrim
+InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing
+
+
+## Introduction
+
+InsTrim uses the control flow graph (CFG) and markers to instrument just
+what is necessary in the binary in llvm_mode. It is about 20-25% faster,
+at the cost of lower path discovery.
+
+
+## Usage
+
+Set the environment variable AFL_LLVM_INSTRIM=1
+
+There is also an advanced mode which instruments loops in such a way that
+afl-fuzz can see which loop path has been selected, but not how often the
+loop has been re-run.
+This again trades path information for speed.
+To enable this mode set AFL_LLVM_INSTRIM_LOOPHEAD=1.
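+
+For example (a sketch - the build steps depend on your target):
+
+    export AFL_LLVM_INSTRIM=1
+    CC=afl-clang-fast CXX=afl-clang-fast++ ./configure
+    make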
+
+
+## Background
+
+The paper: [InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing](https://www.ndss-symposium.org/wp-content/uploads/2018/07/bar2018_14_Hsu_paper.pdf)
diff --git a/llvm_mode/README.laf-intel b/llvm_mode/README.laf-intel
index 891ab5fd..340216c3 100644
--- a/llvm_mode/README.laf-intel
+++ b/llvm_mode/README.laf-intel
@@ -8,13 +8,13 @@ compile the target project.
 
 The following options exist:
 
-export LAF_SPLIT_SWITCHES=1     Enables the split-switches pass.
+export AFL_LLVM_LAF_SPLIT_SWITCHES=1     Enables the split-switches pass.
 
-export LAF_TRANSFORM_COMPARES=1 Enables the transform-compares pass
+export AFL_LLVM_LAF_TRANSFORM_COMPARES=1 Enables the transform-compares pass
                              (strcmp, memcmp, strncmp, strcasecmp, strncasecmp).
 
-export LAF_SPLIT_COMPARES=1     Enables the split-compares pass.
+export AFL_LLVM_LAF_SPLIT_COMPARES=1     Enables the split-compares pass.
       By default it will split all compares with a bit width <= 64 bits.
       You can change this behaviour by setting 
-          export LAF_SPLIT_COMPARES_BITW=<bit_width>.
+          export AFL_LLVM_LAF_SPLIT_COMPARES_BITW=<bit_width>.
 
diff --git a/llvm_mode/README.llvm b/llvm_mode/README.llvm
index dc860e97..a0c40211 100644
--- a/llvm_mode/README.llvm
+++ b/llvm_mode/README.llvm
@@ -3,6 +3,7 @@ Fast LLVM-based instrumentation for afl-fuzz
 ============================================
 
   (See ../docs/README for the general instruction manual.)
+  (See ../gcc_plugin/README.gcc for the GCC-based instrumentation.)
 
 1) Introduction
 ---------------
@@ -30,7 +31,7 @@ several interesting properties:
   - The instrumentation can cope a bit better with multi-threaded targets.
 
   - Because the feature relies on the internals of LLVM, it is clang-specific
-    and will *not* work with GCC.
+    and will *not* work with GCC (see ../gcc_plugin/ for an alternative).
 
 Once this implementation is shown to be sufficiently robust and portable, it
 will probably replace afl-clang. For now, it can be built separately and
@@ -38,8 +39,8 @@ co-exists with the original code.
 
 The idea and much of the implementation comes from Laszlo Szekeres.
 
-2) How to use
--------------
+2) How to use this
+------------------
 
 In order to leverage this mechanism, you need to have clang installed on your
 system. You should also make sure that the llvm-config tool is in your path
@@ -69,21 +70,47 @@ operating mode of AFL, e.g.:
 Be sure to also include CXX set to afl-clang-fast++ for C++ code.
 
 The tool honors roughly the same environmental variables as afl-gcc (see
-../docs/env_variables.txt). This includes AFL_INST_RATIO, AFL_USE_ASAN,
-AFL_HARDEN, and AFL_DONT_OPTIMIZE.
+../docs/env_variables.txt). This includes AFL_USE_ASAN,
+AFL_HARDEN, and AFL_DONT_OPTIMIZE. However, AFL_INST_RATIO is not honored,
+as it serves no good purpose with the more effective InsTrim CFG
+analysis.
 
 Note: if you want the LLVM helper to be installed on your system for all
 users, you need to build it before issuing 'make install' in the parent
 directory.
 
-3) Gotchas, feedback, bugs
+3) Options
+----------
+
+Several options are present to make llvm_mode faster or help it rearrange
+the code to make afl-fuzz path discovery easier.
+
+If you just need to instrument specific parts of the code, you can whitelist
+which C/C++ files to actually instrument. See README.whitelist.
+
+For splitting memcmp, strncmp, etc. please see README.laf-intel
+
+Then there is an optimized instrumentation strategy that uses CFGs and
+markers to instrument just what is needed. This increases speed by 20-25%,
+but results in lower path discovery.
+If you want to use this, set AFL_LLVM_INSTRIM=1.
+See README.instrim
+
+Finally, if your llvm version is 8 or lower, you can activate a mode that
+prevents a counter overflow from resulting in a 0 value. This is good for
+path discovery, but the llvm implementation of this functionality for Intel
+targets is not optimal and was only fixed in llvm 9.
+You can set this with AFL_LLVM_NOT_ZERO=1.
+See README.neverzero
+
+
+4) Gotchas, feedback, bugs
 --------------------------
 
 This is an early-stage mechanism, so field reports are welcome. You can send bug
 reports to <afl-users@googlegroups.com>.
 
-4) Bonus feature #1: deferred instrumentation
----------------------------------------------
+5) Bonus feature #1: deferred initialization
+--------------------------------------------
 
 AFL tries to optimize performance by executing the targeted binary just once,
 stopping it just before main(), and then cloning this "master" process to get
@@ -129,7 +156,7 @@ will keep working normally when compiled with a tool other than afl-clang-fast.
 Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will
 *not* generate a deferred-initialization binary) - and you should be all set!
 
-5) Bonus feature #2: persistent mode
+6) Bonus feature #2: persistent mode
 ------------------------------------
 
 Some libraries provide APIs that are stateless, or whose state can be reset in
@@ -169,7 +196,7 @@ PS. Because there are task switches still involved, the mode isn't as fast as
 faster than the normal fork() model, and compared to in-process fuzzing,
 should be a lot more robust.
 
-6) Bonus feature #3: new 'trace-pc-guard' mode
+7) Bonus feature #3: new 'trace-pc-guard' mode
 ----------------------------------------------
 
 Recent versions of LLVM are shipping with a built-in execution tracing feature
@@ -178,10 +205,8 @@ post-process the assembly or install any compiler plugins. See:
 
   http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards
 
-As of this writing, the feature is only available on SVN trunk, and is yet to
-make it to an official release of LLVM. Nevertheless, if you have a
-sufficiently recent compiler and want to give it a try, build afl-clang-fast
-this way:
+If you have a sufficiently recent compiler and want to give it a try, build
+afl-clang-fast this way:
 
   AFL_TRACE_PC=1 make clean all
 
diff --git a/llvm_mode/README.neverzero b/llvm_mode/README.neverzero
new file mode 100644
index 00000000..ef873acb
--- /dev/null
+++ b/llvm_mode/README.neverzero
@@ -0,0 +1,22 @@
+Usage
+=====
+
+In larger, complex or highly iterative programs the counters in the map that
+collects the edge pairs can easily fill up and wrap around.
+This is not much of an issue - unless, by chance, a counter wraps to exactly
+0 when the program execution ends.
+In this case afl-fuzz is not able to see that the pair has been accessed and
+will ignore it.
+
+NeverZero prevents this behaviour. If a counter wraps, it jumps over the 0
+directly to a 1. This improves path discovery (by a very little amount)
+at a very little cost (one instruction per edge).
+
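+A minimal C sketch of the skip-zero update (illustrative only; the actual
+instrumentation is emitted as LLVM IR, see llvm_mode/afl-llvm-pass.so.cc):
+
+  #include <stdio.h>
+
+  int main(void) {
+    unsigned char counter = 255; /* an edge counter at its maximum value */
+    counter += 1;                /* wraps around to 0 */
+    counter += (counter == 0);   /* NeverZero: skip the 0, land on 1 */
+    printf("%u\n", counter);     /* prints 1 */
+    return 0;
+  }
+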
+This is implemented in afl-gcc; for llvm_mode, however, it is optional if
+the llvm version is below 9 - as there is a performance bug that is only
+fixed in version 9 and onwards.
+
+If you want to enable this for llvm < 9 then set
+
+export AFL_LLVM_NOT_ZERO=1
+
diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c
index 1e2e04ea..a4bb7539 100644
--- a/llvm_mode/afl-clang-fast.c
+++ b/llvm_mode/afl-clang-fast.c
@@ -32,6 +32,7 @@
 #include <unistd.h>
 #include <stdlib.h>
 #include <string.h>
+#include <assert.h>
 
 static u8*  obj_path;               /* Path to runtime libraries         */
 static u8** cc_params;              /* Parameters passed to the real CC  */
@@ -87,7 +88,7 @@ static void find_obj(u8* argv0) {
     return;
   }
 
-  FATAL("Unable to find 'afl-llvm-rt.o' or 'afl-llvm-pass.so'. Please set AFL_PATH");
+  FATAL("Unable to find 'afl-llvm-rt.o' or 'afl-llvm-pass.so.cc'. Please set AFL_PATH");
  
 }
 
@@ -112,29 +113,29 @@ static void edit_params(u32 argc, char** argv) {
     cc_params[0] = alt_cc ? alt_cc : (u8*)"clang";
   }
 
-  /* There are two ways to compile afl-clang-fast. In the traditional mode, we
-     use afl-llvm-pass.so to inject instrumentation. In the experimental
+  /* There are three ways to compile with afl-clang-fast. In the traditional
+     mode, we use afl-llvm-pass.so, then there is libLLVMInsTrim.so which is
+     much faster but has less coverage. Finally, there is the experimental
      'trace-pc-guard' mode, we use native LLVM instrumentation callbacks
-     instead. The latter is a very recent addition - see:
-
+     instead. For trace-pc-guard see:
      http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards */
 
   // laf
-  if (getenv("LAF_SPLIT_SWITCHES")) {
+  if (getenv("LAF_SPLIT_SWITCHES")||getenv("AFL_LLVM_LAF_SPLIT_SWITCHES")) {
     cc_params[cc_par_cnt++] = "-Xclang";
     cc_params[cc_par_cnt++] = "-load";
     cc_params[cc_par_cnt++] = "-Xclang";
     cc_params[cc_par_cnt++] = alloc_printf("%s/split-switches-pass.so", obj_path);
   }
 
-  if (getenv("LAF_TRANSFORM_COMPARES")) {
+  if (getenv("LAF_TRANSFORM_COMPARES")||getenv("AFL_LLVM_LAF_TRANSFORM_COMPARES")) {
     cc_params[cc_par_cnt++] = "-Xclang";
     cc_params[cc_par_cnt++] = "-load";
     cc_params[cc_par_cnt++] = "-Xclang";
     cc_params[cc_par_cnt++] = alloc_printf("%s/compare-transform-pass.so", obj_path);
   }
 
-  if (getenv("LAF_SPLIT_COMPARES")) {
+  if (getenv("LAF_SPLIT_COMPARES")||getenv("AFL_LLVM_LAF_SPLIT_COMPARES")) {
     cc_params[cc_par_cnt++] = "-Xclang";
     cc_params[cc_par_cnt++] = "-load";
     cc_params[cc_par_cnt++] = "-Xclang";
@@ -143,14 +144,18 @@ static void edit_params(u32 argc, char** argv) {
   // /laf
 
 #ifdef USE_TRACE_PC
-  cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard";
-  cc_params[cc_par_cnt++] = "-mllvm";
-  cc_params[cc_par_cnt++] = "-sanitizer-coverage-block-threshold=0";
+  cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard"; // edge coverage by default
+  //cc_params[cc_par_cnt++] = "-mllvm";
+  //cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-cmp,trace-div,trace-gep";
+  //cc_params[cc_par_cnt++] = "-sanitizer-coverage-block-threshold=0";
 #else
   cc_params[cc_par_cnt++] = "-Xclang";
   cc_params[cc_par_cnt++] = "-load";
   cc_params[cc_par_cnt++] = "-Xclang";
-  cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path);
+  if (getenv("AFL_LLVM_INSTRIM") != NULL || getenv("INSTRIM_LIB") != NULL)
+    cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path);
+  else
+    cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path);
 #endif /* ^USE_TRACE_PC */
 
   cc_params[cc_par_cnt++] = "-Qunused-arguments";
@@ -246,6 +251,10 @@ static void edit_params(u32 argc, char** argv) {
 
   }
 
+#ifdef USEMMAP
+  cc_params[cc_par_cnt++] = "-lrt";
+#endif
+
   cc_params[cc_par_cnt++] = "-D__AFL_HAVE_MANUAL_CONTROL=1";
   cc_params[cc_par_cnt++] = "-D__AFL_COMPILER=1";
   cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1";
diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc
index d46db7c0..cfeff968 100644
--- a/llvm_mode/afl-llvm-pass.so.cc
+++ b/llvm_mode/afl-llvm-pass.so.cc
@@ -118,6 +118,10 @@ bool AFLCoverage::runOnModule(Module &M) {
 
   }
 
+#if LLVM_VERSION_MAJOR < 9
+  char* neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO");
+#endif
+
   /* Get globals for the SHM region and the previous location. Note that
      __afl_prev_loc is thread-local. */
 
@@ -227,21 +231,69 @@ bool AFLCoverage::runOnModule(Module &M) {
 
       LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
       MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
-      Value *MapPtrIdx =
-          IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, CurLoc));
+      Value *MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, CurLoc));
 
       /* Update bitmap */
 
       LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
       Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+
       Value *Incr = IRB.CreateAdd(Counter, ConstantInt::get(Int8Ty, 1));
-      IRB.CreateStore(Incr, MapPtrIdx)
-          ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+
+#if LLVM_VERSION_MAJOR < 9
+      if (neverZero_counters_str != NULL) { // with llvm 9 we make this the default as the bug in llvm is then fixed
+#endif
+        /* hexcoder: Realize a counter that skips zero during overflow.
+         * Once this counter reaches its maximum value, it next increments to 1
+         *
+         * Instead of
+         * Counter + 1 -> Counter
+         * we inject now this
+         * Counter + 1 -> {Counter, OverflowFlag}
+         * Counter + OverflowFlag -> Counter
+         */
+/*       // we keep the old solutions just in case
+         // Solution #1
+         if (neverZero_counters_str[0] == '1') {
+           CallInst *AddOv = IRB.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, Counter, ConstantInt::get(Int8Ty, 1));
+           AddOv->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+           Value *SumWithOverflowBit = AddOv;
+           Incr = IRB.CreateAdd(IRB.CreateExtractValue(SumWithOverflowBit, 0),  // sum 
+                                IRB.CreateZExt( // convert from one bit type to 8 bits type 
+                                               IRB.CreateExtractValue(SumWithOverflowBit, 1), // overflow
+                                               Int8Ty));
+          // Solution #2
+          } else if (neverZero_counters_str[0] == '2') {
+             auto cf = IRB.CreateICmpEQ(Counter, ConstantInt::get(Int8Ty, 255));
+             Value *HowMuch = IRB.CreateAdd(ConstantInt::get(Int8Ty, 1), cf);
+             Incr = IRB.CreateAdd(Counter, HowMuch);
+          // Solution #3
+          } else if (neverZero_counters_str[0] == '3') {
+*/
+          // this is the solution we choose because llvm9 should do the right thing here
+            auto cf = IRB.CreateICmpEQ(Incr, ConstantInt::get(Int8Ty, 0));
+            auto carry = IRB.CreateZExt(cf, Int8Ty);
+            Incr = IRB.CreateAdd(Incr, carry);
+/*
+         // Solution #4
+         } else if (neverZero_counters_str[0] == '4') {
+            auto cf = IRB.CreateICmpULT(Incr, ConstantInt::get(Int8Ty, 1));
+            auto carry = IRB.CreateZExt(cf, Int8Ty);
+            Incr = IRB.CreateAdd(Incr, carry);
+         } else {
+            fprintf(stderr, "Error: unknown value for AFL_NZERO_COUNTS: %s (valid is 1-4)\n", neverZero_counters_str);
+            exit(-1);
+         }
+*/
+#if LLVM_VERSION_MAJOR < 9
+      }
+#endif
+
+      IRB.CreateStore(Incr, MapPtrIdx)->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
       /* Set prev_loc to cur_loc >> 1 */
 
-      StoreInst *Store =
-          IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc);
+      StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc);
       Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
       inst_blocks++;
diff --git a/llvm_mode/afl-llvm-rt.o.c b/llvm_mode/afl-llvm-rt.o.c
index 342dcc90..debde204 100644
--- a/llvm_mode/afl-llvm-rt.o.c
+++ b/llvm_mode/afl-llvm-rt.o.c
@@ -44,6 +44,9 @@
 #  define CONST_PRIO 0
 #endif /* ^USE_TRACE_PC */
 
+#include <sys/mman.h>
+#include <fcntl.h>
+
 
 /* Globals needed by the injected instrumentation. The __afl_area_initial region
    is used for instrumentation output before __afl_map_shm() has a chance to run.
@@ -71,10 +74,34 @@ static void __afl_map_shm(void) {
      hacky .init code to work correctly in projects such as OpenSSL. */
 
   if (id_str) {
+#ifdef USEMMAP
+    const char *shm_file_path = id_str;
+    int shm_fd = -1;
+    unsigned char *shm_base = NULL;
+
+    /* create the shared memory segment as if it was a file */
+    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    if (shm_fd == -1) {
+      printf("shm_open() failed\n");
+      exit(1);
+    }
+
+    /* map the shared memory segment to the address space of the process */
+    shm_base = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
+    if (shm_base == MAP_FAILED) {
+      close(shm_fd);
+      shm_fd = -1;
 
+      printf("mmap() failed\n");
+      exit(2);
+    }
+
+    __afl_area_ptr = shm_base;
+#else
     u32 shm_id = atoi(id_str);
 
     __afl_area_ptr = shmat(shm_id, NULL, 0);
+#endif
 
     /* Whooooops. */
 
diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc
index d9a1f945..d0dbe8ec 100644
--- a/llvm_mode/compare-transform-pass.so.cc
+++ b/llvm_mode/compare-transform-pass.so.cc
@@ -257,6 +257,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, const
         std::vector<Value *> args;
         args.push_back(load);
         load = IRB.CreateCall(tolowerFn, args, "tmp");
+        load = IRB.CreateTrunc(load, Int8Ty);
       }
       Value *isub;
       if (HasStr1)
@@ -272,14 +273,9 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, const
         next_bb =  BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb);
         BranchInst::Create(end_bb, next_bb);
 
-#if LLVM_VERSION_MAJOR < 8
-        TerminatorInst *term = cur_bb->getTerminator();
-#else
-        Instruction *term = cur_bb->getTerminator();
-#endif
         Value *icmp = IRB.CreateICmpEQ(isub, ConstantInt::get(Int8Ty, 0));
         IRB.CreateCondBr(icmp, next_bb, end_bb);
-        term->eraseFromParent();
+        cur_bb->getTerminator()->eraseFromParent();
       } else {
         //IRB.CreateBr(end_bb);
       }
@@ -304,7 +300,8 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, const
 
 bool CompareTransform::runOnModule(Module &M) {
 
-  llvm::errs() << "Running compare-transform-pass by laf.intel@gmail.com, extended by heiko@hexco.de\n";
+  if (getenv("AFL_QUIET") == NULL)
+    llvm::errs() << "Running compare-transform-pass by laf.intel@gmail.com, extended by heiko@hexco.de\n";
   transformCmps(M, true, true, true, true, true);
   verifyModule(M);
 
diff --git a/llvm_mode/split-compares-pass.so.cc b/llvm_mode/split-compares-pass.so.cc
index 25ccb3b4..c025628f 100644
--- a/llvm_mode/split-compares-pass.so.cc
+++ b/llvm_mode/split-compares-pass.so.cc
@@ -477,6 +477,8 @@ bool SplitComparesTransform::runOnModule(Module &M) {
   int bitw = 64;
 
   char* bitw_env = getenv("LAF_SPLIT_COMPARES_BITW");
+  if (!bitw_env)
+    bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW");
   if (bitw_env) {
     bitw = atoi(bitw_env);
   }
@@ -485,7 +487,8 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 
   simplifySignedness(M);
 
-  errs() << "Split-compare-pass by laf.intel@gmail.com\n"; 
+  if (getenv("AFL_QUIET") == NULL)
+    errs() << "Split-compare-pass by laf.intel@gmail.com\n"; 
 
   switch (bitw) {
     case 64:
diff --git a/llvm_mode/split-switches-pass.so.cc b/llvm_mode/split-switches-pass.so.cc
index 4c28f34c..1ace3185 100644
--- a/llvm_mode/split-switches-pass.so.cc
+++ b/llvm_mode/split-switches-pass.so.cc
@@ -244,7 +244,8 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) {
 
     /* If there is only the default destination or the condition checks 8 bit or less, don't bother with the code below. */
     if (!SI->getNumCases() || bitw <= 8) {
-      errs() << "skip trivial switch..\n";
+      if (getenv("AFL_QUIET") == NULL)
+        errs() << "skip trivial switch..\n";
       continue;
     }
 
@@ -302,7 +303,8 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) {
 
 bool SplitSwitchesTransform::runOnModule(Module &M) {
 
-  llvm::errs() << "Running split-switches-pass by laf.intel@gmail.com\n"; 
+  if (getenv("AFL_QUIET") == NULL)
+    llvm::errs() << "Running split-switches-pass by laf.intel@gmail.com\n"; 
   splitSwitches(M);
   verifyModule(M);
 
diff --git a/python_mutators/README b/python_mutators/README
index 174c8a64..21a16e52 100644
--- a/python_mutators/README
+++ b/python_mutators/README
@@ -9,3 +9,7 @@ simple-chunk-replace.py - this is a simple example where chunks are replaced
 
 common.py 	- this can be used for common functions and helpers.
 		  the examples do not use this though. But you can :)
+
+wrapper_afl_min.py - mutation of XML documents, loads XmlMutatorMin.py
+
+XmlMutatorMin.py - module for XML mutation
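+
+A minimal mutator module sketch (the entry points match those used by
+wrapper_afl_min.py: AFL calls init(seed) once at startup, then calls
+fuzz(buf, add_buf) for every iteration and fuzzes with the returned bytes):
+
+  def init(seed):
+      pass
+
+  def fuzz(buf, add_buf):
+      return buf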
diff --git a/python_mutators/XmlMutatorMin.py b/python_mutators/XmlMutatorMin.py
new file mode 100644
index 00000000..058b7e61
--- /dev/null
+++ b/python_mutators/XmlMutatorMin.py
@@ -0,0 +1,331 @@
+#!/usr/bin/python
+
+""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """
+
+from __future__ import print_function
+from copy import deepcopy
+from lxml import etree as ET
+import random, re, io
+
+###########################
+# The XmlMutatorMin class #
+###########################
+
+class XmlMutatorMin:
+
+    """
+        Optionals parameters:
+            seed        Seed used by the PRNG (default: "RANDOM")
+            verbose     Verbosity (default: False)
+    """
+
+    def __init__(self, seed="RANDOM", verbose=False):
+
+        """ Initialize seed, database and mutators """
+
+        # Verbosity
+        self.verbose = verbose
+
+        # Initialize PRNG
+        self.seed = str(seed)
+        if self.seed == "RANDOM":
+            random.seed()
+        else:
+            if self.verbose:
+                print("Static seed '%s'" % self.seed)
+            random.seed(self.seed)
+
+        # Initialize input and output documents
+        self.input_tree = None
+        self.tree = None
+
+        # High-level mutators (no database needed)
+        hl_mutators_delete = [ "del_node_and_children", "del_node_but_children", "del_attribute", "del_content" ] # Delete items
+        hl_mutators_fuzz = ["fuzz_attribute"] # Randomly change attribute values
+
+        # Exposed mutators
+        self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete
+        
+    def __parse_xml (self, xml):
+
+        """ Parse an XML string. Basic wrapper around lxml.parse() """
+
+        try:
+            # Function parse() takes care of comments / DTD / processing instructions / ...
+            tree = ET.parse(io.BytesIO(xml))
+        except ET.ParseError:
+            raise RuntimeError("XML isn't well-formed!")
+        except LookupError as e:
+            raise RuntimeError(e)
+
+        # Return a document wrapper
+        return tree
+
+    def __exec_among (self, module, functions, min_times, max_times):
+
+        """ Randomly execute $functions between $min and $max times """
+
+        for i in xrange (random.randint (min_times, max_times)):
+            # Function names are mangled because they are "private"
+            getattr (module, "_XmlMutatorMin__" + random.choice(functions)) ()
+
+    def __serialize_xml (self, tree):
+
+        """ Serialize a XML document. Basic wrapper around lxml.tostring() """
+
+        return ET.tostring(tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding)
+
+    def __ver (self, version):
+
+        """ Helper for displaying lxml version numbers """
+
+        return ".".join(map(str, version))
+
+    def reset (self):
+    
+        """ Reset the mutator """
+
+        self.tree = deepcopy(self.input_tree)
+
+    def init_from_string (self, input_string):
+    
+        """ Initialize the mutator from a XML string """
+
+        # Get a pointer to the top-element
+        self.input_tree = self.__parse_xml(input_string)
+
+        # Get a working copy
+        self.tree = deepcopy(self.input_tree)
+
+    def save_to_string (self):
+    
+        """ Return the current XML document as UTF-8 string """
+
+        # Return a text version of the tree
+        return self.__serialize_xml(self.tree)
+
+    def __pick_element (self, exclude_root_node = False):
+    
+        """ Pick a random element from the current document """
+
+        # Get a list of all elements, excluding nodes like PIs and comments
+        elems = list(self.tree.getroot().iter(tag=ET.Element))
+
+        # Is the root node excluded?
+        if exclude_root_node:
+            start = 1
+        else:
+            start = 0
+
+        # Pick a random element
+        try:
+            elem_id = random.randint (start, len(elems) - 1)
+            elem = elems[elem_id]
+        except ValueError:
+            # Should only occur if "exclude_root_node = True"
+            return (None, None)
+
+        return (elem_id, elem)
+
+    def __fuzz_attribute (self):
+    
+        """ Fuzz (part of) an attribute value """
+
+        # Select a node to modify
+        (rand_elem_id, rand_elem) = self.__pick_element()
+
+        # Get all the attributes
+        attribs = rand_elem.keys()
+
+        # Are there any attributes?
+        if len(attribs) < 1:
+            if self.verbose:
+                print("No attribute: can't replace!")
+            return
+
+        # Pick a random attribute
+        rand_attrib_id = random.randint (0, len(attribs) - 1)
+        rand_attrib = attribs[rand_attrib_id]
+
+        # We have the attribute to modify
+        # Get its value
+        attrib_value = rand_elem.get(rand_attrib)
+        # print("- Value: " + attrib_value)
+
+        # Should we work on the whole value?
+        func_call = "(?P<func>[a-zA-Z:\-]+)\((?P<args>.*?)\)"
+        p = re.compile(func_call)
+        l = p.findall(attrib_value)
+        if random.choice((True,False)) and l:
+            # Randomly pick one of the function calls
+            (func, args) = random.choice(l)
+            # Split by "," and randomly pick one of the arguments
+            value = random.choice(args.split(','))
+            # Remove superfluous characters
+            unclean_value = value
+            value = value.strip(" ").strip("'")
+            # print("Selected argument: [%s]" % value)
+        else:
+            value = attrib_value
+
+        # For each type, define some possible replacement values
+        choices_number =    ( \
+                                "0", \
+                                "11111", \
+                                "-128", \
+                                "2", \
+                                "-1", \
+                                "1/3", \
+                                "42/0", \
+                                "1094861636 idiv 1.0", \
+                                "-1123329771506872 idiv 3.8", \
+                                "17=$numericRTF", \
+                                str(3 + random.randrange(0, 100)), \
+                            )
+
+        choices_letter =    ( \
+                                "P" * (25 * random.randrange(1, 100)), \
+                                "%s%s%s%s%s%s", \
+                                "foobar", \
+                            )
+
+        choices_alnum =     ( \
+                                "Abc123", \
+                                "020F0302020204030204", \
+                                "020F0302020204030204" * (random.randrange(5, 20)), \
+                            )
+
+        # Fuzz the value
+        if random.choice((True,False)) and value == "":
+
+            # Empty
+            new_value = value
+
+        elif random.choice((True,False)) and value.isdigit():
+
+            # Numbers
+            new_value = random.choice(choices_number)
+
+        elif random.choice((True,False)) and value.isalpha():
+
+            # Letters
+            new_value = random.choice(choices_letter)
+
+        elif random.choice((True,False)) and value.isalnum():
+
+            # Alphanumeric
+            new_value = random.choice(choices_alnum)
+
+        else:
+
+            # Default type
+            new_value = random.choice(choices_alnum + choices_letter + choices_number)
+
+        # If we worked on a substring, apply changes to the whole string
+        if value != attrib_value:
+            # No ' around empty values
+            if new_value != "" and value != "":
+                new_value = "'" + new_value + "'"
+            # Apply changes
+            new_value = attrib_value.replace(unclean_value, new_value)
+
+        # Log something
+        if self.verbose:
+            print("Fuzzing attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag))
+
+        # Modify the attribute
+        rand_elem.set(rand_attrib, new_value.decode("utf-8"))
+
+    def __del_node_and_children (self):
+
+        """ High-level minimizing mutator
+            Delete a random node and its children (i.e. delete a random tree) """
+
+        self.__del_node(True)
+
+    def __del_node_but_children (self):
+
+        """ High-level minimizing mutator
+            Delete a random node but keep its children (i.e. link them to the parent of the deleted node) """
+
+        self.__del_node(False)
+
+    def __del_node (self, delete_children):
+    
+        """ Called by the __del_node_* mutators """
+
+        # Select a node to modify (but the root one)
+        (rand_elem_id, rand_elem) = self.__pick_element (exclude_root_node = True)
+
+        # If the document includes only a top-level element
+        # then we can't pick an element (given that "exclude_root_node = True")
+
+        # Is the document deep enough?
+        if rand_elem is None:
+            if self.verbose:
+                print("Can't delete a node: document not deep enough!")
+            return
+
+        # Log something
+        if self.verbose:
+            but_or_and = "and" if delete_children else "but"
+            print("Deleting tag #%i '%s' %s its children" % (rand_elem_id, rand_elem.tag, but_or_and))
+
+        if delete_children is False:
+            # Link children of the random (soon to be deleted) node to its parent
+            for child in rand_elem:
+                rand_elem.getparent().append(child)
+            
+        # Remove the node
+        rand_elem.getparent().remove(rand_elem)
+
+    def __del_content (self):
+    
+        """ High-level minimizing mutator
+            Delete the attributes and children of a random node """
+
+        # Select a node to modify
+        (rand_elem_id, rand_elem) = self.__pick_element()
+
+        # Log something
+        if self.verbose:
+            print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag))
+
+        # Reset the node
+        rand_elem.clear()
+
+    def __del_attribute (self):
+     
+        """ High-level minimizing mutator
+            Delete a random attribute from a random node """
+
+        # Select a node to modify
+        (rand_elem_id, rand_elem) = self.__pick_element()
+
+        # Get all the attributes
+        attribs = rand_elem.keys()
+
+        # Are there any attributes?
+        if len(attribs) < 1:
+            if self.verbose:
+                print("No attribute: can't delete!")
+            return
+
+        # Pick a random attribute
+        rand_attrib_id = random.randint (0, len(attribs) - 1)
+        rand_attrib = attribs[rand_attrib_id]
+
+        # Log something
+        if self.verbose:
+            print("Deleting attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag))
+
+        # Delete the attribute
+        rand_elem.attrib.pop(rand_attrib)
+
+    def mutate (self, min=1, max=5):
+    
+        """ Execute some high-level mutators between $min and $max times, then some medium-level ones """
+
+        # High-level mutation
+        self.__exec_among(self, self.hl_mutators_all, min, max)
+
diff --git a/python_mutators/wrapper_afl_min.py b/python_mutators/wrapper_afl_min.py
new file mode 100644
index 00000000..df09b40a
--- /dev/null
+++ b/python_mutators/wrapper_afl_min.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+
+from XmlMutatorMin import XmlMutatorMin
+
+# Default settings (production mode)
+
+__mutator__ = None
+__seed__ = "RANDOM"
+__log__ = False
+__log_file__ = "wrapper.log"
+
+# AFL functions
+
+def log(text):
+    """
+          Logger
+    """
+
+    global __seed__
+    global __log__
+    global __log_file__
+
+    if __log__:
+        with open(__log_file__, "a") as logf:
+            logf.write("[%s] %s\n" % (__seed__, text))
+
+def init(seed):
+    """
+          Called once when AFL starts up. Seed is used to identify the AFL instance in log files
+    """
+
+    global __mutator__
+    global __seed__
+
+    # Get the seed
+    __seed__ = seed
+
+    # Create a global mutation class
+    try:
+        __mutator__ = XmlMutatorMin(__seed__, verbose=__log__)
+        log("init(): Mutator created")
+    except RuntimeError as e:
+        log("init(): Can't create mutator: %s" % e.message)
+
+def fuzz(buf, add_buf):
+    """
+          Called for each fuzzing iteration.
+    """
+
+    global __mutator__ 
+
+    # Do we have a working mutator object?
+    if __mutator__ is None:
+        log("fuzz(): Can't fuzz, no mutator available")
+        return buf
+
+    # Try to use the AFL buffer
+    via_buffer = True
+
+    # Interpret the AFL buffer (an array of bytes) as a string
+    if via_buffer:
+        try:
+            buf_str = str(buf)
+            log("fuzz(): AFL buffer converted to a string")
+        except:
+            via_buffer = False
+            log("fuzz(): Can't convert AFL buffer to a string")
+
+    # Load XML from the AFL string
+    if via_buffer:
+        try:
+            __mutator__.init_from_string(buf_str)
+            log("fuzz(): Mutator successfully initialized with AFL buffer (%d bytes)" % len(buf_str))
+        except:
+            via_buffer = False
+            log("fuzz(): Can't initialize mutator with AFL buffer")
+
+    # If init from AFL buffer wasn't successful
+    if not via_buffer:
+        log("fuzz(): Returning unmodified AFL buffer")
+        return buf
+
+    # Successful initialization -> mutate
+    try:
+        __mutator__.mutate(max=5)
+        log("fuzz(): Input mutated")
+    except:
+        log("fuzz(): Can't mutate input => returning buf")
+        return buf
+
+    # Convert mutated data to an array of bytes
+    try:
+        data = bytearray(__mutator__.save_to_string())
+        log("fuzz(): Mutated data converted to bytes")
+    except:
+        log("fuzz(): Can't convert mutated data to bytes => returning buf")
+        return buf
+
+    # Everything went fine, returning mutated content
+    log("fuzz(): Returning %d bytes" % len(data))
+    return data
+
+# Main (for debug)
+
+if __name__ == '__main__':
+
+    __log__ = True
+    __log_file__ = "/dev/stdout"
+    __seed__ = "RANDOM"
+
+    init(__seed__)
+
+    in_1 = bytearray("<foo ddd='eeee'>ffff<a b='c' d='456' eee='ffffff'>zzzzzzzzzzzz</a><b yyy='YYY' zzz='ZZZ'></b></foo>")
+    in_2 = bytearray("<abc abc123='456' abcCBA='ppppppppppppppppppppppppppppp'/>")
+    out = fuzz(in_1, in_2)
+    print(out)
+
diff --git a/qemu_mode/README.qemu b/qemu_mode/README.qemu
index cf29088b..124fce12 100644
--- a/qemu_mode/README.qemu
+++ b/qemu_mode/README.qemu
@@ -117,7 +117,7 @@ program control flow without actually executing each and every code path.
 If you want to experiment with this mode of operation, there is a module
 contributed by Aleksandar Nikolich:
 
-  https://github.com/vrtadmin/moflow/tree/master/afl-dyninst
+  https://github.com/vanhauser-thc/afl-dyninst
   https://groups.google.com/forum/#!topic/afl-users/HlSQdbOTlpg
 
 At this point, the author reports the possibility of hiccups with stripped
diff --git a/qemu_mode/build_qemu_support.sh b/qemu_mode/build_qemu_support.sh
index 1e952f4e..78ad5680 100755
--- a/qemu_mode/build_qemu_support.sh
+++ b/qemu_mode/build_qemu_support.sh
@@ -133,6 +133,7 @@ patch -p1 <../patches/cpu-exec.diff || exit 1
 patch -p1 <../patches/syscall.diff || exit 1
 patch -p1 <../patches/translate-all.diff || exit 1
 patch -p1 <../patches/tcg.diff || exit 1
+patch -p1 <../patches/i386-translate.diff || exit 1
 
 echo "[+] Patching done."
 
diff --git a/qemu_mode/libcompcov/Makefile b/qemu_mode/libcompcov/Makefile
new file mode 100644
index 00000000..c984588b
--- /dev/null
+++ b/qemu_mode/libcompcov/Makefile
@@ -0,0 +1,42 @@
+#
+# american fuzzy lop - libcompcov
+# --------------------------------
+#
+# Written by Andrea Fioraldi <andreafioraldi@gmail.com>
+#
+# Copyright 2019 Andrea Fioraldi. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+
+PREFIX      ?= /usr/local
+HELPER_PATH  = $(PREFIX)/lib/afl
+
+VERSION     = $(shell grep '^\#define VERSION ' ../../config.h | cut -d '"' -f2)
+
+CFLAGS      ?= -O3 -funroll-loops
+CFLAGS      += -Wall -Wno-unused-result -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign
+LDFLAGS     += -ldl
+
+all: libcompcov.so compcovtest
+
+libcompcov.so: libcompcov.so.c ../../config.h
+	$(CC) $(CFLAGS) -shared -fPIC $< -o $@ $(LDFLAGS)
+
+.NOTPARALLEL: clean
+
+clean:
+	rm -f *.o *.so *~ a.out core core.[1-9][0-9]*
+	rm -f libcompcov.so compcovtest
+
+compcovtest:	compcovtest.cc
+	$(CXX) $< -o $@ 
+
+install: all
+	install -m 755 libcompcov.so $${DESTDIR}$(HELPER_PATH)
+	install -m 644 README.compcov $${DESTDIR}$(HELPER_PATH)
+
diff --git a/qemu_mode/libcompcov/README.compcov b/qemu_mode/libcompcov/README.compcov
new file mode 100644
index 00000000..2a4a0ee5
--- /dev/null
+++ b/qemu_mode/libcompcov/README.compcov
@@ -0,0 +1,33 @@
+================================================================
+strcmp() / memcmp() CompareCoverage library for AFLplusplus-QEMU
+================================================================
+
+  Written by Andrea Fioraldi <andreafioraldi@gmail.com>
+
+This Linux-only companion library allows you to instrument strcmp(), memcmp(),
+and related functions to log the CompareCoverage of these libcalls.
+
+Use this with caution. While this can greatly speed up the bypass of hard
+branch conditions, it can also waste a lot of time and take up unnecessary
+space in the shared memory when logging coverage for functions that don't
+process input-related data.
+
+To use the library, you *need* to make sure that your fuzzing target is linked
+dynamically and makes use of strcmp(), memcmp(), and related functions.
+For optimized binaries this is an issue, as those functions are often inlined
+and this module is not capable of logging the coverage in that case
+(compiling the target with -fno-builtin helps prevent such inlining).
+
+If you have the source code of the fuzzing target, you should not use this
+library and QEMU, but instead build it with afl-clang-fast and the laf-intel
+options.
+
+To use this library make sure to preload it with AFL_PRELOAD.
+
+  export AFL_PRELOAD=/path/to/libcompcov.so
+  export AFL_QEMU_COMPCOV=1
+  
+  afl-fuzz -Q -i input -o output <your options> -- <target args>
+
+The library makes use of https://github.com/ouadev/proc_maps_parser and so it
+is Linux-specific. However, this is not a strict dependency; other UNIX
+operating systems can be supported by simply replacing the code related to the
+/proc/self/maps parsing.
diff --git a/qemu_mode/libcompcov/compcovtest.cc b/qemu_mode/libcompcov/compcovtest.cc
new file mode 100644
index 00000000..fd1fda00
--- /dev/null
+++ b/qemu_mode/libcompcov/compcovtest.cc
@@ -0,0 +1,63 @@
+/////////////////////////////////////////////////////////////////////////
+//
+// Author: Mateusz Jurczyk (mjurczyk@google.com)
+//
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// solution: echo -ne 'The quick brown fox jumps over the lazy dog\xbe\xba\xfe\xca\xbe\xba\xfe\xca\xde\xc0\xad\xde\xef\xbe' | ./compcovtest
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+int main() {
+  char buffer[44] = { /* zero padding */ };
+  fread(buffer, 1, sizeof(buffer) - 1, stdin);
+
+  if (memcmp(&buffer[0], "The quick brown fox ", 20) != 0 ||
+      strncmp(&buffer[20], "jumps over ", 11) != 0 ||
+      strcmp(&buffer[31], "the lazy dog") != 0) {
+    return 1;
+  }
+
+  uint64_t x = 0;
+  fread(&x, sizeof(x), 1, stdin);
+  if (x != 0xCAFEBABECAFEBABE) {
+    return 2;
+  }
+
+  uint32_t y = 0;
+  fread(&y, sizeof(y), 1, stdin);
+  if (y != 0xDEADC0DE) {
+    return 3;
+  }
+
+  uint16_t z = 0;
+  fread(&z, sizeof(z), 1, stdin);
+
+  switch (z) {
+    case 0xBEEF:
+      break;
+
+    default:
+      return 4;
+  }
+
+  printf("Puzzle solved, congrats!\n");
+  abort();
+  return 0;
+}
diff --git a/qemu_mode/libcompcov/libcompcov.so.c b/qemu_mode/libcompcov/libcompcov.so.c
new file mode 100644
index 00000000..582230db
--- /dev/null
+++ b/qemu_mode/libcompcov/libcompcov.so.c
@@ -0,0 +1,279 @@
+/*
+
+   american fuzzy lop++ - strcmp() / memcmp() CompareCoverage library
+   ------------------------------------------------------------------
+
+   Written and maintained by Andrea Fioraldi <andreafioraldi@gmail.com>
+
+   Copyright 2019 Andrea Fioraldi. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   This Linux-only companion library allows you to instrument strcmp(),
+   memcmp(), and related functions to get compare coverage.
+   See README.compcov for more info.
+
+ */
+
+#define _GNU_SOURCE
+#include <dlfcn.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/shm.h>
+
+#include "../../types.h"
+#include "../../config.h"
+
+#include "pmparser.h"
+
+#ifndef __linux__
+#  error "Sorry, this library is Linux-specific for now!"
+#endif /* !__linux__ */
+
+/* Change this value to tune the compare coverage */
+
+#define MAX_CMP_LENGTH 32
+
+static void *__compcov_code_start,
+            *__compcov_code_end;
+
+static u8 *__compcov_afl_map;
+
+static int (*__libc_strcmp)(const char*, const char*);
+static int (*__libc_strncmp)(const char*, const char*, size_t);
+static int (*__libc_strcasecmp)(const char*, const char*);
+static int (*__libc_strncasecmp)(const char*, const char*, size_t);
+static int (*__libc_memcmp)(const void*, const void*, size_t);
+
+static int debug_fd = -1;
+
+
+static size_t __strlen2(const char *s1, const char *s2, size_t max_length) {
+  // from https://github.com/googleprojectzero/CompareCoverage
+  
+  size_t len = 0;
+  for (; len < max_length && s1[len] != '\0' && s2[len] != '\0'; len++) { }
+  return len;
+}
+
+/* Identify the binary boundaries in the memory mapping */
+
+static void __compcov_load(void) {
+  
+  __libc_strcmp = dlsym(RTLD_NEXT, "strcmp");
+  __libc_strncmp = dlsym(RTLD_NEXT, "strncmp");
+  __libc_strcasecmp = dlsym(RTLD_NEXT, "strcasecmp");
+  __libc_strncasecmp = dlsym(RTLD_NEXT, "strncasecmp");
+  __libc_memcmp = dlsym(RTLD_NEXT, "memcmp");
+  
+  char *id_str = getenv(SHM_ENV_VAR);
+  int shm_id;
+
+  if (id_str) {
+
+    shm_id = atoi(id_str);
+    __compcov_afl_map = shmat(shm_id, NULL, 0);
+
+    if (__compcov_afl_map == (void*)-1) exit(1);
+  } else {
+  
+    __compcov_afl_map = calloc(1, MAP_SIZE);
+  }
+
+  if (getenv("AFL_INST_LIBS")) {
+  
+    __compcov_code_start = (void*)0;
+    __compcov_code_end = (void*)-1;
+    return;
+  }
+
+  char* bin_name = getenv("AFL_COMPCOV_BINNAME");
+
+  procmaps_iterator* maps = pmparser_parse(-1);
+  procmaps_struct* maps_tmp = NULL;
+
+  while ((maps_tmp = pmparser_next(maps)) != NULL) {
+  
+    /* If AFL_COMPCOV_BINNAME is not set pick the first executable segment */
+    if (!bin_name || strstr(maps_tmp->pathname, bin_name) != NULL) {
+    
+      if (maps_tmp->is_x) {
+        if (!__compcov_code_start)
+            __compcov_code_start = maps_tmp->addr_start;
+        if (!__compcov_code_end)
+            __compcov_code_end = maps_tmp->addr_end;
+      }
+    }
+  }
+
+  pmparser_free(maps);
+}
+
+
+static void __compcov_trace(u64 cur_loc, const u8* v0, const u8* v1, size_t n) {
+
+  size_t i;
+  
+  if (debug_fd != -1) {
+    char debugbuf[4096];
+    snprintf(debugbuf, sizeof(debugbuf), "0x%llx %s %s %lu\n", cur_loc, v0 == NULL ? "(null)" : (char*)v0, v1 == NULL ? "(null)" : (char*)v1, n);
+    write(debug_fd, debugbuf, strlen(debugbuf));
+  }
+  
+  for (i = 0; i < n && v0[i] == v1[i]; ++i) {
+  
+    __compcov_afl_map[cur_loc +i]++;
+  }
+}
+
+/* Check whether an address falls inside the target binary's executable code. */
+
+static u8 __compcov_is_in_bound(const void* ptr) {
+
+  return ptr >= __compcov_code_start && ptr < __compcov_code_end;
+}
+
+
+/* Replacements for strcmp(), memcmp(), and so on. Note that these will be used
+   only if the target is compiled with -fno-builtin and linked dynamically. */
+
+#undef strcmp
+
+int strcmp(const char* str1, const char* str2) {
+
+  void* retaddr = __builtin_return_address(0);
+  
+  if (__compcov_is_in_bound(retaddr)) {
+
+    size_t n = __strlen2(str1, str2, MAX_CMP_LENGTH +1);
+    
+    if (n <= MAX_CMP_LENGTH) {
+    
+      u64 cur_loc = (u64)retaddr;
+      cur_loc  = (cur_loc >> 4) ^ (cur_loc << 8);
+      cur_loc &= MAP_SIZE - 1;
+      
+      __compcov_trace(cur_loc, str1, str2, n);
+    }
+  }
+
+  return __libc_strcmp(str1, str2);
+}
+
+
+#undef strncmp
+
+int strncmp(const char* str1, const char* str2, size_t len) {
+
+  void* retaddr = __builtin_return_address(0);
+  
+  if (__compcov_is_in_bound(retaddr)) {
+
+    size_t n = __strlen2(str1, str2, MAX_CMP_LENGTH +1);
+    n = MIN(n, len);
+    
+    if (n <= MAX_CMP_LENGTH) {
+    
+      u64 cur_loc = (u64)retaddr;
+      cur_loc  = (cur_loc >> 4) ^ (cur_loc << 8);
+      cur_loc &= MAP_SIZE - 1;
+      
+      __compcov_trace(cur_loc, str1, str2, n);
+    }
+  }
+  
+  return __libc_strncmp(str1, str2, len);
+}
+
+
+#undef strcasecmp
+
+int strcasecmp(const char* str1, const char* str2) {
+
+  void* retaddr = __builtin_return_address(0);
+  
+  if (__compcov_is_in_bound(retaddr)) {
+    /* Falls back to the case-sensitive prefix logic; may be improved in the future */
+
+    size_t n = __strlen2(str1, str2, MAX_CMP_LENGTH +1);
+    
+    if (n <= MAX_CMP_LENGTH) {
+    
+      u64 cur_loc = (u64)retaddr;
+      cur_loc  = (cur_loc >> 4) ^ (cur_loc << 8);
+      cur_loc &= MAP_SIZE - 1;
+      
+      __compcov_trace(cur_loc, str1, str2, n);
+    }
+  }
+
+  return __libc_strcasecmp(str1, str2);
+}
+
+
+#undef strncasecmp
+
+int strncasecmp(const char* str1, const char* str2, size_t len) {
+
+  void* retaddr = __builtin_return_address(0);
+  
+  if (__compcov_is_in_bound(retaddr)) {
+    /* Falls back to the case-sensitive prefix logic; may be improved in the future */
+
+    size_t n = __strlen2(str1, str2, MAX_CMP_LENGTH +1);
+    n = MIN(n, len);
+    
+    if (n <= MAX_CMP_LENGTH) {
+    
+      u64 cur_loc = (u64)retaddr;
+      cur_loc  = (cur_loc >> 4) ^ (cur_loc << 8);
+      cur_loc &= MAP_SIZE - 1;
+      
+      __compcov_trace(cur_loc, str1, str2, n);
+    }
+  }
+
+  return __libc_strncasecmp(str1, str2, len);
+}
+
+
+#undef memcmp
+
+int memcmp(const void* mem1, const void* mem2, size_t len) {
+
+  void* retaddr = __builtin_return_address(0);
+  
+  if (__compcov_is_in_bound(retaddr)) {
+
+    size_t n = len;
+    
+    if (n <= MAX_CMP_LENGTH) {
+    
+      u64 cur_loc = (u64)retaddr;
+      cur_loc  = (cur_loc >> 4) ^ (cur_loc << 8);
+      cur_loc &= MAP_SIZE - 1;
+      
+      __compcov_trace(cur_loc, mem1, mem2, n);
+    }
+  }
+
+  return __libc_memcmp(mem1, mem2, len);
+}
+
+/* Constructor: open the debug log if requested, then initialize the library. */
+
+__attribute__((constructor)) void __compcov_init(void) {
+
+  if (getenv("AFL_QEMU_COMPCOV_DEBUG") != NULL)
+    debug_fd = open("compcov.debug", O_WRONLY | O_CREAT | O_TRUNC | O_SYNC, 0644);
+
+  __compcov_load();
+}
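+
+/* Usage sketch (paths and directories illustrative), assuming a dynamically
+   linked target built with -fno-builtin:
+
+     AFL_PRELOAD=/path/to/libcompcov.so afl-fuzz -Q -i in -o out -- ./target @@
+
+   AFL_PRELOAD makes afl-fuzz preload the library into the target only. */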
+
+
diff --git a/qemu_mode/libcompcov/pmparser.h b/qemu_mode/libcompcov/pmparser.h
new file mode 100644
index 00000000..34d0cd50
--- /dev/null
+++ b/qemu_mode/libcompcov/pmparser.h
@@ -0,0 +1,280 @@
+/*
+ @Author	: ouadimjamal@gmail.com
+ @date		: December 2015
+
+Permission to use, copy, modify, distribute, and sell this software and its
+documentation for any purpose is hereby granted without fee, provided that
+the above copyright notice appear in all copies and that both that
+copyright notice and this permission notice appear in supporting
+documentation.  No representations are made about the suitability of this
+software for any purpose.  It is provided "as is" without express or
+implied warranty.
+
+ */
+
+#ifndef H_PMPARSER
+#define H_PMPARSER
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <linux/limits.h>
+
+//maximum line length in a procmaps file
+#define PROCMAPS_LINE_MAX_LENGTH  (PATH_MAX + 100) 
+/**
+ * procmaps_struct
+ * @desc holds all the information about an area in the process's VM
+ */
+typedef struct procmaps_struct{
+	void* addr_start; 	//< start address of the area
+	void* addr_end; 	//< end address
+	unsigned long length; //< size of the range
+
+	char perm[5];		//< permissions rwxp
+	short is_r;			//< perm broken out into short flags
+	short is_w;
+	short is_x;
+	short is_p;
+
+	long offset;	//< offset
+	char dev[12];	//< dev major:minor
+	int inode;		//< inode of the file that backs the area
+
+	char pathname[600];		//< the path of the file that backs the area
+	//chained list
+	struct procmaps_struct* next;		//< next node of the chained list
+} procmaps_struct;
+
+/**
+ * procmaps_iterator
+ * @desc holds iterating information
+ */
+typedef struct procmaps_iterator{
+	procmaps_struct* head;
+	procmaps_struct* current;
+} procmaps_iterator;
+/**
+ * pmparser_parse
+ * @param pid the process id whose memory map is to be parsed; the current process if pid<0
+ * @return an iterator over all the nodes
+ */
+procmaps_iterator* pmparser_parse(int pid);
+
+/**
+ * pmparser_next
+ * @description move between areas
+ * @param p_procmaps_it the iterator to advance one step along the chained list
+ * @return a procmaps structure filled with information about this VM area
+ */
+procmaps_struct* pmparser_next(procmaps_iterator* p_procmaps_it);
+/**
+ * pmparser_free
+ * @description should be called at the end to free the resources
+ * @param p_procmaps_it the iterator structure returned by pmparser_parse
+ */
+void pmparser_free(procmaps_iterator* p_procmaps_it);
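+
+/*
+ * Typical iteration pattern (a sketch):
+ *
+ *   procmaps_iterator* it = pmparser_parse(-1);   // current process
+ *   procmaps_struct* area;
+ *   while ((area = pmparser_next(it)) != NULL) {
+ *     // inspect area->addr_start, area->perm, area->pathname, ...
+ *   }
+ *   pmparser_free(it);
+ */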
+
+/**
+ * _pmparser_split_line
+ * @description internal usage
+ */
+void _pmparser_split_line(char*buf,char*addr1,char*addr2,char*perm, char* offset, char* device,char*inode,char* pathname);
+
+/**
+ * pmparser_print
+ * @param map the head of the list
+ * @param order the index of the area to print; -1 to print everything
+ */
+void pmparser_print(procmaps_struct* map,int order);
+
+
+/**
+ * global variables
+ */
+//procmaps_struct* g_last_head=NULL;
+//procmaps_struct* g_current=NULL;
+
+
+procmaps_iterator* pmparser_parse(int pid){
+	procmaps_iterator* maps_it = malloc(sizeof(procmaps_iterator));
+	char maps_path[500];
+	if(pid>=0 ){
+		sprintf(maps_path,"/proc/%d/maps",pid);
+	}else{
+		sprintf(maps_path,"/proc/self/maps");
+	}
+	FILE* file=fopen(maps_path,"r");
+	if(!file){
+		fprintf(stderr,"pmparser : cannot open the memory maps, %s\n",strerror(errno));
+		return NULL;
+	}
+	int ind=0;char buf[PROCMAPS_LINE_MAX_LENGTH];
+	//int c;
+	procmaps_struct* list_maps=NULL;
+	procmaps_struct* tmp;
+	procmaps_struct* current_node=list_maps;
+	char addr1[20],addr2[20], perm[8], offset[20], dev[10],inode[30],pathname[PATH_MAX];
+	while( fgets(buf,PROCMAPS_LINE_MAX_LENGTH,file) != NULL ){
+		//allocate a node
+		tmp=(procmaps_struct*)malloc(sizeof(procmaps_struct));
+		//fill the node
+		_pmparser_split_line(buf,addr1,addr2,perm,offset, dev,inode,pathname);
+		//printf("#%s",buf);
+		//printf("%s-%s %s %s %s %s\t%s\n",addr1,addr2,perm,offset,dev,inode,pathname);
+		//addr_start & addr_end
+		//unsigned long l_addr_start;
+		sscanf(addr1,"%lx",(long unsigned *)&tmp->addr_start );
+		sscanf(addr2,"%lx",(long unsigned *)&tmp->addr_end );
+		//size
+		tmp->length=(unsigned long)(tmp->addr_end-tmp->addr_start);
+		//perm
+		strcpy(tmp->perm,perm);
+		tmp->is_r=(perm[0]=='r');
+		tmp->is_w=(perm[1]=='w');
+		tmp->is_x=(perm[2]=='x');
+		tmp->is_p=(perm[3]=='p');
+
+		//offset
+		sscanf(offset,"%lx",&tmp->offset );
+		//device
+		strcpy(tmp->dev,dev);
+		//inode
+		tmp->inode=atoi(inode);
+		//pathname
+		strcpy(tmp->pathname,pathname);
+		tmp->next=NULL;
+		//attach the node
+		if(ind==0){
+			list_maps=tmp;
+			current_node=list_maps;
+		}else{
+			current_node->next=tmp;
+			current_node=tmp;
+		}
+		ind++;
+		//printf("%s",buf);
+	}
+
+	//close file
+	fclose(file);
+
+
+	//g_last_head=list_maps;
+	maps_it->head = list_maps;
+	maps_it->current =  list_maps;
+	return maps_it;
+}
+
+
+procmaps_struct* pmparser_next(procmaps_iterator* p_procmaps_it){
+	if(p_procmaps_it->current == NULL)
+		return NULL;
+	procmaps_struct* p_current = p_procmaps_it->current;
+	p_procmaps_it->current = p_procmaps_it->current->next;
+	return p_current;
+	/*
+	if(g_current==NULL){
+		g_current=g_last_head;
+	}else
+		g_current=g_current->next;
+
+	return g_current;
+	*/
+}
+
+
+
+void pmparser_free(procmaps_iterator* p_procmaps_it){
+	procmaps_struct* maps_list = p_procmaps_it->head;
+	if(maps_list==NULL) return ;
+	procmaps_struct* act=maps_list;
+	procmaps_struct* nxt=act->next;
+	while(act!=NULL){
+		free(act);
+		act=nxt;
+		if(nxt!=NULL)
+			nxt=nxt->next;
+	}
+
+}
+
+
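+/* A /proc/<pid>/maps line has the shape
+
+     <addr1>-<addr2> <perm> <offset> <dev> <inode>   <pathname>
+
+   and the helper below tokenizes it field by field. */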
+void _pmparser_split_line(
+		char*buf,char*addr1,char*addr2,
+		char*perm,char* offset,char* device,char*inode,
+		char* pathname){
+	//
+	int orig=0;
+	int i=0;
+	//addr1
+	while(buf[i]!='-'){
+		addr1[i-orig]=buf[i];
+		i++;
+	}
+	addr1[i]='\0';
+	i++;
+	//addr2
+	orig=i;
+	while(buf[i]!='\t' && buf[i]!=' '){
+		addr2[i-orig]=buf[i];
+		i++;
+	}
+	addr2[i-orig]='\0';
+
+	//perm
+	while(buf[i]=='\t' || buf[i]==' ')
+		i++;
+	orig=i;
+	while(buf[i]!='\t' && buf[i]!=' '){
+		perm[i-orig]=buf[i];
+		i++;
+	}
+	perm[i-orig]='\0';
+	//offset
+	while(buf[i]=='\t' || buf[i]==' ')
+		i++;
+	orig=i;
+	while(buf[i]!='\t' && buf[i]!=' '){
+		offset[i-orig]=buf[i];
+		i++;
+	}
+	offset[i-orig]='\0';
+	//dev
+	while(buf[i]=='\t' || buf[i]==' ')
+		i++;
+	orig=i;
+	while(buf[i]!='\t' && buf[i]!=' '){
+		device[i-orig]=buf[i];
+		i++;
+	}
+	device[i-orig]='\0';
+	//inode
+	while(buf[i]=='\t' || buf[i]==' ')
+		i++;
+	orig=i;
+	while(buf[i]!='\t' && buf[i]!=' '){
+		inode[i-orig]=buf[i];
+		i++;
+	}
+	inode[i-orig]='\0';
+	//pathname
+	pathname[0]='\0';
+	while(buf[i]=='\t' || buf[i]==' ')
+		i++;
+	orig=i;
+	while(buf[i]!='\t' && buf[i]!=' ' && buf[i]!='\n'){
+		pathname[i-orig]=buf[i];
+		i++;
+	}
+	pathname[i-orig]='\0';
+
+}
+
+
+#endif
diff --git a/qemu_mode/patches/afl-qemu-cpu-inl.h b/qemu_mode/patches/afl-qemu-cpu-inl.h
index 32030408..03951fea 100644
--- a/qemu_mode/patches/afl-qemu-cpu-inl.h
+++ b/qemu_mode/patches/afl-qemu-cpu-inl.h
@@ -9,7 +9,8 @@
 
    TCG instrumentation and block chaining support by Andrea Biondo
                                       <andrea.biondo965@gmail.com>
-   QEMU 3.1.0 port and thread-safety by Andrea Fioraldi
+
+   QEMU 3.1.0 port, TCG thread-safety and CompareCoverage by Andrea Fioraldi
                                       <andreafioraldi@gmail.com>
 
    Copyright 2015, 2016, 2017 Google Inc. All rights reserved.
@@ -65,6 +66,8 @@ abi_ulong afl_entry_point, /* ELF entry point (_start) */
           afl_start_code,  /* .text start pointer      */
           afl_end_code;    /* .text end pointer        */
 
+u8 afl_enable_compcov;
+
 /* Set in the child process in forkserver mode: */
 
 static int forkserver_installed = 0;
@@ -147,7 +150,6 @@ static void afl_setup(void) {
 
     if (inst_r) afl_area_ptr[0] = 1;
 
-
   }
 
   if (getenv("AFL_INST_LIBS")) {
@@ -156,6 +158,11 @@ static void afl_setup(void) {
     afl_end_code   = (abi_ulong)-1;
 
   }
+  
+  if (getenv("AFL_QEMU_COMPCOV")) {
+
+    afl_enable_compcov = 1;
+  }
 
   /* pthread_atfork() seems somewhat broken in util/rcu.c, and I'm
      not entirely sure what is the cause. This disables that
@@ -271,6 +278,25 @@ static void afl_request_tsl(target_ulong pc, target_ulong cb, uint32_t flags, ui
 
 }
 
+
+/* Check if an address is valid in the current mapping */
+
+static inline int is_valid_addr(target_ulong addr) {
+
+    int flags;
+    target_ulong page;
+
+    page = addr & TARGET_PAGE_MASK;
+    
+    flags = page_get_flags(page);
+    if (!(flags & PAGE_VALID) || !(flags & PAGE_READ))
+        return 0;
+    
+    return 1;
+}
+
 /* This is the other side of the same channel. Since timeouts are handled by
    afl-fuzz simply killing the child, we can just wait until the pipe breaks. */
 
@@ -282,6 +308,8 @@ static void afl_wait_tsl(CPUState *cpu, int fd) {
 
   while (1) {
 
+    u8 invalid_pc = 0;
+
     /* Broken pipe means it's time to return to the fork server routine. */
 
     if (read(fd, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
@@ -290,19 +318,34 @@ static void afl_wait_tsl(CPUState *cpu, int fd) {
     tb = tb_htable_lookup(cpu, t.tb.pc, t.tb.cs_base, t.tb.flags, t.tb.cf_mask);
 
     if(!tb) {
-      mmap_lock();
-      tb = tb_gen_code(cpu, t.tb.pc, t.tb.cs_base, t.tb.flags, 0);
-      mmap_unlock();
+      
+      /* The child may request to translate a block of memory that is not
+         mapped in the parent (e.g. JITed or dlopen()ed code).
+         This would cause a SIGSEGV in gen_intermediate_code() and its
+         subroutines, so we simply avoid caching such blocks. */
+
+      if (is_valid_addr(t.tb.pc)) {
+    
+        mmap_lock();
+        tb = tb_gen_code(cpu, t.tb.pc, t.tb.cs_base, t.tb.flags, 0);
+        mmap_unlock();
+      } else {
+      
+        invalid_pc = 1; 
+      }
     }
 
     if (t.is_chain) {
       if (read(fd, &c, sizeof(struct afl_chain)) != sizeof(struct afl_chain))
         break;
 
-      last_tb = tb_htable_lookup(cpu, c.last_tb.pc, c.last_tb.cs_base,
-                                 c.last_tb.flags, c.cf_mask);
-      if (last_tb) {
-        tb_add_jump(last_tb, c.tb_exit, tb);
+      if (!invalid_pc) {
+
+        last_tb = tb_htable_lookup(cpu, c.last_tb.pc, c.last_tb.cs_base,
+                                   c.last_tb.flags, c.cf_mask);
+        if (last_tb) {
+          tb_add_jump(last_tb, c.tb_exit, tb);
+        }
       }
     }
 
diff --git a/qemu_mode/patches/afl-qemu-cpu-translate-inl.h b/qemu_mode/patches/afl-qemu-cpu-translate-inl.h
new file mode 100644
index 00000000..0ca89c98
--- /dev/null
+++ b/qemu_mode/patches/afl-qemu-cpu-translate-inl.h
@@ -0,0 +1,125 @@
+/*
+   american fuzzy lop - high-performance binary-only instrumentation
+   -----------------------------------------------------------------
+
+   Written by Andrew Griffiths <agriffiths@google.com> and
+              Michal Zalewski <lcamtuf@google.com>
+
+   Idea & design very much by Andrew Griffiths.
+
+   TCG instrumentation and block chaining support by Andrea Biondo
+                                      <andrea.biondo965@gmail.com>
+   
+   QEMU 3.1.0 port, TCG thread-safety and CompareCoverage by Andrea Fioraldi
+                                      <andreafioraldi@gmail.com>
+
+   Copyright 2015, 2016, 2017 Google Inc. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   This code is a shim patched into the separately-distributed source
+   code of QEMU 3.1.0. It leverages the built-in QEMU tracing functionality
+   to implement AFL-style instrumentation and to take care of the remaining
+   parts of the AFL fork server logic.
+
+   The resulting QEMU binary is essentially a standalone instrumentation
+   tool; for an example of how to leverage it for other purposes, you can
+   have a look at afl-showmap.c.
+
+ */
+
+#include "../../config.h"
+#include "tcg.h"
+#include "tcg-op.h"
+
+/* Declared in afl-qemu-cpu-inl.h */
+extern unsigned char *afl_area_ptr;
+extern unsigned int afl_inst_rms;
+extern abi_ulong afl_start_code, afl_end_code;
+extern u8 afl_enable_compcov;
+
+void tcg_gen_afl_compcov_log_call(void *func, target_ulong cur_loc,
+                                  TCGv_i64 arg1, TCGv_i64 arg2);
+
+static void afl_compcov_log_16(target_ulong cur_loc, target_ulong arg1,
+                               target_ulong arg2) {
+
+  if ((arg1 & 0xff) == (arg2 & 0xff)) {
+    afl_area_ptr[cur_loc]++;
+  }
+}
+
+static void afl_compcov_log_32(target_ulong cur_loc, target_ulong arg1,
+                               target_ulong arg2) {
+
+  if ((arg1 & 0xff) == (arg2 & 0xff)) {
+    afl_area_ptr[cur_loc]++;
+    if ((arg1 & 0xffff) == (arg2 & 0xffff)) {
+      afl_area_ptr[cur_loc +1]++;
+      if ((arg1 & 0xffffff) == (arg2 & 0xffffff)) {
+        afl_area_ptr[cur_loc +2]++;
+      }
+    }
+  }
+}
+
+static void afl_compcov_log_64(target_ulong cur_loc, target_ulong arg1,
+                               target_ulong arg2) {
+
+  if ((arg1 & 0xff) == (arg2 & 0xff)) {
+    afl_area_ptr[cur_loc]++;
+    if ((arg1 & 0xffff) == (arg2 & 0xffff)) {
+      afl_area_ptr[cur_loc +1]++;
+      if ((arg1 & 0xffffff) == (arg2 & 0xffffff)) {
+        afl_area_ptr[cur_loc +2]++;
+        if ((arg1 & 0xffffffff) == (arg2 & 0xffffffff)) {
+          afl_area_ptr[cur_loc +3]++;
+          if ((arg1 & 0xffffffffff) == (arg2 & 0xffffffffff)) {
+            afl_area_ptr[cur_loc +4]++;
+            if ((arg1 & 0xffffffffffff) == (arg2 & 0xffffffffffff)) {
+              afl_area_ptr[cur_loc +5]++;
+              if ((arg1 & 0xffffffffffffff) == (arg2 & 0xffffffffffffff)) {
+                afl_area_ptr[cur_loc +6]++;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
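+
+/* Example (illustrative values): with arg1 == 0x11223344 and
+   arg2 == 0xAA223344 the low three bytes match, so map entries
+   cur_loc..cur_loc+2 are all bumped, one extra hit per extra matching
+   byte, mirroring what libcompcov does for string comparisons. */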
+
+
+static void afl_gen_compcov(target_ulong cur_loc, TCGv_i64 arg1, TCGv_i64 arg2,
+                            TCGMemOp ot) {
+
+  void *func;
+  
+  if (!afl_enable_compcov || cur_loc > afl_end_code || cur_loc < afl_start_code)
+    return;
+
+  switch (ot) {
+    case MO_64:
+      func = &afl_compcov_log_64;
+      break;
+    case MO_32: 
+      func = &afl_compcov_log_32;
+      break;
+    case MO_16:
+      func = &afl_compcov_log_16;
+      break;
+    default:
+      return;
+  }
+  
+  cur_loc  = (cur_loc >> 4) ^ (cur_loc << 8);
+  cur_loc &= MAP_SIZE - 1;
+  
+  if (cur_loc >= afl_inst_rms) return;
+  
+  tcg_gen_afl_compcov_log_call(func, cur_loc, arg1, arg2);
+}
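+
+/* afl_gen_compcov() is called from the patched i386 sub/cmp translation
+   (see patches/i386-translate.diff); unless AFL_QEMU_COMPCOV is set at
+   runtime, the early return above leaves translation untouched. */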
diff --git a/qemu_mode/patches/afl-qemu-tcg-inl.h b/qemu_mode/patches/afl-qemu-tcg-inl.h
index fab3d9e3..ff90d1b9 100644
--- a/qemu_mode/patches/afl-qemu-tcg-inl.h
+++ b/qemu_mode/patches/afl-qemu-tcg-inl.h
@@ -9,7 +9,8 @@
 
    TCG instrumentation and block chaining support by Andrea Biondo
                                       <andrea.biondo965@gmail.com>
-   QEMU 3.1.0 port and thread-safety by Andrea Fioraldi
+
+   QEMU 3.1.0 port, TCG thread-safety and CompareCoverage by Andrea Fioraldi
                                       <andreafioraldi@gmail.com>
 
    Copyright 2015, 2016, 2017 Google Inc. All rights reserved.
@@ -42,10 +43,10 @@ void tcg_gen_afl_maybe_log_call(target_ulong cur_loc)
     unsigned sizemask, flags;
     TCGOp *op;
 
-    TCGTemp *arg = tcgv_ptr_temp( tcg_const_tl(cur_loc) );
+    TCGTemp *arg = tcgv_i64_temp( tcg_const_tl(cur_loc) );
 
     flags = 0;
-    sizemask = dh_sizemask(void, 0) | dh_sizemask(ptr, 1);
+    sizemask = dh_sizemask(void, 0) | dh_sizemask(i64, 1);
 
 #if defined(__sparc__) && !defined(__arch64__) \
     && !defined(CONFIG_TCG_INTERPRETER)
@@ -151,7 +152,7 @@ void tcg_gen_afl_maybe_log_call(target_ulong cur_loc)
         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
            Note that describing these as TCGv_i64 eliminates an unnecessary
            zero-extension that tcg_gen_concat_i32_i64 would create.  */
-        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
+        tcg_gen_concat32_i64(temp_tcgv_i64(NULL), retl, reth);
         tcg_temp_free_i64(retl);
         tcg_temp_free_i64(reth);
     }
@@ -163,3 +164,143 @@ void tcg_gen_afl_maybe_log_call(target_ulong cur_loc)
 #endif /* TCG_TARGET_EXTEND_ARGS */
 }
 
+void tcg_gen_afl_compcov_log_call(void *func, target_ulong cur_loc, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+    int i, real_args, nb_rets, pi;
+    unsigned sizemask, flags;
+    TCGOp *op;
+
+    int nargs = 3;
+    TCGTemp *args_arr[3] = { tcgv_i64_temp( tcg_const_tl(cur_loc) ),
+                             tcgv_i64_temp(arg1),
+                             tcgv_i64_temp(arg2) };
+    /* Use a pointer so that the 32-bit sparc path below can legally
+       reassign both nargs and args. */
+    TCGTemp **args = args_arr;
+
+    flags = 0;
+    sizemask = dh_sizemask(void, 0) | dh_sizemask(i64, 1) |
+               dh_sizemask(i64, 2) | dh_sizemask(i64, 3);
+
+#if defined(__sparc__) && !defined(__arch64__) \
+    && !defined(CONFIG_TCG_INTERPRETER)
+    /* We have 64-bit values in one register, but need to pass as two
+       separate parameters.  Split them.  */
+    int orig_sizemask = sizemask;
+    int orig_nargs = nargs;
+    TCGv_i64 retl, reth;
+    TCGTemp *split_args[MAX_OPC_PARAM];
+
+    retl = NULL;
+    reth = NULL;
+    if (sizemask != 0) {
+        for (i = real_args = 0; i < nargs; ++i) {
+            int is_64bit = sizemask & (1 << (i+1)*2);
+            if (is_64bit) {
+                TCGv_i64 orig = temp_tcgv_i64(args[i]);
+                TCGv_i32 h = tcg_temp_new_i32();
+                TCGv_i32 l = tcg_temp_new_i32();
+                tcg_gen_extr_i64_i32(l, h, orig);
+                split_args[real_args++] = tcgv_i32_temp(h);
+                split_args[real_args++] = tcgv_i32_temp(l);
+            } else {
+                split_args[real_args++] = args[i];
+            }
+        }
+        nargs = real_args;
+        args = split_args;
+        sizemask = 0;
+    }
+#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+    for (i = 0; i < nargs; ++i) {
+        int is_64bit = sizemask & (1 << (i+1)*2);
+        int is_signed = sizemask & (2 << (i+1)*2);
+        if (!is_64bit) {
+            TCGv_i64 temp = tcg_temp_new_i64();
+            TCGv_i64 orig = temp_tcgv_i64(args[i]);
+            if (is_signed) {
+                tcg_gen_ext32s_i64(temp, orig);
+            } else {
+                tcg_gen_ext32u_i64(temp, orig);
+            }
+            args[i] = tcgv_i64_temp(temp);
+        }
+    }
+#endif /* TCG_TARGET_EXTEND_ARGS */
+
+    op = tcg_emit_op(INDEX_op_call);
+
+    pi = 0;
+    nb_rets = 0;
+    TCGOP_CALLO(op) = nb_rets;
+
+    real_args = 0;
+    for (i = 0; i < nargs; i++) {
+        int is_64bit = sizemask & (1 << (i+1)*2);
+        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
+#ifdef TCG_TARGET_CALL_ALIGN_ARGS
+            /* some targets want aligned 64 bit args */
+            if (real_args & 1) {
+                op->args[pi++] = TCG_CALL_DUMMY_ARG;
+                real_args++;
+            }
+#endif
+           /* If stack grows up, then we will be placing successive
+              arguments at lower addresses, which means we need to
+              reverse the order compared to how we would normally
+              treat either big or little-endian.  For those arguments
+              that will wind up in registers, this still works for
+              HPPA (the only current STACK_GROWSUP target) since the
+              argument registers are *also* allocated in decreasing
+              order.  If another such target is added, this logic may
+              have to get more complicated to differentiate between
+              stack arguments and register arguments.  */
+#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
+            op->args[pi++] = temp_arg(args[i] + 1);
+            op->args[pi++] = temp_arg(args[i]);
+#else
+            op->args[pi++] = temp_arg(args[i]);
+            op->args[pi++] = temp_arg(args[i] + 1);
+#endif
+            real_args += 2;
+            continue;
+        }
+
+        op->args[pi++] = temp_arg(args[i]);
+        real_args++;
+    }
+    op->args[pi++] = (uintptr_t)func;
+    op->args[pi++] = flags;
+    TCGOP_CALLI(op) = real_args;
+
+    /* Make sure the fields didn't overflow.  */
+    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
+    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
+
+#if defined(__sparc__) && !defined(__arch64__) \
+    && !defined(CONFIG_TCG_INTERPRETER)
+    /* Free all of the parts we allocated above.  */
+    for (i = real_args = 0; i < orig_nargs; ++i) {
+        int is_64bit = orig_sizemask & (1 << (i+1)*2);
+        if (is_64bit) {
+            tcg_temp_free_internal(args[real_args++]);
+            tcg_temp_free_internal(args[real_args++]);
+        } else {
+            real_args++;
+        }
+    }
+    if (orig_sizemask & 1) {
+        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
+           Note that describing these as TCGv_i64 eliminates an unnecessary
+           zero-extension that tcg_gen_concat_i32_i64 would create.  */
+        tcg_gen_concat32_i64(temp_tcgv_i64(NULL), retl, reth);
+        tcg_temp_free_i64(retl);
+        tcg_temp_free_i64(reth);
+    }
+#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+    for (i = 0; i < nargs; ++i) {
+        int is_64bit = sizemask & (1 << (i+1)*2);
+        if (!is_64bit) {
+            tcg_temp_free_internal(args[i]);
+        }
+    }
+#endif /* TCG_TARGET_EXTEND_ARGS */
+}
+
diff --git a/qemu_mode/patches/afl-qemu-translate-inl.h b/qemu_mode/patches/afl-qemu-translate-inl.h
index 74c827f5..bfb2897e 100644
--- a/qemu_mode/patches/afl-qemu-translate-inl.h
+++ b/qemu_mode/patches/afl-qemu-translate-inl.h
@@ -9,7 +9,8 @@
 
    TCG instrumentation and block chaining support by Andrea Biondo
                                       <andrea.biondo965@gmail.com>
-   QEMU 3.1.0 port and thread-safety by Andrea Fioraldi
+
+   QEMU 3.1.0 port, TCG thread-safety and CompareCoverage by Andrea Fioraldi
                                       <andreafioraldi@gmail.com>
 
    Copyright 2015, 2016, 2017 Google Inc. All rights reserved.
@@ -41,12 +42,12 @@ extern abi_ulong afl_start_code, afl_end_code;
 
 void tcg_gen_afl_maybe_log_call(target_ulong cur_loc);
 
-void afl_maybe_log(void* cur_loc) { 
+void afl_maybe_log(target_ulong cur_loc) { 
 
   static __thread abi_ulong prev_loc;
 
-  afl_area_ptr[(abi_ulong)cur_loc ^ prev_loc]++;
-  prev_loc = (abi_ulong)cur_loc >> 1;
+  afl_area_ptr[cur_loc ^ prev_loc]++;
+  prev_loc = cur_loc >> 1;
 
 }
 
@@ -59,7 +60,7 @@ static void afl_gen_trace(target_ulong cur_loc) {
   if (cur_loc > afl_end_code || cur_loc < afl_start_code /*|| !afl_area_ptr*/) // not needed because of static dummy buffer
     return;
 
-  /* Looks like QEMU always maps to fixed locations, so ASAN is not a
+  /* Looks like QEMU always maps to fixed locations, so ASLR is not a
      concern. Phew. But instruction addresses may be aligned. Let's mangle
      the value to get something quasi-uniform. */
 
diff --git a/qemu_mode/patches/i386-translate.diff b/qemu_mode/patches/i386-translate.diff
new file mode 100644
index 00000000..0bc48828
--- /dev/null
+++ b/qemu_mode/patches/i386-translate.diff
@@ -0,0 +1,33 @@
+diff --git a/target/i386/translate.c b/target/i386/translate.c
+index 0dd5fbe4..b95d341e 100644
+--- a/target/i386/translate.c
++++ b/target/i386/translate.c
+@@ -32,6 +32,8 @@
+ #include "trace-tcg.h"
+ #include "exec/log.h"
+ 
++#include "../patches/afl-qemu-cpu-translate-inl.h"
++
+ #define PREFIX_REPZ   0x01
+ #define PREFIX_REPNZ  0x02
+ #define PREFIX_LOCK   0x04
+@@ -1343,9 +1345,11 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
+             tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
+                                         s1->mem_index, ot | MO_LE);
+             tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
++            afl_gen_compcov(s1->pc, s1->cc_srcT, s1->T1, ot);
+         } else {
+             tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
+             tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
++            afl_gen_compcov(s1->pc, s1->T0, s1->T1, ot);
+             gen_op_st_rm_T0_A0(s1, ot, d);
+         }
+         gen_op_update2_cc(s1);
+@@ -1389,6 +1393,7 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
+         tcg_gen_mov_tl(cpu_cc_src, s1->T1);
+         tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
+         tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
++        afl_gen_compcov(s1->pc, s1->T0, s1->T1, ot);
+         set_cc_op(s1, CC_OP_SUBB + ot);
+         break;
+     }
diff --git a/sharedmem.c b/sharedmem.c
new file mode 100644
index 00000000..3fd38444
--- /dev/null
+++ b/sharedmem.c
@@ -0,0 +1,137 @@
+/*
+   Shared memory setup for the AFL instrumentation bitmap: setup_shm()
+   creates the region (SysV shared memory by default, POSIX shm_open()
+   plus mmap() when built with USEMMAP) and exports its id via
+   SHM_ENV_VAR; remove_shm() tears it down at exit.
+ */
+
+#define AFL_MAIN
+
+#include "config.h"
+#include "types.h"
+#include "debug.h"
+#include "alloc-inl.h"
+#include "hash.h"
+#include "sharedmem.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+#include <signal.h>
+#include <dirent.h>
+#include <fcntl.h>
+
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/mman.h>
+
+#ifndef USEMMAP
+ #include <sys/ipc.h>
+ #include <sys/shm.h>
+#endif
+
+extern unsigned char *trace_bits;
+
+#ifdef USEMMAP
+/* ================ Proteas ================ */
+int g_shm_fd = -1;
+unsigned char *g_shm_base = NULL;
+char g_shm_file_path[L_tmpnam];
+/* ========================================= */
+#else
+static s32 shm_id;                    /* ID of the SHM region              */
+#endif
+
+/* Get rid of shared memory (atexit handler). */
+
+void remove_shm(void) {
+#ifdef USEMMAP
+  if (g_shm_base != NULL) {
+    munmap(g_shm_base, MAP_SIZE);
+    g_shm_base = NULL;
+  }
+
+  if (g_shm_fd != -1) {
+    close(g_shm_fd);
+    g_shm_fd = -1;
+  }
+#else
+  shmctl(shm_id, IPC_RMID, NULL);
+#endif
+}
+
+
+/* Configure shared memory. */
+
+void setup_shm(unsigned char dumb_mode) {
+#ifdef USEMMAP
+  /* Generate a random file name so that multiple instances can coexist. */
+
+  /* glibc's tmpnam() cannot be used securely (it triggers a linker
+     warning that cannot be suppressed), so we use this cruder
+     workaround instead. */
+  snprintf(g_shm_file_path, L_tmpnam, "/afl_%d_%ld", getpid(), random());
+
+  /* create the shared memory segment as if it was a file */
+  g_shm_fd = shm_open(g_shm_file_path, O_CREAT | O_RDWR | O_EXCL, 0600);
+  if (g_shm_fd == -1) {
+    PFATAL("shm_open() failed");
+  }
+
+  /* configure the size of the shared memory segment */
+  if (ftruncate(g_shm_fd, MAP_SIZE)) {
+    PFATAL("setup_shm(): ftruncate() failed");
+  }
+
+  /* map the shared memory segment to the address space of the process */
+  g_shm_base = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, g_shm_fd, 0);
+  if (g_shm_base == MAP_FAILED) {
+    close(g_shm_fd);
+    g_shm_fd = -1;
+    PFATAL("mmap() failed");
+  }
+
+  atexit(remove_shm);
+
+  /* If somebody is asking us to fuzz instrumented binaries in dumb mode,
+     we don't want them to detect instrumentation, since we won't be sending
+     fork server commands. This should be replaced with better auto-detection
+     later on, perhaps? */
+
+  if (!dumb_mode) setenv(SHM_ENV_VAR, g_shm_file_path, 1);
+
+  trace_bits = g_shm_base;
+
+  if (!trace_bits) PFATAL("mmap() failed");
+  
+#else
+  u8* shm_str;
+
+  shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);
+
+  if (shm_id < 0) PFATAL("shmget() failed");
+
+  atexit(remove_shm);
+
+  shm_str = alloc_printf("%d", shm_id);
+
+  /* If somebody is asking us to fuzz instrumented binaries in dumb mode,
+     we don't want them to detect instrumentation, since we won't be sending
+     fork server commands. This should be replaced with better auto-detection
+     later on, perhaps? */
+
+  if (!dumb_mode) setenv(SHM_ENV_VAR, shm_str, 1);
+
+  ck_free(shm_str);
+
+  trace_bits = shmat(shm_id, NULL, 0);
+
+  if (trace_bits == (void *)-1) PFATAL("shmat() failed");
+
+#endif
+}
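+
+/* Usage sketch: the fuzzer calls setup_shm() once at startup; the
+   instrumented child then attaches to the same region by reading
+   SHM_ENV_VAR, exactly as __compcov_load() does in libcompcov. */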
+
diff --git a/sharedmem.h b/sharedmem.h
new file mode 100644
index 00000000..53a85fcb
--- /dev/null
+++ b/sharedmem.h
@@ -0,0 +1,6 @@
+#ifndef __SHAREDMEM_H
+#define __SHAREDMEM_H
+
+void setup_shm(unsigned char dumb_mode);
+void remove_shm(void);
+#endif
diff --git a/types.h b/types.h
index 784d3a7a..7606d4ed 100644
--- a/types.h
+++ b/types.h
@@ -78,9 +78,14 @@ typedef int64_t  s64;
 #define STRINGIFY(x) STRINGIFY_INTERNAL(x)
 
 #define MEM_BARRIER() \
-  asm volatile("" ::: "memory")
+  __asm__ volatile("" ::: "memory")
 
-#define likely(_x)   __builtin_expect(!!(_x), 1)
-#define unlikely(_x)  __builtin_expect(!!(_x), 0)
+#if __GNUC__ < 6
+ #define likely(_x)   (_x)
+ #define unlikely(_x) (_x)
+#else
+ #define likely(_x)   __builtin_expect(!!(_x), 1)
+ #define unlikely(_x)  __builtin_expect(!!(_x), 0)
+#endif
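+
+/* likely()/unlikely() are pure branch-prediction hints; with the older GCC
+   versions handled above they compile down to the bare condition, so
+   behavior is identical either way. */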
 
 #endif /* ! _HAVE_TYPES_H */
diff --git a/unicorn_mode/README.md b/unicorn_mode/README.md
new file mode 100644
index 00000000..f12ed8bc
--- /dev/null
+++ b/unicorn_mode/README.md
@@ -0,0 +1,21 @@
+```
+        __ _                 _                      
+  __ _ / _| |    _   _ _ __ (_) ___ ___  _ __ _ __  
+ / _` | |_| |___| | | | '_ \| |/ __/ _ \| '__| '_ \ 
+| (_| |  _| |___| |_| | | | | | (_| (_) | |  | | | |
+ \__,_|_| |_|    \__,_|_| |_|_|\___\___/|_|  |_| |_|
+                                                      
+```
+
+afl-unicorn lets you fuzz any piece of binary code that can be emulated by
+[Unicorn Engine](http://www.unicorn-engine.org/).
+
+For the full README please see docs/unicorn_mode.txt.
+
+For an in-depth description of what this is, how to install it, and how to use
+it check out this [blog post](https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf).
+
+For general help with AFL, please refer to the documents in the ./docs/ directory.
+
+Created by Nathan Voss, originally funded by
+[Battelle](https://www.battelle.org/cyber).
diff --git a/unicorn_mode/build_unicorn_support.sh b/unicorn_mode/build_unicorn_support.sh
new file mode 100644
index 00000000..9dcf6773
--- /dev/null
+++ b/unicorn_mode/build_unicorn_support.sh
@@ -0,0 +1,191 @@
+#!/bin/sh
+#
+# american fuzzy lop - Unicorn-Mode build script
+# --------------------------------------
+#
+# Written by Nathan Voss <njvoss99@gmail.com>
+# 
+# Adapted from code by Andrew Griffiths <agriffiths@google.com> and
+#                      Michal Zalewski <lcamtuf@google.com>
+#
+# Adapted for AFL++ by Dominik Maier <mail@dmnk.co>
+#
+# Copyright 2017 Battelle Memorial Institute. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# This script downloads, patches, and builds a version of Unicorn with
+# minor tweaks to allow Unicorn-emulated binaries to be run under
+# afl-fuzz. 
+#
+# The modifications reside in patches/*. The standalone Unicorn library
+# will be written to /usr/lib/libunicornafl.so, and the Python bindings
+# will be installed for the current user (or into the active virtualenv).
+#
+# You must make sure that Unicorn Engine is not already installed before
+# running this script. If it is, please uninstall it first.
+
+UNICORN_URL="https://github.com/unicorn-engine/unicorn/archive/24f55a7973278f20f0de21b904851d99d4716263.tar.gz"
+UNICORN_SHA384="7180d47ca52c99b4c073a343a2ead91da1a829fdc3809f3ceada5d872e162962eab98873a8bc7971449d5f34f41fdb93"
+
+echo "================================================="
+echo "Unicorn-AFL build script"
+echo "================================================="
+echo
+
+echo "[*] Performing basic sanity checks..."
+
+if [ ! "`uname -s`" = "Linux" ]; then
+
+  echo "[-] Error: Unicorn instrumentation is supported only on Linux."
+  exit 1
+  
+fi
+
+if [ ! -f "patches/afl-unicorn-cpu-inl.h" -o ! -f "../config.h" ]; then
+
+  echo "[-] Error: key files not found - wrong working directory?"
+  exit 1
+
+fi
+
+if [ ! -f "../afl-showmap" ]; then
+
+  echo "[-] Error: ../afl-showmap not found - compile AFL first!"
+  exit 1
+
+fi
+
+for i in wget python automake autoconf sha384sum; do
+
+  T=`which "$i" 2>/dev/null`
+
+  if [ "$T" = "" ]; then
+
+    echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i'."
+    exit 1
+
+  fi
+
+done
+
+if ! which easy_install > /dev/null; then
+
+  # workaround for unusual installs
+  if [ '!' -e /usr/lib/python2.7/dist-packages/easy_install.py ]; then
+
+    echo "[-] Error: Python setup-tools not found. Run 'sudo apt-get install python-setuptools'."
+    exit 1
+
+  fi
+
+fi
+
+if echo "$CC" | grep -qF /afl-; then
+
+  echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool."
+  exit 1
+
+fi
+
+echo "[+] All checks passed!"
+
+ARCHIVE="`basename -- "$UNICORN_URL"`"
+
+CKSUM=`sha384sum -- "$ARCHIVE" 2>/dev/null | cut -d' ' -f1`
+
+if [ ! "$CKSUM" = "$UNICORN_SHA384" ]; then
+
+  echo "[*] Downloading Unicorn v1.0.1 from the web..."
+  rm -f "$ARCHIVE"
+  wget -O "$ARCHIVE" -- "$UNICORN_URL" || exit 1
+
+  CKSUM=`sha384sum -- "$ARCHIVE" 2>/dev/null | cut -d' ' -f1`
+
+fi
+
+if [ "$CKSUM" = "$UNICORN_SHA384" ]; then
+
+  echo "[+] Cryptographic signature on $ARCHIVE checks out."
+
+else
+
+  echo "[-] Error: signature mismatch on $ARCHIVE (perhaps download error?)."
+  exit 1
+
+fi
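+
+# Note: SHA-384 here is an integrity checksum, not an authenticity
+# signature; it catches corrupted downloads, not tampering at the
+# download location itself.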
+
+echo "[*] Uncompressing archive (this will take a while)..."
+
+rm -rf "unicorn" || exit 1
+mkdir "unicorn" || exit 1
+tar xzf "$ARCHIVE" -C ./unicorn --strip-components=1 || exit 1
+
+echo "[+] Unpacking successful."
+
+rm -rf "$ARCHIVE" || exit 1
+
+echo "[*] Applying patches..."
+
+cp patches/afl-unicorn-cpu-inl.h unicorn || exit 1
+patch -p1 --directory unicorn <patches/patches.diff || exit 1
+
+echo "[+] Patching done."
+
+echo "[*] Configuring Unicorn build..."
+
+cd "unicorn" || exit 1
+
+echo "[+] Configuration complete."
+
+echo "[*] Attempting to build Unicorn (fingers crossed!)..."
+
+UNICORN_QEMU_FLAGS='--python=python2' make || exit 1
+
+echo "[+] Build process successful!"
+
+echo "[*] Installing Unicorn python bindings..."
+cd bindings/python || exit 1
+if [ -z "$VIRTUAL_ENV" ]; then
+  echo "[*] Info: Installing python unicorn using --user"
+  python setup.py install --user || exit 1
+else
+  echo "[*] Info: Installing python unicorn to virtualenv: $VIRTUAL_ENV"
+  python setup.py install || exit 1
+fi
+export LIBUNICORN_PATH="$(pwd)" # in theory, this allows switching between the afl-unicorn and vanilla Unicorn .so files.
+
+cd ../../ || exit 1
+
+echo "[+] Unicorn bindings installed successfully."
+
+# Compile the sample, run it, verify that it works!
+echo "[*] Testing unicorn-mode functionality by running a sample test harness under afl-unicorn"
+
+cd ../samples/simple || exit 1
+
+# Run afl-showmap on the sample application. If anything comes out then it must have worked!
+unset AFL_INST_RATIO
+echo 0 | ../../../afl-showmap -U -m none -q -o .test-instr0 -- python simple_test_harness.py ./sample_inputs/sample1.bin || exit 1
+
+if [ -s .test-instr0 ]
+then
+  
+  echo "[+] Instrumentation tests passed. "
+  echo "[+] All set, you can now use Unicorn mode (-U) in afl-fuzz!"
+  RETVAL=0
+
+else
+
+  echo "[-] Error: Unicorn mode doesn't seem to work!"
+  RETVAL=1
+
+fi
+
+rm -f .test-instr0
+
+exit $RETVAL
diff --git a/unicorn_mode/helper_scripts/template_test_harness.py b/unicorn_mode/helper_scripts/template_test_harness.py
new file mode 100644
index 00000000..93c526cc
--- /dev/null
+++ b/unicorn_mode/helper_scripts/template_test_harness.py
@@ -0,0 +1,104 @@
+"""
+    template_test_harness.py
+
+    Template which loads the context of a process into a Unicorn Engine
+    instance, loads a custom (mutated) input, and executes the
+    desired code. Designed to be used in conjunction with one of the
+    Unicorn Context Dumper scripts.
+
+    Author:
+        Nathan Voss <njvoss299@gmail.com>
+"""
+
+import argparse
+
+from unicorn import *
+from unicorn.x86_const import *  # TODO: Set correct architecture here as necessary
+
+import unicorn_loader 
+
+# Simple stand-in heap to prevent OS/kernel issues
+unicorn_heap = None
+
+# Start and end address of emulation
+START_ADDRESS = None  # TODO: Set start address here
+END_ADDRESS   = None  # TODO: Set end address here
+
+"""
+    Implement target-specific hooks in here.
+    Stub out, skip past, and re-implement necessary functionality as appropriate
+"""
+def unicorn_hook_instruction(uc, address, size, user_data):
+
+    # TODO: Setup hooks and handle anything you need to here
+    #    - For example, hook malloc/free/etc. and handle it internally
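+    # Hypothetical example (SKIP_ADDR is a placeholder you would define):
+    # to skip over a call instruction at SKIP_ADDR, something like the
+    # following could work:
+    #
+    #   if address == SKIP_ADDR:
+    #       uc.reg_write(UC_X86_REG_RIP, address + size)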
+    pass
+
+#------------------------
+#---- Main test function  
+
+def main():
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('context_dir', type=str, help="Directory containing process context")
+    parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input content")
+    parser.add_argument('-d', '--debug', default=False, action="store_true", help="Dump trace info")
+    args = parser.parse_args()
+
+    print("Loading context from {}".format(args.context_dir))
+    uc = unicorn_loader.AflUnicornEngine(args.context_dir, enable_trace=args.debug, debug_print=False)       
+
+    # Instantiate the hook function to avoid emulation errors
+    global unicorn_heap
+    unicorn_heap = unicorn_loader.UnicornSimpleHeap(uc, debug_print=True)
+    uc.hook_add(UC_HOOK_CODE, unicorn_hook_instruction)
+
+    # Execute 1 instruction just to start up the fork server
+    # NOTE: This instruction will be executed again later, so be sure that
+    #       there are no negative consequences to the overall execution state.
+    #       If there are, change the later call to emu_start to not re-execute
+    #       the first instruction.
+    print("Starting the forkserver by executing 1 instruction")
+    try:
+        uc.emu_start(START_ADDRESS, 0, 0, count=1)
+    except UcError as e:
+        print("ERROR: Failed to execute a single instruction (error: {})!".format(e))
+        return
+
+    # Allocate a buffer and load a mutated input and put it into the right spot
+    if args.input_file:
+        print("Loading input content from {}".format(args.input_file))
+        input_file = open(args.input_file, 'rb')
+        input_content = input_file.read()
+        input_file.close()
+
+        # TODO: Apply constraints to mutated input here
+        raise NotImplementedError('No constraints on the mutated inputs have been set!')
+        
+        # Allocate a new buffer and put the input into it
+        buf_addr = unicorn_heap.malloc(len(input_content))
+        uc.mem_write(buf_addr, input_content)
+        print("Allocated mutated input buffer @ 0x{0:016x}".format(buf_addr))
+
+        # TODO: Set the input into the state so it will be handled
+        raise NotImplementedError('The mutated input was not loaded into the Unicorn state!')
+        
+    # Run the test
+    print("Executing from 0x{0:016x} to 0x{1:016x}".format(START_ADDRESS, END_ADDRESS))
+    try:
+        result = uc.emu_start(START_ADDRESS, END_ADDRESS, timeout=0, count=0)
+    except UcError as e:
+        # If something went wrong during emulation a signal is raised to force this 
+        # script to crash in a way that AFL can detect ('uc.force_crash()' should be
+        # called for any condition that you want AFL to treat as a crash).
+        print("Execution failed with error: {}".format(e))
+        uc.dump_regs() 
+        uc.force_crash(e)
+
+    print("Final register state:")    
+    uc.dump_regs()
+
+    print("Done.")    
+        
+if __name__ == "__main__":
+    main()
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py b/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py
new file mode 100644
index 00000000..22b9fd47
--- /dev/null
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py
@@ -0,0 +1,190 @@
+"""
+    unicorn_dumper_gdb.py
+    
+    When run with GDB sitting at a debug breakpoint, this
+    dumps the current state (registers/memory/etc) of
+    the process to a directory consisting of an index 
+    file with register and segment information and 
+    sub-files containing all actual process memory.
+    
+    The output of this script is expected to be used 
+    to initialize context for Unicorn emulation.
+
+    -----------
+
+    In order to run this script, GEF needs to be running in the GDB session (gef.py)
+    # HELPERS from: https://github.com/hugsy/gef/blob/master/gef.py
+    It can be loaded with:
+      source <path_to_gef>/gef.py
+
+    Call this function when at a breakpoint in your process with:
+      source unicorn_dumper_gdb.py
+
+    -----------
+
+
+"""
+
+import datetime
+import hashlib
+import json
+import os
+import sys
+import time
+import zlib
+
+# GDB Python SDK
+import gdb
+
+# Maximum segment size that we'll store
+# Yep, this could break stuff pretty quickly if we
+# omit something that's used during emulation.
+MAX_SEG_SIZE = 128 * 1024 * 1024
+
+# Name of the index file
+INDEX_FILE_NAME = "_index.json"
+
+#----------------------
+#---- Helper Functions
+
+def map_arch():
+    arch = get_arch() # from GEF
+    if 'x86_64' in arch or 'x86-64' in arch:
+        return "x64"
+    elif 'x86' in arch or 'i386' in arch:
+        return "x86"
+    elif 'aarch64' in arch or 'arm64' in arch:
+        return "arm64le"
+    elif 'aarch64_be' in arch:
+        return "arm64be"
+    elif 'armeb' in arch:
+        # check for THUMB mode
+        cpsr = get_register('cpsr')
+        if (cpsr & (1 << 5)):
+            return "armbethumb"
+        else:
+            return "armbe"
+    elif 'arm' in arch:
+        # check for THUMB mode
+        cpsr = get_register('cpsr')
+        if (cpsr & (1 << 5)):
+            return "armlethumb"
+        else:
+            return "armle"
+    else:
+        return ""
+
+
+#-----------------------
+#---- Dumping functions
+
+def dump_arch_info():
+    arch_info = {}
+    arch_info["arch"] = map_arch()
+    return arch_info
+
+
+def dump_regs():
+    reg_state = {}
+    for reg in current_arch.all_registers:
+        reg_val = get_register(reg)
+        # current dumper script looks for register values to be hex strings
+#         reg_str = "0x{:08x}".format(reg_val)
+#         if "64" in get_arch():
+#             reg_str = "0x{:016x}".format(reg_val)
+#         reg_state[reg.strip().strip('$')] = reg_str
+        reg_state[reg.strip().strip('$')] = reg_val
+    return reg_state
+
+
+def dump_process_memory(output_dir):
+    # Segment information dictionary
+    final_segment_list = []
+    
+    # GEF:
+    vmmap = get_process_maps()
+    if not vmmap:
+        print("No address mapping information found")
+        return final_segment_list
+
+    for entry in vmmap:
+        if entry.page_start == entry.page_end:
+            continue
+        
+        seg_info = {'start': entry.page_start, 'end': entry.page_end, 'name': entry.path, 'permissions': {
+            "r": entry.is_readable() > 0,
+            "w": entry.is_writable() > 0,
+            "x": entry.is_executable() > 0
+        }, 'content_file': ''}
+
+        # "(deleted)" may or may not be valid, but don't push it.
+        if entry.is_readable() and '(deleted)' not in entry.path:
+            try:
+                # Compress and dump the content to a file
+                seg_content = read_memory(entry.page_start, entry.size)
+                if seg_content is None:
+                    print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.page_start, entry.path))
+                else:
+                    print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.page_start, len(seg_content), entry.path, repr(seg_info['permissions'])))
+                    compressed_seg_content = zlib.compress(seg_content)
+                    md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
+                    seg_info["content_file"] = md5_sum
+                    
+                    # Write the compressed contents to disk
+                    out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                    out_file.write(compressed_seg_content)
+                    out_file.close()
+
+            except:
+                print("Exception reading segment ({}): {}".format(entry.path, sys.exc_info()[0]))
+        else:
+            print("Skipping segment {0}@0x{1:016x}".format(entry.path, entry.page_start))
+
+        # Add the segment to the list
+        final_segment_list.append(seg_info)
+
+            
+    return final_segment_list
+
+#----------
+#---- Main    
+    
+def main():
+    print("----- Unicorn Context Dumper -----")
+    print("You must be actively debugging before running this!")
+    print("If it fails, double check that you are actively debugging before running.")
+    try:
+        GEF_TEST = set_arch()
+    except Exception as e:
+        print("!!! GEF not running in GDB.  Please run gef.py by executing:")
+        print('\tpython execfile ("<path_to_gef>/gef.py")')
+        return
+    
+    try:
+    
+        # Create the output directory
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        output_path = "UnicornContext_" + timestamp
+        if not os.path.exists(output_path):
+            os.makedirs(output_path)
+        print("Process context will be output to {}".format(output_path))
+            
+        # Get the context
+        context = {
+            "arch": dump_arch_info(),
+            "regs": dump_regs(), 
+            "segments": dump_process_memory(output_path),
+        }
+
+        # Write the index file
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file.write(json.dumps(context, indent=4))
+        index_file.close()    
+        print("Done.")
+        
+    except Exception as e:
+        print("!!! ERROR:\n\t{}".format(repr(e)))
+        
+if __name__ == "__main__":
+    main()
+    
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_ida.py b/unicorn_mode/helper_scripts/unicorn_dumper_ida.py
new file mode 100644
index 00000000..6cf9f30f
--- /dev/null
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_ida.py
@@ -0,0 +1,209 @@
+"""
+    unicorn_dumper_ida.py
+    
+    When run with IDA (<v7) sitting at a debug breakpoint, 
+    dumps the current state (registers/memory/etc) of
+    the process to a directory consisting of an index 
+    file with register and segment information and 
+    sub-files containing all actual process memory.
+    
+    The output of this script is expected to be used 
+    to initialize context for Unicorn emulation.
+"""
+
+import datetime
+import hashlib
+import json
+import os
+import sys
+import time
+import zlib
+
+# IDA Python SDK
+from idaapi import *
+from idc import *
+
+# Maximum segment size that we'll store
+# Yep, this could break stuff pretty quickly if we
+# omit something that's used during emulation.
+MAX_SEG_SIZE = 128 * 1024 * 1024
+
+# Name of the index file
+INDEX_FILE_NAME = "_index.json"
+
+#----------------------
+#---- Helper Functions
+
+def get_arch():
+    if ph.id == PLFM_386 and ph.flag & PR_USE64:
+        return "x64"
+    elif ph.id == PLFM_386 and ph.flag & PR_USE32:
+        return "x86"
+    elif ph.id == PLFM_ARM and ph.flag & PR_USE64:
+        if cvar.inf.is_be():
+            return "arm64be"
+        else:
+            return "arm64le"
+    elif ph.id == PLFM_ARM and ph.flag & PR_USE32:
+        if cvar.inf.is_be():
+            return "armbe"
+        else:
+            return "armle"
+    else:
+        return ""
+
+def get_register_list(arch):
+    if arch == "arm64le" or arch == "arm64be":
+        arch = "arm64"
+    elif arch == "armle" or arch == "armbe":
+        arch = "arm"
+
+    registers = {
+        "x64" : [
+            "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp",
+            "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+            "rip", "rsp", "efl",
+            "cs", "ds", "es", "fs", "gs", "ss",
+        ],
+        "x86" : [
+            "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
+            "eip", "esp", "efl", 
+            "cs", "ds", "es", "fs", "gs", "ss",
+        ],        
+        "arm" : [
+            "R0", "R1", "R2",  "R3",  "R4",  "R5", "R6", "R7",  
+            "R8", "R9", "R10", "R11", "R12", "PC", "SP", "LR",  
+            "PSR",
+        ],
+        "arm64" : [
+            "X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",  
+            "X8", "X9", "X10", "X11", "X12", "X13", "X14", 
+            "X15", "X16", "X17", "X18", "X19", "X20", "X21", 
+            "X22", "X23", "X24", "X25", "X26", "X27", "X28", 
+            "PC", "SP", "FP", "LR", "CPSR"
+            #    "NZCV",
+        ]
+    }
+    return registers[arch]  
+
+#-----------------------
+#---- Dumping functions
+
+def dump_arch_info():
+    arch_info = {}
+    arch_info["arch"] = get_arch()
+    return arch_info
+
+def dump_regs():
+    reg_state = {}
+    for reg in get_register_list(get_arch()):
+        reg_state[reg] = GetRegValue(reg)
+    return reg_state
+
+def dump_process_memory(output_dir):
+    # Segment information dictionary
+    segment_list = []
+    
+    # Loop over the segments, fill in the info dictionary
+    for seg_ea in Segments():
+        seg_start = SegStart(seg_ea)
+        seg_end = SegEnd(seg_ea)
+        seg_size = seg_end - seg_start
+
+        seg_info = {}
+        seg_info["name"]  = SegName(seg_ea)
+        seg_info["start"] = seg_start
+        seg_info["end"]   = seg_end
+        
+        perms = getseg(seg_ea).perm
+        seg_info["permissions"] = {
+            "r": False if (perms & SEGPERM_READ)  == 0 else True,
+            "w": False if (perms & SEGPERM_WRITE) == 0 else True,
+            "x": False if (perms & SEGPERM_EXEC)  == 0 else True,
+        }
+
+        if (perms & SEGPERM_READ) and seg_size <= MAX_SEG_SIZE and isLoaded(seg_start):
+            try:
+                # Compress and dump the content to a file
+                seg_content = get_many_bytes(seg_start, seg_end - seg_start)
+                if seg_content is None:
+                    print("Segment empty: {0}@0x{1:016x} (size:UNKNOWN)".format(SegName(seg_ea), seg_ea))
+                    seg_info["content_file"] = ""
+                else:
+                    print("Dumping segment {0}@0x{1:016x} (size:{2})".format(SegName(seg_ea), seg_ea, len(seg_content)))
+                    compressed_seg_content = zlib.compress(seg_content)
+                    md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
+                    seg_info["content_file"] = md5_sum
+                    
+                    # Write the compressed contents to disk
+                    out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                    out_file.write(compressed_seg_content)
+                    out_file.close()
+            except:
+                print("Exception reading segment: {}".format(sys.exc_info()[0]))
+                seg_info["content_file"] = ""
+        else:
+            print("Skipping segment {0}@0x{1:016x}".format(SegName(seg_ea), seg_ea))
+            seg_info["content_file"] = ""
+            
+        # Add the segment to the list
+        segment_list.append(seg_info)     
+   
+    return segment_list
+
+"""
+    TODO: FINISH IMPORT DUMPING
+def import_callback(ea, name, ord):
+    if not name:
+    else:
+    
+    # True -> Continue enumeration
+    # False -> End enumeration
+    return True
+    
+def dump_imports():
+    import_dict = {}
+    
+    for i in xrange(0, number_of_import_modules):
+        enum_import_names(i, import_callback)
+    
+    return import_dict
+"""
+ 
+#----------
+#---- Main    
+    
+def main():
+
+    try:
+        print("----- Unicorn Context Dumper -----")
+        print("You must be actively debugging before running this!")
+        print("If it fails, double check that you are actively debugging before running.")
+
+        # Create the output directory
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        output_path = os.path.dirname(os.path.abspath(GetIdbPath()))
+        output_path = os.path.join(output_path, "UnicornContext_" + timestamp)
+        if not os.path.exists(output_path):
+            os.makedirs(output_path)
+        print("Process context will be output to {}".format(output_path))
+            
+        # Get the context
+        context = {
+            "arch": dump_arch_info(),
+            "regs": dump_regs(), 
+            "segments": dump_process_memory(output_path),
+            #"imports": dump_imports(),
+        }
+
+        # Write the index file
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file.write(json.dumps(context, indent=4))
+        index_file.close()    
+        print("Done.")
+        
+    except Exception as e:
+        print("!!! ERROR:\n\t{}".format(str(e)))
+        
+if __name__ == "__main__":
+    main()
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_lldb.py b/unicorn_mode/helper_scripts/unicorn_dumper_lldb.py
new file mode 100644
index 00000000..3c019d77
--- /dev/null
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_lldb.py
@@ -0,0 +1,299 @@
+"""
+    unicorn_dumper_lldb.py
+    
+    When run with LLDB sitting at a debug breakpoint, this
+    dumps the current state (registers/memory/etc) of
+    the process to a directory consisting of an index 
+    file with register and segment information and 
+    sub-files containing all actual process memory.
+    
+    The output of this script is expected to be used 
+    to initialize context for Unicorn emulation.
+
+    -----------
+
+    Call this function when at a breakpoint in your process with:
+      command script import -r unicorn_dumper_lldb
+
+    If there is trouble with "split on a NoneType", issue the following command:
+      script lldb.target.triple
+
+    and try to import the script again.
+
+    -----------
+
+"""
+
+from copy import deepcopy
+import datetime
+import hashlib
+import json
+import os
+import sys
+import time
+import zlib
+
+# LLDB Python SDK
+import lldb
+
+# Maximum segment size that we'll store
+# Yep, this could break stuff pretty quickly if we
+# omit something that's used during emulation.
+MAX_SEG_SIZE = 128 * 1024 * 1024
+
+# Name of the index file
+INDEX_FILE_NAME = "_index.json"
+DEBUG_MEM_FILE_NAME = "_memory.json"
+
+# Page size required by Unicorn
+UNICORN_PAGE_SIZE = 0x1000
+
+# Alignment functions to align all memory segments to Unicorn page boundaries (4KB pages only)
+ALIGN_PAGE_DOWN = lambda x: x & ~(UNICORN_PAGE_SIZE - 1)
+ALIGN_PAGE_UP   = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE-1)
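+# Quick sanity check of the helpers (illustrative values):
+#   ALIGN_PAGE_DOWN(0x1234) == 0x1000
+#   ALIGN_PAGE_UP(0x1234)   == 0x2000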
+
+#----------------------
+#---- Helper Functions
+
+def overlap_alignments(segments, memory):
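+    """Merge the module/section list with the raw memory map so that the
+    result contains no overlapping regions and every byte keeps the
+    permissions reported by the debugger. For example (illustrative
+    addresses), a memory region [0x1000, 0x4000) containing a section
+    [0x2000, 0x3000) is split into three adjacent nodes."""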
+    final_list = []
+    curr_seg_idx = 0
+    curr_end_addr = 0
+    curr_node = None
+    current_segment = None
+    sorted_segments = sorted(segments, key=lambda k: (k['start'], k['end']))
+    if curr_seg_idx < len(sorted_segments):
+        current_segment = sorted_segments[curr_seg_idx]
+    for mem in sorted(memory, key=lambda k: (k['start'], -k['end'])):
+        if curr_node is None:
+            if current_segment is not None and current_segment['start'] == mem['start']:
+                curr_node = deepcopy(current_segment)
+                curr_node['permissions'] = mem['permissions']
+            else:
+                curr_node = deepcopy(mem)
+
+            curr_end_addr = curr_node['end']
+
+        while curr_end_addr <= mem['end']:
+            if curr_node['end'] == mem['end']:
+                if current_segment is not None and current_segment['start'] > curr_node['start'] and current_segment['start'] < curr_node['end']:
+                    curr_node['end'] = current_segment['start']
+                    if(curr_node['end'] > curr_node['start']):
+                        final_list.append(curr_node)
+                    curr_node = deepcopy(current_segment)
+                    curr_node['permissions'] = mem['permissions']
+                    curr_end_addr = curr_node['end']
+                else:
+                    if(curr_node['end'] > curr_node['start']):
+                        final_list.append(curr_node)
+                    # if curr_node is a segment
+                    if current_segment is not None and current_segment['end'] == mem['end']:
+                        curr_seg_idx += 1
+                        if curr_seg_idx < len(sorted_segments):
+                            current_segment = sorted_segments[curr_seg_idx]
+                        else:
+                            current_segment = None
+
+                    curr_node = None
+                    break
+            # could only be a segment
+            else:
+                if curr_node['end'] < mem['end']:
+                    # check for remaining segments and valid segments
+                    if(curr_node['end'] > curr_node['start']):
+                        final_list.append(curr_node)
+          
+                    curr_seg_idx += 1
+                    if curr_seg_idx < len(sorted_segments):
+                        current_segment = sorted_segments[curr_seg_idx]
+                    else:
+                        current_segment = None
+                        
+                    if current_segment is not None and current_segment['start'] <= curr_end_addr and current_segment['start'] < mem['end']:
+                        curr_node = deepcopy(current_segment)
+                        curr_node['permissions'] = mem['permissions']
+                    else:
+                        # no more segments
+                        curr_node = deepcopy(mem)
+                        
+                    curr_node['start'] = curr_end_addr
+                    curr_end_addr = curr_node['end']
+
+    return final_list    
+
+# https://github.com/llvm-mirror/llvm/blob/master/include/llvm/ADT/Triple.h
+def get_arch():
+    arch, arch_vendor, arch_os = lldb.target.GetTriple().split('-')
+    if arch == 'x86_64':
+        return "x64"
+    elif arch == 'x86' or arch == 'i386':
+        return "x86"
+    elif arch == 'aarch64' or arch == 'arm64':
+        return "arm64le"
+    elif arch == 'aarch64_be':
+        return "arm64be"
+    elif arch == 'armeb':
+        return "armbe"
+    elif arch == 'arm':
+        return "armle"
+    else:
+        return ""
+
+
+#-----------------------
+#---- Dumping functions
+
+def dump_arch_info():
+    arch_info = {}
+    arch_info["arch"] = get_arch()
+    return arch_info
+
+
+def dump_regs():
+    reg_state = {}
+    for reg_list in lldb.frame.GetRegisters():
+        if 'general purpose registers' in reg_list.GetName().lower():
+            for reg in reg_list:
+                reg_state[reg.GetName()] = int(reg.GetValue(), 16)
+    return reg_state
+
+def get_section_info(sec):
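+    """Return (load_addr, end_addr, size, name) for an SBSection, where name
+    is qualified as 'module.parent_section.section' when available."""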
+    name = sec.name if sec.name is not None else ''
+    if sec.GetParent().name is not None:
+        name = sec.GetParent().name + '.' + sec.name
+
+    module_name = sec.addr.module.file.GetFilename()
+    module_name = module_name if module_name is not None else ''
+    long_name = module_name + '.' + name
+    
+    return sec.addr.load_addr, (sec.addr.load_addr + sec.size), sec.size, long_name
+ 
+
+def dump_process_memory(output_dir):
+    # Segment information dictionary
+    raw_segment_list = []
+    raw_memory_list = []
+    
+    # 1st pass:
+    # Loop over the segments, fill in the segment info dictionary
+    for module in lldb.target.module_iter():
+        for seg_ea in module.section_iter():
+            seg_info = {'module': module.file.GetFilename() }
+            seg_info['start'], seg_info['end'], seg_size, seg_info['name'] = get_section_info(seg_ea)
+            # TODO: Ugly hack for -1 LONG address on 32-bit
+            if seg_info['start'] >= sys.maxsize or seg_size <= 0:
+                print("Throwing away page: {}".format(seg_info['name']))
+                continue
+
+            # Page-align segment
+            seg_info['start'] = ALIGN_PAGE_DOWN(seg_info['start'])
+            seg_info['end'] = ALIGN_PAGE_UP(seg_info['end'])
+            print("Appending: {}".format(seg_info['name']))
+            raw_segment_list.append(seg_info)
+
+    # Add the stack memory region (just hardcode 0x1000 around the current SP)
+    sp = lldb.frame.GetSP()
+    start_sp = ALIGN_PAGE_DOWN(sp)
+    raw_segment_list.append({'start': start_sp, 'end': start_sp + 0x1000, 'name': 'STACK'})
+
+    # Write the original memory to file for debugging
+    index_file = open(os.path.join(output_dir, DEBUG_MEM_FILE_NAME), 'w')
+    index_file.write(json.dumps(raw_segment_list, indent=4))
+    index_file.close()    
+
+    # Loop over raw memory regions 
+    mem_info = lldb.SBMemoryRegionInfo()
+    start_addr = -1
+    next_region_addr = 0
+    while next_region_addr > start_addr:
+        err = lldb.process.GetMemoryRegionInfo(next_region_addr, mem_info)
+        # Bail out if the memory-region query fails
+        if not err.success:
+            break
+        next_region_addr = mem_info.GetRegionEnd()
+        if next_region_addr >= sys.maxsize:
+            break
+
+        start_addr = mem_info.GetRegionBase()
+        end_addr = mem_info.GetRegionEnd()
+
+        # Unknown region name
+        region_name = 'UNKNOWN'
+
+        # Ignore regions that aren't even mapped
+        if mem_info.IsMapped() and mem_info.IsReadable():
+            mem_info_obj = {'start': start_addr, 'end': end_addr, 'name': region_name, 'permissions': {
+                "r": mem_info.IsReadable(),
+                "w": mem_info.IsWritable(),
+                "x": mem_info.IsExecutable()
+            }}
+
+            raw_memory_list.append(mem_info_obj)
+
+    final_segment_list = overlap_alignments(raw_segment_list, raw_memory_list)
+
+    for seg_info in final_segment_list:
+        try:
+            seg_info['content_file'] = ''
+            start_addr = seg_info['start']
+            end_addr = seg_info['end']
+            region_name = seg_info['name']
+            # Compress and dump the content to a file
+            err = lldb.SBError()
+            seg_content = lldb.process.ReadMemory(start_addr, end_addr - start_addr, err)
+            if seg_content is None:
+                print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(start_addr, region_name))
+                seg_info['content_file'] = ''
+            else:
+                print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(start_addr, len(seg_content), region_name, repr(seg_info['permissions'])))
+                compressed_seg_content = zlib.compress(seg_content)
+                md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
+                seg_info['content_file'] = md5_sum
+                
+                # Write the compressed contents to disk
+                out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                out_file.write(compressed_seg_content)
+                out_file.close()
+    
+        except:
+            print("Exception reading segment ({}): {}".format(region_name, sys.exc_info()[0]))
+            
+    return final_segment_list
+
+#----------
+#---- Main    
+    
+def main():
+
+    try:
+        print("----- Unicorn Context Dumper -----")
+        print("You must be actively debugging before running this!")
+        print("If it fails, double check that you are actively debugging before running.")
+        
+        # Create the output directory
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        output_path = "UnicornContext_" + timestamp
+        if not os.path.exists(output_path):
+            os.makedirs(output_path)
+        print("Process context will be output to {}".format(output_path))
+            
+        # Get the context
+        context = {
+            "arch": dump_arch_info(),
+            "regs": dump_regs(), 
+            "segments": dump_process_memory(output_path),
+        }
+    
+        # Write the index file
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file.write(json.dumps(context, indent=4))
+        index_file.close()    
+        print("Done.")
+        
+    except Exception as e:
+        print("!!! ERROR:\n\t{}".format(repr(e)))
+        
+if __name__ == "__main__":
+    main()
+elif lldb.debugger:
+    main()
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py b/unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py
new file mode 100644
index 00000000..bf2367cf
--- /dev/null
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py
@@ -0,0 +1,224 @@
+"""
+    unicorn_dumper_pwndbg.py
+    
+    When run with GDB sitting at a debug breakpoint, this
+    dumps the current state (registers/memory/etc) of
+    the process to a directory consisting of an index 
+    file with register and segment information and 
+    sub-files containing all actual process memory.
+    
+    The output of this script is expected to be used 
+    to initialize context for Unicorn emulation.
+
+    -----------
+
+    In order to run this script, pwndbg needs to be loaded in the GDB
+    session (helpers from: https://github.com/pwndbg/pwndbg). It can be
+    loaded with:
+      source <path_to_pwndbg>/gdbinit.py
+
+    Call this function when at a breakpoint in your process with:
+      source unicorn_dumper_pwndbg.py
+
+    -----------
+
+
+"""
+
+import datetime
+import hashlib
+import json
+import os
+import sys
+import time
+import zlib
+
+# GDB Python SDK
+import gdb
+
+pwndbg_loaded = False
+
+try:
+    import pwndbg.arch
+    import pwndbg.regs
+    import pwndbg.vmmap
+    import pwndbg.memory
+
+    pwndbg_loaded = True
+
+except ImportError:
+    print("!!! PWNGDB not running in GDB.  Please run gdbinit.py by executing:")
+    print('\tpython execfile ("<path_to_pwndbg>/gdbinit.py")')
+
+# Maximum segment size that we'll store
+# Yep, this could break stuff pretty quickly if we
+# omit something that's used during emulation.
+MAX_SEG_SIZE = 128 * 1024 * 1024
+
+# Name of the index file
+INDEX_FILE_NAME = "_index.json"
+
+#----------------------
+#---- Helper Functions
+
+def map_arch():
+    arch = pwndbg.arch.current # from PWNDBG
+    if 'x86_64' in arch or 'x86-64' in arch:
+        return "x64"
+    elif 'x86' in arch or 'i386' in arch:
+        return "x86"
+    elif 'aarch64' in arch or 'arm64' in arch:
+        return "arm64le"
+    elif 'aarch64_be' in arch:
+        return "arm64be"
+    elif 'arm' in arch:
+        cpsr = pwndbg.regs['cpsr']
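+        # CPSR bit 5 is the Thumb (T) execution-state bit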
+        # check endianness
+        if pwndbg.arch.endian == 'big':
+            # check for THUMB mode
+            if (cpsr & (1 << 5)):
+                return "armbethumb"
+            else:
+                return "armbe"
+        else:
+            # check for THUMB mode
+            if (cpsr & (1 << 5)):
+                return "armlethumb"
+            else:
+                return "armle"
+    elif 'mips' in arch:
+        if pwndbg.arch.endian == 'little':
+            return 'mipsel'
+        else:
+            return 'mips'
+    else:
+        return ""
+
+
+#-----------------------
+#---- Dumping functions
+
+def dump_arch_info():
+    arch_info = {}
+    arch_info["arch"] = map_arch()
+    return arch_info
+
+
+def dump_regs():
+    reg_state = {}
+    for reg in pwndbg.regs.all:
+        reg_val = pwndbg.regs[reg]
+        # current dumper script looks for register values to be hex strings
+#         reg_str = "0x{:08x}".format(reg_val)
+#         if "64" in get_arch():
+#             reg_str = "0x{:016x}".format(reg_val)
+#         reg_state[reg.strip().strip('$')] = reg_str
+        reg_state[reg.strip().strip('$')] = reg_val
+    return reg_state
+
+
+def dump_process_memory(output_dir):
+    # Segment information dictionary
+    final_segment_list = []
+    
+    # PWNDBG:
+    vmmap = pwndbg.vmmap.get()
+    
+    # Pointer to end of last dumped memory segment
+    segment_last_addr = 0x0
+
+    start = None
+    end   = None
+
+    if not vmmap:
+        print("No address mapping information found")
+        return final_segment_list
+
+    # Assume segment entries are sorted by start address
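+    # Overlaps with the previously dumped segment are trimmed below, e.g. if
+    # the last segment ended at 0x3000 and this entry spans 0x2000-0x5000,
+    # only [0x3000, 0x5000) is dumped (illustrative addresses).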
+    for entry in vmmap:
+        if entry.start == entry.end:
+            continue
+
+        start = entry.start
+        end   = entry.end
+
+        if (segment_last_addr > entry.start): # indicates overlap
+            if (segment_last_addr > entry.end): # indicates complete overlap, so we skip the segment entirely
+                continue
+            else:            
+                start = segment_last_addr
+            
+        
+        seg_info = {'start': start, 'end': end, 'name': entry.objfile, 'permissions': {
+            "r": entry.read,
+            "w": entry.write,
+            "x": entry.execute
+        }, 'content_file': ''}
+
+        # "(deleted)" may or may not be valid, but don't push it.
+        if entry.read and not '(deleted)' in entry.objfile:
+            try:
+                # Compress and dump the content to a file
+                seg_content = pwndbg.memory.read(start, end - start)
+                if seg_content is None:
+                    print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.start, entry.objfile))
+                else:
+                    print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.start, len(seg_content), entry.objfile, repr(seg_info['permissions'])))
+                    compressed_seg_content = zlib.compress(seg_content)
+                    md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
+                    seg_info["content_file"] = md5_sum
+                    
+                    # Write the compressed contents to disk
+                    out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                    out_file.write(compressed_seg_content)
+                    out_file.close()
+
+            except:
+                print("Exception reading segment ({}): {}".format(entry.objfile, sys.exc_info()[0]))
+        else:
+            print("Skipping segment {0}@0x{1:016x}".format(entry.objfile, entry.start))
+        
+        segment_last_addr = end
+
+        # Add the segment to the list
+        final_segment_list.append(seg_info)
+
+            
+    return final_segment_list
+
+#----------
+#---- Main    
+    
+def main():
+    print("----- Unicorn Context Dumper -----")
+    print("You must be actively debugging before running this!")
+    print("If it fails, double check that you are actively debugging before running.")
+    
+    try:
+
+        # Create the output directory
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        output_path = "UnicornContext_" + timestamp
+        if not os.path.exists(output_path):
+            os.makedirs(output_path)
+        print("Process context will be output to {}".format(output_path))
+            
+        # Get the context
+        context = {
+            "arch": dump_arch_info(),
+            "regs": dump_regs(), 
+            "segments": dump_process_memory(output_path),
+        }
+
+        # Write the index file
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file.write(json.dumps(context, indent=4))
+        index_file.close()    
+        print("Done.")
+        
+    except Exception as e:
+        print("!!! ERROR:\n\t{}".format(repr(e)))
+        
+if __name__ == "__main__" and pwndbg_loaded:
+    main()
+    
diff --git a/unicorn_mode/helper_scripts/unicorn_loader.py b/unicorn_mode/helper_scripts/unicorn_loader.py
new file mode 100644
index 00000000..adf21b64
--- /dev/null
+++ b/unicorn_mode/helper_scripts/unicorn_loader.py
@@ -0,0 +1,560 @@
+"""
+    unicorn_loader.py
+    
+    Loads a process context dump created by one of the Unicorn Context
+    Dumper scripts into a Unicorn Engine instance. Once loaded, emulation
+    can be started.
+"""
+
+import argparse
+import binascii
+from collections import namedtuple
+import datetime
+import hashlib
+import json
+import os
+import signal
+import struct
+import time
+import zlib
+
+# Unicorn imports
+from unicorn import *
+from unicorn.arm_const import *
+from unicorn.arm64_const import *
+from unicorn.x86_const import *
+from unicorn.mips_const import *
+
+# Name of the index file
+INDEX_FILE_NAME = "_index.json"
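+
+# The context directory written by a dumper script holds this index file
+# plus one zlib-compressed .bin blob per segment. A sketch of the index
+# layout (field values are illustrative):
+#
+#   {
+#       "arch": {"arch": "x64"},
+#       "regs": {"rax": 0, "rip": 4198400, ...},
+#       "segments": [{"name": "...", "start": 4194304, "end": 4198400,
+#                     "permissions": {"r": true, "w": false, "x": true},
+#                     "content_file": "<md5>.bin"}]
+#   }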
+
+# Page size required by Unicorn
+UNICORN_PAGE_SIZE = 0x1000
+
+# Max allowable segment size (1G)
+MAX_ALLOWABLE_SEG_SIZE = 1024 * 1024 * 1024
+
+# Alignment functions to align all memory segments to Unicorn page boundaries (4KB pages only)
+ALIGN_PAGE_DOWN = lambda x: x & ~(UNICORN_PAGE_SIZE - 1)
+ALIGN_PAGE_UP   = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE-1)
+
+#---------------------------------------
+#---- Unicorn-based heap implementation
+
+class UnicornSimpleHeap(object):
+    """ Use this class to provide a simple heap implementation. This should
+        be used if malloc/free calls break things during emulation. This heap also
+        implements basic guard-page capabilities which enable immediate notice of
+        heap overflow and underflows.
+    """
+
+    # Helper data-container used to track chunks
+    class HeapChunk(object):
+        def __init__(self, actual_addr, total_size, data_size):
+            self.total_size = total_size                        # Total size of the chunk (including padding and guard page)
+            self.actual_addr = actual_addr                      # Actual start address of the chunk
+            self.data_size = data_size                          # Size requested by the caller of actual malloc call
+            self.data_addr = actual_addr + UNICORN_PAGE_SIZE    # Address where data actually starts
+
+        # Returns true if the specified buffer is completely within the chunk, else false
+        def is_buffer_in_chunk(self, addr, size):
+            if addr >= self.data_addr and ((addr + size) <= (self.data_addr + self.data_size)):
+                return True
+            else:
+                return False
+
+    # Skip the zero-page to avoid weird potential issues with segment registers
+    HEAP_MIN_ADDR = 0x00002000
+    HEAP_MAX_ADDR = 0xFFFFFFFF
+
+    _uc = None              # Unicorn engine instance to interact with
+    _chunks = []            # List of all known chunks
+    _debug_print = False    # True to print debug information
+
+    def __init__(self, uc, debug_print=False):
+        self._uc = uc
+        self._chunks = []   # per-instance list (the class-level default would be shared)
+        self._debug_print = debug_print
+
+        # Add the watchpoint hook that will be used to implement pseudo-guard-page support
+        self._uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, self.__check_mem_access)
+
+    def malloc(self, size):
+        # Figure out the overall size to be allocated/mapped
+        #    - Allocate at least 1 4k page of memory to make Unicorn happy
+        #    - Add guard pages at the start and end of the region
+        total_chunk_size = UNICORN_PAGE_SIZE + ALIGN_PAGE_UP(size) + UNICORN_PAGE_SIZE
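+        #    Resulting layout: [ guard page | page-aligned data | guard page ]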
+        # Gross but efficient way to find space for the chunk:
+        chunk = None
+        for addr in xrange(self.HEAP_MIN_ADDR, self.HEAP_MAX_ADDR, UNICORN_PAGE_SIZE):
+            try:
+                self._uc.mem_map(addr, total_chunk_size, UC_PROT_READ | UC_PROT_WRITE)
+                chunk = self.HeapChunk(addr, total_chunk_size, size)
+                if self._debug_print:
+                    print("Allocating 0x{0:x}-byte chunk @ 0x{1:016x}".format(chunk.data_size, chunk.data_addr))
+                break
+            except UcError as e:
+                continue
+        # Something went very wrong
+        if chunk is None:
+            return 0    
+        self._chunks.append(chunk)
+        return chunk.data_addr
+
+    def calloc(self, size, count):
+        # Simple wrapper around malloc with calloc() args
+        return self.malloc(size*count)
+
+    def realloc(self, ptr, new_size):
+        # Wrapper around malloc(new_size) / memcpy(new, old, old_size) / free(old)
+        if self._debug_print:
+            print("Reallocating chunk @ 0x{0:016x} to be 0x{1:x} bytes".format(ptr, new_size))
+        old_chunk = None
+        for chunk in self._chunks:
+            if chunk.data_addr == ptr:
+                old_chunk = chunk 
+        new_chunk_addr = self.malloc(new_size) 
+        if old_chunk != None:
+            self._uc.mem_write(new_chunk_addr, str(self._uc.mem_read(old_chunk.data_addr, old_chunk.data_size)))
+            self.free(old_chunk.data_addr)
+        return new_chunk_addr
+
+    def free(self, addr):
+        for chunk in self._chunks:
+            if chunk.is_buffer_in_chunk(addr, 1):
+                if self._debug_print:
+                    print("Freeing 0x{0:x}-byte chunk @ 0x{0:016x}".format(chunk.req_size, chunk.data_addr))
+                self._uc.mem_unmap(chunk.actual_addr, chunk.total_size)
+                self._chunks.remove(chunk)
+                return True
+        return False
+
+    # Implements basic guard-page functionality
+    def __check_mem_access(self, uc, access, address, size, value, user_data):
+        for chunk in self._chunks:
+            if address >= chunk.actual_addr and ((address + size) <= (chunk.actual_addr + chunk.total_size)):
+                if chunk.is_buffer_in_chunk(address, size) == False:
+                    if self._debug_print:
+                        print("Heap over/underflow attempting to {0} 0x{1:x} bytes @ {2:016x}".format( \
+                            "write" if access == UC_MEM_WRITE else "read", size, address))
+                    # Force a memory-based crash
+                    uc.force_crash(UcError(UC_ERR_READ_PROT))
+
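+# A minimal usage sketch (hypothetical addresses; assumes `uc` is a Uc
+# instance whose address space has room for the heap range):
+#
+#   heap = UnicornSimpleHeap(uc, debug_print=True)
+#   buf = heap.malloc(0x100)       # address of the usable data region
+#   uc.mem_write(buf, "A" * 0x100)
+#   heap.free(buf)
+#
+# Emulated accesses that stray from the data region into the surrounding
+# guard pages are turned into a forced crash by __check_mem_access.
+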
+#---------------------------
+#---- Loading function
+
+class AflUnicornEngine(Uc):
+
+    def __init__(self, context_directory, enable_trace=False, debug_print=False):
+        """
+        Initializes an AflUnicornEngine instance, which extends the standard Unicorn
+        engine with a number of helper routines that are useful for creating
+        afl-unicorn test harnesses.
+
+        Parameters:
+          - context_directory: Path to the directory generated by one of the context dumper scripts
+          - enable_trace: If True trace information will be printed to STDOUT
+          - debug_print: If True debugging information will be printed while loading the context
+        """
+
+        # Make sure the index file exists and load it
+        index_file_path = os.path.join(context_directory, INDEX_FILE_NAME)
+        if not os.path.isfile(index_file_path):
+            raise Exception("Index file not found. Expected it to be at {}".format(index_file_path))
+
+        # Load the process context from the index file
+        if debug_print:
+            print("Loading process context index from {}".format(index_file_path))
+        index_file = open(index_file_path, 'r')
+        context = json.load(index_file)
+        index_file.close()
+
+        # Check the context to make sure we have the basic essential components
+        if 'arch' not in context:
+            raise Exception("Couldn't find architecture information in index file")
+        if 'regs' not in context:
+            raise Exception("Couldn't find register information in index file")
+        if 'segments' not in context:
+            raise Exception("Couldn't find segment/memory information in index file")
+
+        # Set the UnicornEngine instance's architecture and mode
+        self._arch_str = context['arch']['arch']
+        arch, mode = self.__get_arch_and_mode(self._arch_str)
+        Uc.__init__(self, arch, mode)
+
+        # Load the registers
+        regs = context['regs']
+        reg_map = self.__get_register_map(self._arch_str)
+        for register, value in regs.iteritems():
+            if debug_print:
+                print("Reg {0} = {1}".format(register, value))
+            if register.lower() not in reg_map:
+                if debug_print:
+                    print("Skipping Reg: {}".format(register))
+            else:
+                reg_write_retry = True
+                try:
+                    self.reg_write(reg_map[register.lower()], value)
+                    reg_write_retry = False
+                except Exception as e:
+                    if debug_print:
+                        print("ERROR writing register: {}, value: {} -- {}".format(register, value, repr(e)))
+
+                if reg_write_retry:
+                    if debug_print:
+                        print("Trying to parse value ({}) as hex string".format(value))
+                    try:
+                        self.reg_write(reg_map[register.lower()], int(value, 16))
+                    except Exception as e:
+                        if debug_print:
+                            print("ERROR writing hex string register: {}, value: {} -- {}".format(register, value, repr(e)))
+                        
+        # Setup the memory map and load memory content
+        self.__map_segments(context['segments'], context_directory, debug_print)
+        
+        if enable_trace:
+            self.hook_add(UC_HOOK_BLOCK, self.__trace_block)
+            self.hook_add(UC_HOOK_CODE, self.__trace_instruction)
+            self.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, self.__trace_mem_access)
+            self.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, self.__trace_mem_invalid_access)
+            
+        if debug_print:
+            print("Done loading context.")
+
+    def get_arch(self):
+        return self._arch
+
+    def get_mode(self):
+        return self._mode
+
+    def get_arch_str(self):
+        return self._arch_str
+                    
+    def force_crash(self, uc_error):
+        """ This function should be called to indicate to AFL that a crash occurred during emulation.
+            You can pass the exception received from Uc.emu_start
+        """
+        mem_errors = [
+            UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED,
+            UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED,
+            UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED,
+        ]
+        if uc_error.errno in mem_errors:
+            # Memory error - throw SIGSEGV
+            os.kill(os.getpid(), signal.SIGSEGV)
+        elif uc_error.errno == UC_ERR_INSN_INVALID:
+            # Invalid instruction - throw SIGILL
+            os.kill(os.getpid(), signal.SIGILL)
+        else:
+            # Not sure what happened - throw SIGABRT
+            os.kill(os.getpid(), signal.SIGABRT)
+
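+    # Typical harness pattern (a sketch):
+    #
+    #   try:
+    #       uc.emu_start(start_address, end_address)
+    #   except UcError as e:
+    #       uc.force_crash(e)
+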
+    def dump_regs(self):
+        """ Dumps the contents of all the registers to STDOUT """
+        for reg in sorted(self.__get_register_map(self._arch_str).items(), key=lambda reg: reg[0]):
+            print(">>> {0:>4}: 0x{1:016x}".format(reg[0], self.reg_read(reg[1])))
+
+    # TODO: Make this dynamically get the stack pointer register and pointer width for the current architecture
+    """
+    def dump_stack(self, window=10):
+        print(">>> Stack:")
+        stack_ptr_addr = self.reg_read(UC_X86_REG_RSP)
+        for i in xrange(-window, window + 1):
+            addr = stack_ptr_addr + (i*8)
+            print("{0}0x{1:016x}: 0x{2:016x}".format( \
+                'SP->' if i == 0 else '    ', addr, \
+                struct.unpack('<Q', self.mem_read(addr, 8))[0]))
+    """
+
+    #-----------------------------
+    #---- Loader Helper Functions
+
+    def __map_segment(self, name, address, size, perms, debug_print=False):
+        # - size is unsigned and must be != 0
+        # - starting address must be aligned to 4KB
+        # - map size must be multiple of the page size (4KB)
+        mem_start = address
+        mem_end = address + size
+        mem_start_aligned = ALIGN_PAGE_DOWN(mem_start)
+        mem_end_aligned = ALIGN_PAGE_UP(mem_end)
+        if debug_print:
+            if mem_start_aligned != mem_start or mem_end_aligned != mem_end:
+                print("Aligning segment to page boundary:")
+                print("  name:  {}".format(name))
+                print("  start: {0:016x} -> {1:016x}".format(mem_start, mem_start_aligned))
+                print("  end:   {0:016x} -> {1:016x}".format(mem_end, mem_end_aligned))
+            print("Mapping segment from {0:016x} - {1:016x} with perm={2}: {3}".format(mem_start_aligned, mem_end_aligned, perms, name))
+        if(mem_start_aligned < mem_end_aligned):
+            self.mem_map(mem_start_aligned, mem_end_aligned - mem_start_aligned, perms)
+
+
+    def __map_segments(self, segment_list, context_directory, debug_print=False):
+        for segment in segment_list:
+            
+            # Get the segment information from the index
+            name = segment['name']
+            seg_start = segment['start']
+            seg_end = segment['end']
+            perms = \
+                (UC_PROT_READ  if segment['permissions']['r'] == True else 0) | \
+                (UC_PROT_WRITE if segment['permissions']['w'] == True else 0) | \
+                (UC_PROT_EXEC  if segment['permissions']['x'] == True else 0)        
+
+            if debug_print:
+                print("Handling segment {}".format(name))
+
+            # Check for any overlap with existing segments. If there is, it must
+            # be consolidated and merged together before mapping since Unicorn
+            # doesn't allow overlapping segments.
+            found = False
+            overlap_start = False
+            overlap_end = False
+            tmp = 0
+            for (mem_start, mem_end, mem_perm) in self.mem_regions():
+                mem_end = mem_end + 1
+                if seg_start >= mem_start and seg_end < mem_end:
+                    found = True
+                    break
+                if seg_start >= mem_start and seg_start < mem_end:
+                    overlap_start = True
+                    tmp = mem_end
+                    break
+                if seg_end >= mem_start and seg_end < mem_end:
+                    overlap_end = True
+                    tmp = mem_start
+                    break
+
+            # Map memory into the address space if it is of an acceptable size.
+            if (seg_end - seg_start) > MAX_ALLOWABLE_SEG_SIZE:
+                if debug_print:
+                    print("Skipping segment (LARGER THAN {0}) from {1:016x} - {2:016x} with perm={3}: {4}".format(MAX_ALLOWABLE_SEG_SIZE, seg_start, seg_end, perms, name))
+                continue
+            elif not found:           # Make sure it's not already mapped
+                if overlap_start:     # Partial overlap (start)
+                    self.__map_segment(name, tmp, seg_end - tmp, perms, debug_print)
+                elif overlap_end:       # Partial overlap (end)
+                    self.__map_segment(name, seg_start, tmp - seg_start, perms, debug_print)
+                else:                   # Not found
+                    self.__map_segment(name, seg_start, seg_end - seg_start, perms, debug_print)
+            else:
+                if debug_print:
+                    print("Segment {} already mapped. Moving on.".format(name))
+
+            # Load the content (if available)
+            if 'content_file' in segment and len(segment['content_file']) > 0:
+                content_file_path = os.path.join(context_directory, segment['content_file'])
+                if not os.path.isfile(content_file_path):
+                    raise Exception("Unable to find segment content file. Expected it to be at {}".format(content_file_path))
+                #if debug_print:
+                #    print("Loading content for segment {} from {}".format(name, segment['content_file']))
+                content_file = open(content_file_path, 'rb')
+                compressed_content = content_file.read()
+                content_file.close()
+                self.mem_write(seg_start, zlib.decompress(compressed_content)) 
+
+            else:
+                if debug_print:
+                    print("No content found for segment {0} @ {1:016x}".format(name, seg_start))
+                self.mem_write(seg_start, '\x00' * (seg_end - seg_start))
+
+    def __get_arch_and_mode(self, arch_str):
+        arch_map = {
+            "x64"       : [ UC_X86_REG_RIP,     UC_ARCH_X86,    UC_MODE_64 ],
+            "x86"       : [ UC_X86_REG_EIP,     UC_ARCH_X86,    UC_MODE_32 ],
+            "arm64be"   : [ UC_ARM64_REG_PC,    UC_ARCH_ARM64,  UC_MODE_ARM | UC_MODE_BIG_ENDIAN ],
+            "arm64le"   : [ UC_ARM64_REG_PC,    UC_ARCH_ARM64,  UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN ],
+            "armbe"     : [ UC_ARM_REG_PC,      UC_ARCH_ARM,    UC_MODE_ARM | UC_MODE_BIG_ENDIAN ],
+            "armle"     : [ UC_ARM_REG_PC,      UC_ARCH_ARM,    UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN ],
+            "armbethumb": [ UC_ARM_REG_PC,      UC_ARCH_ARM,    UC_MODE_THUMB | UC_MODE_BIG_ENDIAN ],
+            "armlethumb": [ UC_ARM_REG_PC,      UC_ARCH_ARM,    UC_MODE_THUMB | UC_MODE_LITTLE_ENDIAN ],
+            "mips"      : [ UC_MIPS_REG_PC,     UC_ARCH_MIPS,   UC_MODE_MIPS32 | UC_MODE_BIG_ENDIAN ],
+            "mipsel"    : [ UC_MIPS_REG_PC,     UC_ARCH_MIPS,   UC_MODE_MIPS32 | UC_MODE_LITTLE_ENDIAN ],
+        }
+        return (arch_map[arch_str][1], arch_map[arch_str][2])
+
+    def __get_register_map(self, arch):
+        if arch == "arm64le" or arch == "arm64be":
+            arch = "arm64"
+        elif arch == "armle" or arch == "armbe" or "thumb" in arch:
+            arch = "arm"
+        elif arch == "mipsel":
+            arch = "mips"
+
+        registers = {
+            "x64" : {
+                "rax":    UC_X86_REG_RAX,
+                "rbx":    UC_X86_REG_RBX,
+                "rcx":    UC_X86_REG_RCX,
+                "rdx":    UC_X86_REG_RDX,
+                "rsi":    UC_X86_REG_RSI,
+                "rdi":    UC_X86_REG_RDI,
+                "rbp":    UC_X86_REG_RBP,
+                "rsp":    UC_X86_REG_RSP,
+                "r8":     UC_X86_REG_R8,
+                "r9":     UC_X86_REG_R9,
+                "r10":    UC_X86_REG_R10,
+                "r11":    UC_X86_REG_R11,
+                "r12":    UC_X86_REG_R12,
+                "r13":    UC_X86_REG_R13,
+                "r14":    UC_X86_REG_R14,
+                "r15":    UC_X86_REG_R15,
+                "rip":    UC_X86_REG_RIP,
+                "rsp":    UC_X86_REG_RSP,
+                "efl":    UC_X86_REG_EFLAGS,
+                "cs":     UC_X86_REG_CS,
+                "ds":     UC_X86_REG_DS,
+                "es":     UC_X86_REG_ES,
+                "fs":     UC_X86_REG_FS,
+                "gs":     UC_X86_REG_GS,
+                "ss":     UC_X86_REG_SS,
+            },
+            "x86" : {
+                "eax":    UC_X86_REG_EAX,
+                "ebx":    UC_X86_REG_EBX,
+                "ecx":    UC_X86_REG_ECX,
+                "edx":    UC_X86_REG_EDX,
+                "esi":    UC_X86_REG_ESI,
+                "edi":    UC_X86_REG_EDI,
+                "ebp":    UC_X86_REG_EBP,
+                "esp":    UC_X86_REG_ESP,
+                "eip":    UC_X86_REG_EIP,
+                "esp":    UC_X86_REG_ESP,
+                "efl":    UC_X86_REG_EFLAGS,        
+                # Segment registers removed...
+                # They caused segfaults (from unicorn?) when they were here
+            },        
+            "arm" : {
+                "r0":     UC_ARM_REG_R0,
+                "r1":     UC_ARM_REG_R1,
+                "r2":     UC_ARM_REG_R2,
+                "r3":     UC_ARM_REG_R3,
+                "r4":     UC_ARM_REG_R4,
+                "r5":     UC_ARM_REG_R5,
+                "r6":     UC_ARM_REG_R6,
+                "r7":     UC_ARM_REG_R7,
+                "r8":     UC_ARM_REG_R8,
+                "r9":     UC_ARM_REG_R9,
+                "r10":    UC_ARM_REG_R10,
+                "r11":    UC_ARM_REG_R11,
+                "r12":    UC_ARM_REG_R12,
+                "pc":     UC_ARM_REG_PC,
+                "sp":     UC_ARM_REG_SP,
+                "lr":     UC_ARM_REG_LR,
+                "cpsr":   UC_ARM_REG_CPSR
+            },
+            "arm64" : {
+                "x0":     UC_ARM64_REG_X0,
+                "x1":     UC_ARM64_REG_X1,
+                "x2":     UC_ARM64_REG_X2,
+                "x3":     UC_ARM64_REG_X3,
+                "x4":     UC_ARM64_REG_X4,
+                "x5":     UC_ARM64_REG_X5,
+                "x6":     UC_ARM64_REG_X6,
+                "x7":     UC_ARM64_REG_X7,
+                "x8":     UC_ARM64_REG_X8,
+                "x9":     UC_ARM64_REG_X9,
+                "x10":    UC_ARM64_REG_X10,
+                "x11":    UC_ARM64_REG_X11,
+                "x12":    UC_ARM64_REG_X12,
+                "x13":    UC_ARM64_REG_X13,
+                "x14":    UC_ARM64_REG_X14,
+                "x15":    UC_ARM64_REG_X15,
+                "x16":    UC_ARM64_REG_X16,
+                "x17":    UC_ARM64_REG_X17,
+                "x18":    UC_ARM64_REG_X18,
+                "x19":    UC_ARM64_REG_X19,
+                "x20":    UC_ARM64_REG_X20,
+                "x21":    UC_ARM64_REG_X21,
+                "x22":    UC_ARM64_REG_X22,
+                "x23":    UC_ARM64_REG_X23,
+                "x24":    UC_ARM64_REG_X24,
+                "x25":    UC_ARM64_REG_X25,
+                "x26":    UC_ARM64_REG_X26,
+                "x27":    UC_ARM64_REG_X27,
+                "x28":    UC_ARM64_REG_X28,
+                "pc":     UC_ARM64_REG_PC,
+                "sp":     UC_ARM64_REG_SP,
+                "fp":     UC_ARM64_REG_FP,
+                "lr":     UC_ARM64_REG_LR,
+                "nzcv":   UC_ARM64_REG_NZCV,
+                "cpsr": UC_ARM_REG_CPSR, 
+            },
+            "mips" : {
+                "0" :     UC_MIPS_REG_ZERO,
+                "at":     UC_MIPS_REG_AT,
+                "v0":     UC_MIPS_REG_V0,
+                "v1":     UC_MIPS_REG_V1,
+                "a0":     UC_MIPS_REG_A0,
+                "a1":     UC_MIPS_REG_A1,
+                "a2":     UC_MIPS_REG_A2,
+                "a3":     UC_MIPS_REG_A3,
+                "t0":     UC_MIPS_REG_T0,
+                "t1":     UC_MIPS_REG_T1,
+                "t2":     UC_MIPS_REG_T2,
+                "t3":     UC_MIPS_REG_T3,
+                "t4":     UC_MIPS_REG_T4,
+                "t5":     UC_MIPS_REG_T5,
+                "t6":     UC_MIPS_REG_T6,
+                "t7":     UC_MIPS_REG_T7,
+                "t8":     UC_MIPS_REG_T8,
+                "t9":     UC_MIPS_REG_T9,
+                "s0":     UC_MIPS_REG_S0,
+                "s1":     UC_MIPS_REG_S1,
+                "s2":     UC_MIPS_REG_S2,    
+                "s3":     UC_MIPS_REG_S3,
+                "s4":     UC_MIPS_REG_S4,
+                "s5":     UC_MIPS_REG_S5,
+                "s6":     UC_MIPS_REG_S6,              
+                "s7":     UC_MIPS_REG_S7,
+                "s8":     UC_MIPS_REG_S8,  
+                "k0":     UC_MIPS_REG_K0,
+                "k1":     UC_MIPS_REG_K1,
+                "gp":     UC_MIPS_REG_GP,
+                "pc":     UC_MIPS_REG_PC,
+                "sp":     UC_MIPS_REG_SP,
+                "fp":     UC_MIPS_REG_FP,
+                "ra":     UC_MIPS_REG_RA,
+                "hi":     UC_MIPS_REG_HI,
+                "lo":     UC_MIPS_REG_LO
+            }
+        }
+        return registers[arch]   
+
+    #---------------------------
+    # Callbacks for tracing 
+
+    # TODO: Make integer-printing fixed widths dependent on bitness of architecture 
+    #       (i.e. only show 4 bytes for 32-bit, 8 bytes for 64-bit)
+
+    # TODO: Figure out how best to determine the capstone mode and architecture here
+    """
+    try:
+        # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
+        from capstone import *
+        cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
+        def __trace_instruction(self, uc, address, size, user_data):
+            mem = uc.mem_read(address, size)
+            for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
+                print("    Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
+    except ImportError:
+        def __trace_instruction(self, uc, address, size, user_data):
+            print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))    
+    """
+
+    def __trace_instruction(self, uc, address, size, user_data):
+        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))  
+        
+    def __trace_block(self, uc, address, size, user_data):
+        print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
+      
+    def __trace_mem_access(self, uc, access, address, size, value, user_data):
+        if access == UC_MEM_WRITE:
+            print("        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        else:
+            print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))    
+
+    def __trace_mem_invalid_access(self, uc, access, address, size, value, user_data):
+        if access == UC_MEM_WRITE_UNMAPPED:
+            print("        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        else:
+            print("        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))   
+
diff --git a/unicorn_mode/patches/afl-unicorn-cpu-inl.h b/unicorn_mode/patches/afl-unicorn-cpu-inl.h
new file mode 100644
index 00000000..892c3f72
--- /dev/null
+++ b/unicorn_mode/patches/afl-unicorn-cpu-inl.h
@@ -0,0 +1,290 @@
+/*
+   american fuzzy lop - high-performance binary-only instrumentation
+   -----------------------------------------------------------------
+
+   Written by Andrew Griffiths <agriffiths@google.com> and
+              Michal Zalewski <lcamtuf@google.com>
+
+   TCG instrumentation and block chaining support by Andrea Biondo
+                                      <andrea.biondo965@gmail.com>
+   Adapted for afl-unicorn by Dominik Maier <mail@dmnk.co>
+
+   Idea & design very much by Andrew Griffiths.
+
+   Copyright 2015, 2016 Google Inc. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   This code is a shim patched into the separately-distributed source
+   code of Unicorn 1.0.1. It leverages the built-in QEMU tracing functionality
+   to implement AFL-style instrumentation and to take care of the remaining
+   parts of the AFL fork server logic.
+
+   The resulting QEMU binary is essentially a standalone instrumentation
+   tool; for an example of how to leverage it for other purposes, you can
+   have a look at afl-showmap.c.
+
+ */
+
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "../../config.h"
+
+/***************************
+ * VARIOUS AUXILIARY STUFF *
+ ***************************/
+
+/* A snippet patched into tb_find_slow to inform the parent process that
+   we have hit a new block that hasn't been translated yet, and to tell
+   it to translate within its own context, too (this avoids translation
+   overhead in the next forked-off copy). */
+
+#define AFL_UNICORN_CPU_SNIPPET1 do { \
+    afl_request_tsl(pc, cs_base, flags); \
+  } while (0)
+
+/* This snippet kicks in when the first instruction is emulated and does
+   the usual forkserver stuff, not very different from regular
+   instrumentation injected via afl-as.h. */
+
+#define AFL_UNICORN_CPU_SNIPPET2 do { \
+    if(unlikely(afl_first_instr == 0)) { \
+      afl_setup(); \
+      afl_forkserver(env); \
+      afl_first_instr = 1; \
+    } \
+    afl_maybe_log(tb->pc); \
+  } while (0)
+
+/* We use one additional file descriptor to relay "needs translation"
+   messages between the child and the fork server. */
+
+#define TSL_FD (FORKSRV_FD - 1)
+
+/* This is equivalent to afl-as.h: */
+
+static unsigned char *afl_area_ptr;
+
+/* Set in the child process in forkserver mode: */
+
+static unsigned char afl_fork_child;
+static unsigned int afl_forksrv_pid;
+
+/* Instrumentation ratio: */
+
+static unsigned int afl_inst_rms = MAP_SIZE;
+
+/* Function declarations. */
+
+static void afl_setup(void);
+static void afl_forkserver(CPUArchState*);
+static inline void afl_maybe_log(unsigned long);
+
+static void afl_wait_tsl(CPUArchState*, int);
+static void afl_request_tsl(target_ulong, target_ulong, uint64_t);
+
+static TranslationBlock *tb_find_slow(CPUArchState*, target_ulong,
+                                      target_ulong, uint64_t);
+
+/* Data structure passed around by the translate handlers: */
+
+struct afl_tsl {
+  target_ulong pc;
+  target_ulong cs_base;
+  uint64_t flags;
+};
+
+/*************************
+ * ACTUAL IMPLEMENTATION *
+ *************************/
+
+/* Set up SHM region and initialize other stuff. */
+
+static void afl_setup(void) {
+
+  char *id_str = getenv(SHM_ENV_VAR),
+       *inst_r = getenv("AFL_INST_RATIO");
+
+  int shm_id;
+
+  if (inst_r) {
+
+    unsigned int r;
+
+    r = atoi(inst_r);
+
+    if (r > 100) r = 100;
+    if (!r) r = 1;
+
+    afl_inst_rms = MAP_SIZE * r / 100;
+
+  }
+
+  if (id_str) {
+
+    shm_id = atoi(id_str);
+    afl_area_ptr = shmat(shm_id, NULL, 0);
+
+    if (afl_area_ptr == (void*)-1) exit(1);
+
+    /* With AFL_INST_RATIO set to a low value, we want to touch the bitmap
+       so that the parent doesn't give up on us. */
+
+    if (inst_r) afl_area_ptr[0] = 1;
+  }
+}
+
+/* Fork server logic, invoked once we hit first emulated instruction. */
+
+static void afl_forkserver(CPUArchState *env) {
+
+  static unsigned char tmp[4];
+
+  if (!afl_area_ptr) return;
+
+  /* Tell the parent that we're alive. If the parent doesn't want
+     to talk, assume that we're not running in forkserver mode. */
+
+  if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
+
+  afl_forksrv_pid = getpid();
+
+  /* All right, let's await orders... */
+
+  while (1) {
+
+    pid_t child_pid;
+    int status, t_fd[2];
+
+    /* Whoops, parent dead? */
+
+    if (read(FORKSRV_FD, tmp, 4) != 4) exit(2);
+
+    /* Establish a channel with child to grab translation commands. We'll 
+       read from t_fd[0], child will write to TSL_FD. */
+
+    if (pipe(t_fd) || dup2(t_fd[1], TSL_FD) < 0) exit(3);
+    close(t_fd[1]);
+
+    child_pid = fork();
+    if (child_pid < 0) exit(4);
+
+    if (!child_pid) {
+
+      /* Child process. Close descriptors and run free. */
+
+      afl_fork_child = 1;
+      close(FORKSRV_FD);
+      close(FORKSRV_FD + 1);
+      close(t_fd[0]);
+      return;
+
+    }
+
+    /* Parent. */
+
+    close(TSL_FD);
+
+    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) exit(5);
+
+    /* Collect translation requests until child dies and closes the pipe. */
+
+    afl_wait_tsl(env, t_fd[0]);
+
+    /* Get and relay exit status to parent. */
+
+    if (waitpid(child_pid, &status, 0) < 0) exit(6);
+    if (write(FORKSRV_FD + 1, &status, 4) != 4) exit(7);
+
+  }
+
+}
+
+
+/* The equivalent of the tuple logging routine from afl-as.h. */
+
+static inline void afl_maybe_log(unsigned long cur_loc) {
+
+  static __thread unsigned long prev_loc;
+
+  // DEBUG
+  //printf("IN AFL_MAYBE_LOG 0x%lx\n", cur_loc);
+
+  // MODIFIED FOR UNICORN MODE -> We want to log all addresses,
+  // so the checks for 'start < addr < end' are removed
+  if(!afl_area_ptr)
+    return;
+
+  // DEBUG
+  //printf("afl_area_ptr = %p\n", afl_area_ptr);
+
+  /* Looks like QEMU always maps to fixed locations, so ASAN is not a
+     concern. Phew. But instruction addresses may be aligned. Let's mangle
+     the value to get something quasi-uniform. */
+
+  cur_loc  = (cur_loc >> 4) ^ (cur_loc << 8);
+  cur_loc &= MAP_SIZE - 1;
+
+  /* Implement probabilistic instrumentation by looking at scrambled block
+     address. This keeps the instrumented locations stable across runs. */
+
+  // DEBUG
+  //printf("afl_inst_rms = 0x%lx\n", afl_inst_rms);
+
+  if (cur_loc >= afl_inst_rms) return;
+
+  // DEBUG
+  //printf("cur_loc = 0x%lx\n", cur_loc);  
+
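+  /* Classic AFL edge coverage: bump the bitmap byte indexed by the XOR of
+     the current and (shifted) previous location; the shift keeps edge
+     A->B distinct from B->A. */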
+  afl_area_ptr[cur_loc ^ prev_loc]++;
+  prev_loc = cur_loc >> 1;
+
+}
+
+
+/* This code is invoked whenever QEMU decides that it doesn't have a
+   translation of a particular block and needs to compute it. When this happens,
+   we tell the parent to mirror the operation, so that the next fork() has a
+   cached copy. */
+
+static void afl_request_tsl(target_ulong pc, target_ulong cb, uint64_t flags) {
+
+  struct afl_tsl t;
+
+  if (!afl_fork_child) return;
+
+  t.pc      = pc;
+  t.cs_base = cb;
+  t.flags   = flags;
+
+  if (write(TSL_FD, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
+    return;
+
+}
+
+
+/* This is the other side of the same channel. Since timeouts are handled by
+   afl-fuzz simply killing the child, we can just wait until the pipe breaks. */
+
+static void afl_wait_tsl(CPUArchState *env, int fd) {
+
+  struct afl_tsl t;
+
+  while (1) {
+
+    /* Broken pipe means it's time to return to the fork server routine. */
+
+    if (read(fd, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
+      break;
+
+    tb_find_slow(env, t.pc, t.cs_base, t.flags);
+  }
+
+  close(fd);
+}
+
diff --git a/unicorn_mode/patches/patches.diff b/unicorn_mode/patches/patches.diff
new file mode 100644
index 00000000..71a1e2f3
--- /dev/null
+++ b/unicorn_mode/patches/patches.diff
@@ -0,0 +1,107 @@
+diff --git a/Makefile b/Makefile
+index 7d73782..fb3ccfd 100644
+--- a/Makefile
++++ b/Makefile
+@@ -88,6 +88,10 @@ AR = llvm-ar
+ LDFLAGS := -fsanitize=address ${LDFLAGS}
+ endif
+ 
++ifeq ($(UNICORN_AFL),yes)
++UNICORN_CFLAGS += -DUNICORN_AFL
++endif
++
+ ifeq ($(CROSS),)
+ CC ?= cc
+ AR ?= ar
+diff --git a/config.mk b/config.mk
+index c3621fb..c7b4f7e 100644
+--- a/config.mk
++++ b/config.mk
+@@ -8,7 +8,7 @@
+ # Compile with debug info when you want to debug code.
+ # Change this to 'no' for release edition.
+ 
+-UNICORN_DEBUG ?= yes
++UNICORN_DEBUG ?= no
+ 
+ ################################################################################
+ # Specify which archs you want to compile in. By default, we build all archs.
+@@ -28,3 +28,9 @@ UNICORN_STATIC ?= yes
+ # a shared library.
+ 
+ UNICORN_SHARED ?= yes
++
++
++################################################################################
++# Changing 'UNICORN_AFL = yes' to 'UNICORN_AFL = no' disables AFL instrumentation
++
++UNICORN_AFL ?= yes
+diff --git a/qemu/cpu-exec.c b/qemu/cpu-exec.c
+index 7755adf..8114b70 100644
+--- a/qemu/cpu-exec.c
++++ b/qemu/cpu-exec.c
+@@ -24,6 +24,11 @@
+ 
+ #include "uc_priv.h"
+ 
++#if defined(UNICORN_AFL)
++#include "../afl-unicorn-cpu-inl.h"
++static int afl_first_instr = 0;
++#endif 
++
+ static tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr);
+ static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
+         target_ulong cs_base, uint64_t flags);
+@@ -231,6 +236,10 @@ int cpu_exec(struct uc_struct *uc, CPUArchState *env)   // qq
+                             next_tb & TB_EXIT_MASK, tb);
+                 }
+ 
++#if defined(UNICORN_AFL)
++                AFL_UNICORN_CPU_SNIPPET2;
++#endif
++
+                 /* cpu_interrupt might be called while translating the
+                    TB, but before it is linked into a potentially
+                    infinite loop and becomes env->current_tb. Avoid
+@@ -369,6 +378,11 @@ static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
+ not_found:
+     /* if no translated code available, then translate it now */
+     tb = tb_gen_code(cpu, pc, cs_base, (int)flags, 0);   // qq
++    
++#if defined(UNICORN_AFL)
++    /* There seems to be no chaining in unicorn ever? :( */
++    AFL_UNICORN_CPU_SNIPPET1;
++#endif
+ 
+ found:
+     /* Move the last found TB to the head of the list */
+diff --git a/qemu/translate-all.c b/qemu/translate-all.c
+index 1a96c34..7ef4878 100644
+--- a/qemu/translate-all.c
++++ b/qemu/translate-all.c
+@@ -403,11 +403,25 @@ static PageDesc *page_find_alloc(struct uc_struct *uc, tb_page_addr_t index, int
+ 
+ #if defined(CONFIG_USER_ONLY)
+     /* We can't use g_malloc because it may recurse into a locked mutex. */
++#if defined(UNICORN_AFL)
++    /* This was added by unicorn-afl to bail out semi-gracefully if out of memory. */
++# define ALLOC(P, SIZE)                                 \
++    do {                                                \
++        void* _tmp = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
++                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
++        if (_tmp == (void*)-1) { \
++            qemu_log(">>> Out of memory for stack, bailing out. <<<\n"); \
++            exit(1); \
++        } \
++        (P) = _tmp; \
++    } while (0)
++#else /* !UNICORN_AFL */
+ # define ALLOC(P, SIZE)                                 \
+     do {                                                \
+         P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
+                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
+     } while (0)
++#endif /* UNICORN_AFL */
+ #else
+ # define ALLOC(P, SIZE) \
+     do { P = g_malloc0(SIZE); } while (0)
diff --git a/unicorn_mode/samples/simple/COMPILE.md b/unicorn_mode/samples/simple/COMPILE.md
new file mode 100644
index 00000000..bd4a66c6
--- /dev/null
+++ b/unicorn_mode/samples/simple/COMPILE.md
@@ -0,0 +1,41 @@
+Compiling simple_target.c
+==========================
+
+You shouldn't need to compile simple_target.c since a MIPS binary version is
+pre-built and shipped with afl-unicorn. This file documents how the binary
+was built in case you want to rebuild it or recompile it for any reason.
+
+The pre-built binary (simple_target.bin) was built by cross-compiling
+simple_target.c for MIPS using the mips-linux-gnu-gcc package on an Ubuntu
+16.04 LTS system. The cross-compiler and associated binutils were installed
+via apt-get:
+
+```
+sudo apt-get install gcc-mips-linux-gnu
+```
+
+simple_target.c was compiled without optimization, as position-independent
+code, and without the standard libraries, using the following command line:
+
+```
+mips-linux-gnu-gcc -o simple_target.elf simple_target.c -fPIC -O0 -nostdlib
+```
+
+The .text section from the resulting ELF binary was then extracted to create
+the raw binary blob that is loaded and emulated by simple_test_harness.py:
+
+```
+mips-linux-gnu-objcopy -O binary --only-section=.text simple_target.elf simple_target.bin 
+```
+
+In summary, to recreate simple_target.bin execute the following:
+
+```
+mips-linux-gnu-gcc -o simple_target.elf simple_target.c -fPIC -O0 -nostdlib \
+  && mips-linux-gnu-objcopy -O binary --only-section=.text simple_target.elf simple_target.bin \
+  && rm simple_target.elf
+```
+
+Note that the resulting binary is padded with null bytes to a 16-byte
+alignment. This matters when emulating it, since any padding executed after
+the return of main() decodes as MIPS NOPs.
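+
+To sanity-check the result, you can hexdump the tail of the blob and confirm
+the zero padding (a quick check, assuming xxd is installed):
+
+```
+xxd simple_target.bin | tail -n 2
+```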
diff --git a/unicorn_mode/samples/simple/sample_inputs/sample1.bin b/unicorn_mode/samples/simple/sample_inputs/sample1.bin
new file mode 100644
index 00000000..85df5078
--- /dev/null
+++ b/unicorn_mode/samples/simple/sample_inputs/sample1.bin
@@ -0,0 +1 @@
+abcd
\ No newline at end of file
diff --git a/unicorn_mode/samples/simple/sample_inputs/sample2.bin b/unicorn_mode/samples/simple/sample_inputs/sample2.bin
new file mode 100644
index 00000000..f76dd238
--- /dev/null
+++ b/unicorn_mode/samples/simple/sample_inputs/sample2.bin
Binary files differ
diff --git a/unicorn_mode/samples/simple/sample_inputs/sample3.bin b/unicorn_mode/samples/simple/sample_inputs/sample3.bin
new file mode 100644
index 00000000..6b2aaa76
--- /dev/null
+++ b/unicorn_mode/samples/simple/sample_inputs/sample3.bin
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/unicorn_mode/samples/simple/sample_inputs/sample4.bin b/unicorn_mode/samples/simple/sample_inputs/sample4.bin
new file mode 100644
index 00000000..71bd63e6
--- /dev/null
+++ b/unicorn_mode/samples/simple/sample_inputs/sample4.bin
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/unicorn_mode/samples/simple/sample_inputs/sample5.bin b/unicorn_mode/samples/simple/sample_inputs/sample5.bin
new file mode 100644
index 00000000..aed2973e
--- /dev/null
+++ b/unicorn_mode/samples/simple/sample_inputs/sample5.bin
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/unicorn_mode/samples/simple/simple_target.bin b/unicorn_mode/samples/simple/simple_target.bin
new file mode 100644
index 00000000..0095eb6c
--- /dev/null
+++ b/unicorn_mode/samples/simple/simple_target.bin
Binary files differ
diff --git a/unicorn_mode/samples/simple/simple_target.c b/unicorn_mode/samples/simple/simple_target.c
new file mode 100644
index 00000000..49e1b4d8
--- /dev/null
+++ b/unicorn_mode/samples/simple/simple_target.c
@@ -0,0 +1,31 @@
+/*
+ * Sample target file to test afl-unicorn fuzzing capabilities.
+ * This is a very trivial example that will crash pretty easily
+ * in several different exciting ways. 
+ *
+ * Input is assumed to come from a buffer located at DATA_ADDRESS 
+ * (0x00300000), so make sure that your Unicorn emulation of this 
+ * puts user data there.
+ *
+ * Written by Nathan Voss <njvoss99@gmail.com>
+ */
+
+// Magic address where mutated data will be placed
+#define DATA_ADDRESS 	0x00300000
+
+int main(void) {
+  unsigned char *data_buf = (unsigned char *) DATA_ADDRESS;
+
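+  // Note: the harness maps the data region zero-filled, so inputs shorter
+  // than 21 bytes should leave data_buf[20] == 0 and skip the first branch.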
+  if (data_buf[20] != 0) {
+    // Cause an 'invalid read' crash if data[20] is non-zero
+    unsigned char invalid_read = *(unsigned char *) 0x00000000;
+  } else if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) {
+    // Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2]
+    unsigned char invalid_read = *(unsigned char *) 0x00000000;
+  } else if (data_buf[9] == 0x00 && data_buf[10] != 0x00 && data_buf[11] == 0x00) {
+    // Cause a crash if data[10] is not zero, but [9] and [11] are zero
+    unsigned char invalid_read = *(unsigned char *) 0x00000000;
+  }
+
+  return 0;
+}
diff --git a/unicorn_mode/samples/simple/simple_test_harness.py b/unicorn_mode/samples/simple/simple_test_harness.py
new file mode 100644
index 00000000..6d0fb399
--- /dev/null
+++ b/unicorn_mode/samples/simple/simple_test_harness.py
@@ -0,0 +1,170 @@
+""" 
+   Simple test harness for AFL's Unicorn Mode.
+
+   This loads the simple_target.bin binary (precompiled as MIPS code) into
+   Unicorn's memory map for emulation, places the specified input into
+   simple_target's buffer (hardcoded to be at 0x300000), and executes 'main()'.
+   If any crashes occur during emulation, this script throws a matching signal
+   to tell AFL that a crash occurred.
+
+   Run under AFL as follows:
+
+   $ cd <afl_path>/unicorn_mode/samples/simple/
+   $ ../../../afl-fuzz -U -m none -i ./sample_inputs -o ./output -- python simple_test_harness.py @@ 
+"""
+
+import argparse
+import os
+import signal
+
+from unicorn import *
+from unicorn.mips_const import *
+
+# Path to the file containing the binary to emulate
+BINARY_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'simple_target.bin')
+
+# Memory map for the code to be tested
+CODE_ADDRESS  = 0x00100000  # Arbitrary address where code to test will be loaded
+CODE_SIZE_MAX = 0x00010000  # Max size for the code (64 KiB)
+STACK_ADDRESS = 0x00200000  # Address of the stack (arbitrarily chosen)
+STACK_SIZE	  = 0x00010000  # Size of the stack (arbitrarily chosen)
+DATA_ADDRESS  = 0x00300000  # Address where mutated data will be placed
+DATA_SIZE_MAX = 0x00010000  # Maximum allowable size of mutated data
+
+try:
+    # If Capstone is installed we'll dump disassembly; otherwise just print each instruction's address and size.
+    from capstone import *
+    cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
+    def unicorn_debug_instruction(uc, address, size, user_data):
+        mem = uc.mem_read(address, size)
+        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), address):
+            print("    Instr: {:#016x}:\t{}\t{}".format(cs_address, cs_mnemonic, cs_opstr))
+except ImportError:
+    def unicorn_debug_instruction(uc, address, size, user_data):
+        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))    
+
+def unicorn_debug_block(uc, address, size, user_data):
+    print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
+    
+def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
+    if access == UC_MEM_WRITE:
+        print("        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+    else:
+        print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))    
+
+def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
+    if access == UC_MEM_WRITE_UNMAPPED:
+        print("        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+    else:
+        print("        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))   
+
+def force_crash(uc_error):
+    # This function should be called to indicate to AFL that a crash occurred during emulation.
+    # Pass in the exception received from Uc.emu_start()
+    mem_errors = [
+        UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED,
+        UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED,
+        UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED,
+    ]
+    if uc_error.errno in mem_errors:
+        # Memory error - throw SIGSEGV
+        os.kill(os.getpid(), signal.SIGSEGV)
+    elif uc_error.errno == UC_ERR_INSN_INVALID:
+        # Invalid instruction - throw SIGILL
+        os.kill(os.getpid(), signal.SIGILL)
+    else:
+        # Not sure what happened - throw SIGABRT
+        os.kill(os.getpid(), signal.SIGABRT)
+
+def main():
+
+    parser = argparse.ArgumentParser(description="Test harness for simple_target.bin")
+    parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input to load")
+    parser.add_argument('-d', '--debug', default=False, action="store_true", help="Enables debug tracing")
+    args = parser.parse_args()
+
+    # Instantiate a MIPS32 big endian Unicorn Engine instance
+    uc = Uc(UC_ARCH_MIPS, UC_MODE_MIPS32 + UC_MODE_BIG_ENDIAN)
+
+    if args.debug:
+        uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
+        uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
+        uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
+        uc.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, unicorn_debug_mem_invalid_access)
+
+    #---------------------------------------------------
+    # Load the binary to emulate and map it into memory
+
+    print("Loading data input from {}".format(args.input_file))
+    with open(BINARY_FILE, 'rb') as binary_file:
+        binary_code = binary_file.read()
+
+    # Make sure the binary blob fits within the mapped code region
+    if len(binary_code) > CODE_SIZE_MAX:
+        print("Binary code is too large (> {} bytes)".format(CODE_SIZE_MAX))
+        return
+
+    # Map memory for the code and write the binary into it
+    uc.mem_map(CODE_ADDRESS, CODE_SIZE_MAX)
+    uc.mem_write(CODE_ADDRESS, binary_code)
+
+    # Set the program counter to the start of the code
+    start_address = CODE_ADDRESS          # Address of entry point of main()
+    end_address   = CODE_ADDRESS + 0xf4   # Address of last instruction in main() (specific to the shipped simple_target.bin)
+    uc.reg_write(UC_MIPS_REG_PC, start_address)
+
+    #-----------------
+    # Setup the stack
+
+    uc.mem_map(STACK_ADDRESS, STACK_SIZE)
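+    # The MIPS stack grows downward, so point SP at the top of the mapped region.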
+    uc.reg_write(UC_MIPS_REG_SP, STACK_ADDRESS + STACK_SIZE)
+
+    #-----------------------------------------------------
+    # Emulate 1 instruction to kick off AFL's fork server
+    #   THIS MUST BE DONE BEFORE LOADING USER DATA! 
+    #   If this isn't done every single run, the AFL fork server
+    #   will not be started appropriately and you'll get erratic results!
+    #   It doesn't matter what this returns; it just has to execute at
+    #   least one instruction to get the fork server started.
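+    #   (In the patched Unicorn, see afl-unicorn-cpu-inl.h in the patches
+    #   above, the fork server starts when the first translated block runs:
+    #   setup before this point executes once, everything after it executes
+    #   once per test case.)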
+
+    # Execute 1 instruction just to start up the fork server
+    print("Starting the AFL forkserver by executing 1 instruction")
+    try:
+        uc.emu_start(uc.reg_read(UC_MIPS_REG_PC), 0, 0, count=1)
+    except UcError as e:
+        print("ERROR: Failed to execute a single instruction (error: {})!".format(e))
+        return
+
+    #-----------------------------------------------
+    # Load the mutated input and map it into memory
+
+    # Load the mutated input from disk
+    print("Loading data input from {}".format(args.input_file))
+    with open(args.input_file, 'rb') as input_file:
+        input_data = input_file.read()
+
+    # Apply constraints to the mutated input
+    if len(input_data) > DATA_SIZE_MAX:
+        print("Test input is too long (> {} bytes)".format(DATA_SIZE_MAX))
+        return
+
+    # Write the mutated input into the data buffer
+    uc.mem_map(DATA_ADDRESS, DATA_SIZE_MAX)
+    uc.mem_write(DATA_ADDRESS, input_data)
+
+    #------------------------------------------------------------
+    # Emulate the code, allowing it to process the mutated input
+
+    print("Executing until a crash or execution reaches 0x{0:016x}".format(end_address))
+    try:
+        uc.emu_start(uc.reg_read(UC_MIPS_REG_PC), end_address, timeout=0, count=0)
+    except UcError as e:
+        print("Execution failed with error: {}".format(e))
+        force_crash(e)
+
+    print("Done.")
+
+if __name__ == "__main__":
+    main()
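+
+# Standalone smoke test (outside afl-fuzz), run from this directory; the -d
+# flag enables the block/instruction/memory tracing hooks defined above:
+#
+#   python simple_test_harness.py -d ./sample_inputs/sample1.bin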