19 files changed, 1321 insertions, 2 deletions
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 2b8b0e8d..062dd785 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -37,6 +37,9 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
     - added AFL_LLVM_INSTRUMENT option NATIVE for native clang pc-guard
       support (less performant than our own), GCC for old afl-gcc and
       CLANG for old afl-clang
+  - unicornafl
+    - Substantial speed gains in python bindings for certain use cases
+    - Added a new example harness to compare python, c, and rust bindings
   - changed default: no memory limit for afl-cmin and afl-cmin.bash
   - warn on any _AFL and __AFL env vars
   - LLVM mode is now compiled with -j4, unicorn with all cores. qemu was
diff --git a/unicorn_mode/UNICORNAFL_VERSION b/unicorn_mode/UNICORNAFL_VERSION
index 2dbc30b8..f1fb7f18 100644
--- a/unicorn_mode/UNICORNAFL_VERSION
+++ b/unicorn_mode/UNICORNAFL_VERSION
@@ -1 +1 @@
-83d1b426
+0dd17c58
diff --git a/unicorn_mode/samples/speedtest/.gitignore b/unicorn_mode/samples/speedtest/.gitignore
new file mode 100644
index 00000000..78310c60
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/.gitignore
@@ -0,0 +1,6 @@
+output
+harness
+harness-debug
+target
+target.o
+target.offsets.*
diff --git a/unicorn_mode/samples/speedtest/Makefile b/unicorn_mode/samples/speedtest/Makefile
new file mode 100644
index 00000000..23f5cb07
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/Makefile
@@ -0,0 +1,17 @@
+CFLAGS += -Wall -Werror -Wextra -Wpedantic -Og -g -fPIE
+
+.PHONY: all clean
+# Default goal: the target binary plus the address files the harnesses read.
+all: target target.offsets.main
+
+clean:
+	$(RM) -r *.o target target.offsets.*
+
+target.o: target.c
+	$(CC) $(CFLAGS) -c $< -o $@
+
+target: target.o
+	$(CC) $(CFLAGS) $< -o $@
+# get_offsets.py writes every target.offsets.* file; rerun it when it changes, too.
+target.offsets.main: target get_offsets.py
+	./get_offsets.py
\ No newline at end of file
diff --git a/unicorn_mode/samples/speedtest/README.md b/unicorn_mode/samples/speedtest/README.md
new file mode 100644
index 00000000..3c1184a2
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/README.md
@@ -0,0 +1,65 @@
+# Speedtest
+
+This is a simple sample harness for a non-crashing file,
+to show the raw speed of C, Rust, and Python harnesses.
+
+## Compiling...
+
+Make sure you have built unicornafl first (`../../build_unicorn_support.sh`).
+Then, follow these individual steps:
+
+### Rust
+
+```bash
+cd rust
+cargo build --release
+../../../../afl-fuzz -i ../sample_inputs -o out -- ./target/release/harness @@
+```
+
+### C
+
+```bash
+cd c
+make
+../../../../afl-fuzz -i ../sample_inputs -o out -- ./harness @@
+```
+
+### python
+
+```bash
+cd python
+../../../../afl-fuzz -i ../sample_inputs -o out -U -- python3 ./harness.py @@
+```
+
+## Results
+
+TODO: add results here.
+
+
+## Compiling target.c
+
+The target binary is not shipped pre-built: running `make` in this folder
+compiles target.c to `target` and then runs get_offsets.py on it. The Makefiles
+in `c/` and `python/` trigger this via `$(MAKE) -C ..` if `target` is missing.
+
+The binary is built with `-Og -g -fPIE` (see the Makefile in this folder).
+
+We then load the binary and execute the main function directly.
+
+## Addresses for the harness:
+To find the address (in hex) of main, run:
+```bash
+objdump -M intel -D target | grep '<main>:' | cut -d" " -f1
+```
+To find all call sites to magicfn, run:
+```bash
+objdump -M intel -D target | grep '<magicfn>$' | cut -d":" -f1
+```
+For malloc callsites:
+```bash
+objdump -M intel -D target | grep '<malloc@plt>$' | cut -d":" -f1
+```
+And free callsites:
+```bash
+objdump -M intel -D target | grep '<free@plt>$' | cut -d":" -f1
+```
diff --git a/unicorn_mode/samples/speedtest/c/Makefile b/unicorn_mode/samples/speedtest/c/Makefile
new file mode 100644
index 00000000..ce784d4f
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/c/Makefile
@@ -0,0 +1,54 @@
+# UnicornAFL Usage
+# Original Unicorn Example Makefile by Nguyen Anh Quynh <aquynh@gmail.com>, 2015
+# Adapted for AFL++ by domenukk <domenukk@gmail.com>, 2020
+.POSIX:
+UNAME_S =$(shell uname -s)# GNU make
+UNAME_S:sh=uname -s       # BSD make
+_UNIQ=_QINU_
+
+LIBDIR = ../../../unicornafl
+BIN_EXT =
+AR_EXT = a
+
+# Verbose output?
+V ?= 0
+
+CFLAGS += -Wall -Werror -Wextra -Wno-unused-parameter -I../../../unicornafl/include
+
+LDFLAGS += -L$(LIBDIR) -lpthread -lm
+
+_LRT = $(_UNIQ)$(UNAME_S:Linux=)
+__LRT = $(_LRT:$(_UNIQ)=-lrt)
+LRT = $(__LRT:$(_UNIQ)=)
+
+LDFLAGS += $(LRT)
+
+_CC = $(_UNIQ)$(CROSS)
+__CC = $(_CC:$(_UNIQ)=$(CC))
+MYCC = $(__CC:$(_UNIQ)$(CROSS)=$(CROSS)gcc)
+
+# fuzz runs afl-fuzz and creates no file named "fuzz", so it must be phony too.
+.PHONY: all clean fuzz
+all: fuzz
+
+clean:
+	rm -rf *.o harness harness-debug
+
+harness.o: harness.c ../../../unicornafl/include/unicorn/*.h
+	${MYCC} ${CFLAGS} -O3 -c harness.c -o $@
+
+harness-debug.o: harness.c ../../../unicornafl/include/unicorn/*.h
+	${MYCC} ${CFLAGS} -fsanitize=address -g -Og -c harness.c -o $@
+
+harness: harness.o
+	${MYCC} -L${LIBDIR} harness.o ../../../unicornafl/libunicornafl.a $(LDFLAGS) -o $@
+
+harness-debug: harness-debug.o
+	${MYCC} -fsanitize=address -g -Og -L${LIBDIR} harness-debug.o ../../../unicornafl/libunicornafl.a $(LDFLAGS) -o harness-debug
+
+../target:
+	$(MAKE) -C ..
+
+fuzz: ../target harness
+	rm -rf ./output
+	SKIP_BINCHECK=1 ../../../../afl-fuzz -s 1 -i ../sample_inputs -o ./output -- ./harness @@
diff --git a/unicorn_mode/samples/speedtest/c/harness.c b/unicorn_mode/samples/speedtest/c/harness.c
new file mode 100644
index 00000000..e8de3d80
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/c/harness.c
@@ -0,0 +1,390 @@
+/*
+   Simple test harness for AFL++'s unicornafl c mode.
+
+   This loads the speedtest target binary (../target) into
+   Unicorn's memory map for emulation, places the specified input into
+   argv[1], sets up argv, and argc and executes 'main()'.
+   If run inside AFL, afl_fuzz automatically does the "right thing"
+
+   Run under AFL as follows:
+
+   $ cd <afl_path>/unicorn_mode/samples/speedtest/c/
+   $ make
+   $ ../../../../afl-fuzz -m none -i ../sample_inputs -o out -- ./harness @@
+*/
+
+// This is not your everyday Unicorn.
+#define UNICORN_AFL
+
+#include <string.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include <unicorn/unicorn.h>
+
+// Path to the file containing the binary to emulate
+#define BINARY_FILE ("../target")
+
+// Memory map for the code to be tested
+// Arbitrary address where code to test will be loaded
+static const int64_t BASE_ADDRESS = 0x0;
+// Max size for the code (64kb)
+static const int64_t CODE_SIZE_MAX = 0x00010000;
+// Location where the input will be placed (make sure the emulated program knows this somehow, too ;) )
+static const int64_t INPUT_ADDRESS = 0x00100000;
+// Maximum size for our input
+static const int64_t INPUT_MAX = 0x00100000;
+// Where our pseudo-heap is at
+static const int64_t HEAP_ADDRESS = 0x00200000;
+// Maximum allowable size for the heap
+static const int64_t HEAP_SIZE_MAX = 0x000F0000;
+// Address of the stack (Some random address again)
+static const int64_t STACK_ADDRESS = 0x00400000;
+// Size of the stack (arbitrarily chosen, just make it big enough)
+static const int64_t STACK_SIZE = 0x000F0000;
+
+// Alignment for unicorn mappings (seems to be needed)
+static const int64_t ALIGNMENT = 0x1000;
+
+static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
+    printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size);
+}
+
+static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
+    printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size);
+}
+
+/* Rounds size up to the next multiple of ALIGNMENT (unicorn pages need 0x1000 alignment, apparently) */
+static uint64_t pad(uint64_t size) {
+    uint64_t rest = size % ALIGNMENT;
+    return rest == 0 ? size : size + (ALIGNMENT - rest);
+}
+
+/* Maps the file at filename into memory (private, read/write) and stores the
+   mapping in *buf_ptr. Returns the filesize in bytes, or -1 on error. */
+static off_t afl_mmap_file(char *filename, char **buf_ptr) {
+    off_t ret = -1;
+
+    int fd = open(filename, O_RDONLY);
+    /* Bail out early: fstat()/close() on -1 would overwrite open()'s errno. */
+    if (fd < 0) return ret;
+
+    struct stat st = {0};
+    if (fstat(fd, &st)) goto exit;
+
+    off_t in_len = st.st_size;
+    if (in_len == -1) {
+        /* This can only ever happen on 32 bit if the file is exactly 4gb. */
+        fprintf(stderr, "Filesize of %s too large\n", filename);
+        goto exit;
+    }
+
+    *buf_ptr = mmap(0, in_len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+    if (*buf_ptr != MAP_FAILED) ret = in_len;
+
+exit:
+    close(fd);
+    return ret;
+}
+
+/* Place the input at the right spot inside unicorn.
+   This code path is *HOT*, do as little work as possible! */
+static bool place_input_callback(
+    uc_engine *uc, 
+    char *input, 
+    size_t input_len, 
+    uint32_t persistent_round, 
+    void *data
+){
+    // Returns false to skip this testcase, true once the input has been written.
+    if (input_len < 1 || input_len >= INPUT_MAX) {
+        // Test input too short or too long, ignore this testcase
+        return false;
+    }
+
+    // We need a valid c string; input_len >= 1 here, so the index cannot underflow.
+    input[input_len-1] = '\0';
+
+    // Write the testcase to unicorn.
+    uc_mem_write(uc, INPUT_ADDRESS, input, input_len);
+
+    return true;
+}
+
+// exit in case the unicorn-internal mmap fails.
+static void mem_map_checked(uc_engine *uc, uint64_t addr, size_t size, uint32_t mode) {
+    size = pad(size);
+    //printf("SIZE %llx, align: %llx\n", size, ALIGNMENT);
+    uc_err err = uc_mem_map(uc, addr, size, mode);
+    if (err != UC_ERR_OK) {
+        printf("Error mapping %ld bytes at 0x%lx: %s (mode: %d)\n", size, addr, uc_strerror(err), mode);
+        exit(1);
+    }
+}
+
+// Reads up to max_count hex addresses from the file at path into the
+// caller-provided addrs array (nothing is allocated here) and returns how many
+// were read. Exits if the file cannot be opened; closes it on every return.
+ssize_t read_all_addrs(char *path, uint64_t *addrs, size_t max_count) {
+
+    FILE *f = fopen(path, "r");
+    if (!f) {
+        perror("fopen");
+        fprintf(stderr, "Could not read %s, make sure you ran ./get_offsets.py\n", path);
+        exit(-1);
+    }
+    size_t count = 0;
+    while (count < max_count) {
+        // Stop at EOF or at the first token that is not a hex number.
+        if (fscanf(f, "%" SCNx64, &addrs[count]) != 1) { break; }
+        count++;
+        // guzzle up the separator; EOF here means that was the last address.
+        if (fgetc(f) == EOF) { break; }
+    }
+
+    printf("Set %zu addrs for %s\n", count, path);
+    // Previously the stream leaked when max_count entries were read before EOF.
+    fclose(f);
+    return count;
+}
+
+// Read all hex addresses from hook_file and add a UC_HOOK_CODE hook calling hook_fn at each (exits if the file can't be opened).
+void set_all_hooks(uc_engine *uc, char *hook_file, void *hook_fn) {
+
+    FILE *f = fopen(hook_file, "r");
+    if (!f) {
+        fprintf(stderr, "Could not read %s, make sure you ran ./get_offsets.py\n", hook_file);
+        exit(-1);
+    }
+    uint64_t hook_addr;
+    for (int hook_count = 0; 1; hook_count++) {
+        // Stop at EOF or at a token that is not a hex number; hook_count hooks are set so far.
+        if(fscanf(f, "%lx", &hook_addr) != 1) {
+            printf("Set %d hooks for %s\n", hook_count, hook_file);
+            fclose(f);
+            return;
+        }
+        printf("got new hook addr %lx (count: %d) ohbytw: sizeof %lx\n", hook_addr, hook_count, sizeof(uc_hook));
+        hook_addr += BASE_ADDRESS;
+        // We'll leak these hooks like a good citizen.
+        uc_hook *hook = calloc(1, sizeof(uc_hook));
+        if (!hook) {
+            perror("calloc");
+            exit(-1);
+        }
+        uc_hook_add(uc, hook, UC_HOOK_CODE, hook_fn, NULL, hook_addr, hook_addr);
+        // guzzle up newline; on EOF the hook just added is the last one, so count it.
+        if (fgetc(f) == EOF) {
+            printf("Set %d hooks for %s\n", hook_count + 1, hook_file);
+            fclose(f);
+            return;
+        }
+    }
+}
+
+// This is a fancy print function that we're just going to skip for fuzzing.
+static void hook_magicfn(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
+    address += size;
+    uc_reg_write(uc, UC_X86_REG_RIP, &address);
+} 
+
+static bool already_allocated = false;
+
+// We use a very simple malloc/free stub here, that only works for exactly one allocation at a time.
+static void hook_malloc(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
+    if (already_allocated) {
+        printf("Double malloc, not supported right now!\n");
+        abort();
+    }
+    // read the first param.
+    uint64_t malloc_size;
+    uc_reg_read(uc, UC_X86_REG_RDI, &malloc_size);
+    if (malloc_size > HEAP_SIZE_MAX) {
+        printf("Tried to allocated %ld bytes, but we only support up to %ld\n", malloc_size, HEAP_SIZE_MAX);
+        abort();
+    }
+    uc_reg_write(uc, UC_X86_REG_RAX, &HEAP_ADDRESS);
+    address += size;
+    uc_reg_write(uc, UC_X86_REG_RIP, &address);
+    already_allocated = true;
+}
+
+// No real free, just set the "used"-flag to false.
+static void hook_free(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) {
+    if (!already_allocated) {
+        printf("Double free detected. Real bug?\n");
+        abort();
+    }
+    // read the first param.
+    uint64_t free_ptr;
+    uc_reg_read(uc, UC_X86_REG_RDI, &free_ptr);
+    if (free_ptr != HEAP_ADDRESS) {
+        printf("Tried to free wrong mem region: 0x%lx at code loc 0x%lx\n", free_ptr, address);
+        abort();
+    }
+    address +=  size;
+    uc_reg_write(uc, UC_X86_REG_RIP, &address);
+    already_allocated = false;
+}
+
+// Sets up the emulator (code, stack, input region, hooks), then hands control to uc_afl_fuzz.
+int main(int argc, char **argv, char **envp) {
+    if (argc == 1) {
+        printf("Test harness to measure speed against Rust and python. Usage: harness [-t] <inputfile>\n");
+        exit(1);
+    }
+    bool tracing = false;
+    char *filename = argv[1];
+    if (argc > 2 && !strcmp(argv[1], "-t")) {
+        tracing = true;
+        filename = argv[2];
+    }
+
+    uc_engine *uc;
+    uc_err err;
+    uc_hook hooks[2];
+    char *file_contents;
+
+    // Initialize emulator in X86_64 mode
+    err = uc_open(UC_ARCH_X86, UC_MODE_64, &uc);
+    if (err) {
+        printf("Failed on uc_open() with error returned: %u (%s)\n",
+                err, uc_strerror(err));
+        return -1;
+    }
+
+    // If we want tracing output, set the callbacks here
+    if (tracing) {
+        // tracing all basic blocks with customized callback
+        uc_hook_add(uc, &hooks[0], UC_HOOK_BLOCK, hook_block, NULL, 1, 0);
+        uc_hook_add(uc, &hooks[1], UC_HOOK_CODE, hook_code, NULL, 1, 0);
+    }
+
+    printf("The input testcase is set to %s\n", filename);
+
+    printf("Loading target from %s\n", BINARY_FILE);
+    off_t len = afl_mmap_file(BINARY_FILE, &file_contents);
+    printf("Binary file size: %lx\n", len);
+    if (len < 0) {
+        perror("Could not read binary to emulate");
+        return -2;
+    }
+    if (len == 0) {
+        fprintf(stderr, "File at '%s' is empty\n", BINARY_FILE);
+        return -3;
+    }
+    if (len > CODE_SIZE_MAX) {
+        fprintf(stderr, "Binary too large, increase CODE_SIZE_MAX\n");
+        return -4;
+    }
+
+    // Map memory.
+    mem_map_checked(uc, BASE_ADDRESS, len, UC_PROT_ALL);
+    fflush(stdout);
+
+    // write machine code to be emulated to memory
+    if (uc_mem_write(uc, BASE_ADDRESS, file_contents, len) != UC_ERR_OK) {
+        puts("Error writing to CODE");
+        exit(-1);
+    }
+
+    // Release copied contents
+    munmap(file_contents, len);
+
+    // Read the offset of main() as written by get_offsets.py
+    FILE *f = fopen("../target.offsets.main", "r");
+    if (!f) {
+        perror("fopen");
+        puts("Could not read offset to main function, make sure you ran ./get_offsets.py");
+        exit(-1);
+    }
+    uint64_t start_address;
+    if(fscanf(f, "%lx", &start_address) != 1) {
+        puts("Start address not found in target.offsets.main");
+        exit(-1);
+    }
+    fclose(f);
+    start_address += BASE_ADDRESS;
+    printf("Execution will start at 0x%lx", start_address);
+    // Set the program counter to the start of the code
+    uc_reg_write(uc, UC_X86_REG_RIP, &start_address); // address of entry point of main()
+
+    // Setup the Stack
+    mem_map_checked(uc, STACK_ADDRESS, STACK_SIZE, UC_PROT_READ | UC_PROT_WRITE);
+    // Setup the stack pointer, but allocate two pointers for the pointers to input
+    uint64_t val = STACK_ADDRESS + STACK_SIZE - 16;
+    //printf("Stack at %lu\n", stack_val);
+    uc_reg_write(uc, UC_X86_REG_RSP, &val);
+
+    // reserve some space for our input data
+    mem_map_checked(uc, INPUT_ADDRESS, INPUT_MAX, UC_PROT_READ);
+
+    // argc = 2
+    val = 2;
+    uc_reg_write(uc, UC_X86_REG_RDI, &val);
+    //RSI points to our little 2 QWORD space at the beginning of the stack...
+    val = STACK_ADDRESS + STACK_SIZE - 16;
+    uc_reg_write(uc, UC_X86_REG_RSI, &val);
+
+    //... which points to the Input (INPUT_ADDRESS, like the python harness). Write the ptr to mem in little endian.
+    uint32_t addr_little = INPUT_ADDRESS;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    // The chances you are on a big_endian system aren't too high, but still... (the swapped value must be stored back)
+    addr_little = __builtin_bswap32(addr_little);
+#endif
+
+    uc_mem_write(uc, STACK_ADDRESS + STACK_SIZE - 16, (char *)&addr_little, 4);
+
+    set_all_hooks(uc, "../target.offsets.malloc", hook_malloc);
+    set_all_hooks(uc, "../target.offsets.magicfn", hook_magicfn);
+    set_all_hooks(uc, "../target.offsets.free", hook_free);
+
+    int exit_count_max = 100;
+    // we don't need more exits for now.
+    uint64_t exits[exit_count_max];
+
+    ssize_t exit_count = read_all_addrs("../target.offsets.main_ends", exits, exit_count_max);
+    if (exit_count < 1) {
+        printf("Could not find exits! aborting.\n");
+        abort();
+    }
+
+    printf("Starting to fuzz. Running from addr %ld to one of these %ld exits:\n", start_address, exit_count);
+    for (ssize_t i = 0; i < exit_count; i++) {
+        printf("    exit %ld: %ld\n", i, exits[i]);
+    }
+
+    fflush(stdout);
+
+    // let's gooo
+    uc_afl_ret afl_ret = uc_afl_fuzz(
+        uc, // The unicorn instance we prepared
+        filename, // Filename of the input to process. In AFL this is usually the '@@' placeholder, outside it's any input file.
+        place_input_callback, // Callback that places the input (automatically loaded from the file at filename) in the unicorninstance
+        exits, // Where to exit (this is an array)
+        exit_count,  // Count of end addresses
+        NULL, // Optional calback to run after each exec
+        false, // true, if the optional callback should be run also for non-crashes
+        1000, // For persistent mode: How many rounds to run
+        NULL // additional data pointer
+    );
+    switch(afl_ret) {
+        case UC_AFL_RET_ERROR:
+            printf("Error starting to fuzz");
+            return -3;
+            break;
+        case UC_AFL_RET_NO_AFL:
+            printf("No AFL attached - We are done with a single run.");
+            break;
+        default:
+            break;
+    } 
+    return 0;
+}
diff --git a/unicorn_mode/samples/speedtest/get_offsets.py b/unicorn_mode/samples/speedtest/get_offsets.py
new file mode 100644
index 00000000..c9dc76df
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/get_offsets.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+
+"""This simple script uses objdump to parse important addresses from the target"""
+import shlex
+import subprocess
+
+objdump_output = subprocess.check_output(
+    shlex.split("objdump -M intel -D target")
+).decode()
+main_loc = None
+main_ends = []
+main_ended = False
+magicfn_calls = []
+malloc_calls = []
+free_calls = []
+strlen_calls = []
+
+
+def line2addr(line):
+    return "0x" + line.split(":", 1)[0].strip()
+
+
+last_line = None
+for line in objdump_output.split("\n"):
+    line = line.strip()
+
+    def read_addr_if_endswith(findme, list_to):
+        """
+        Look, for example, for the addr like:
+        12a9:       e8 f2 fd ff ff          call   10a0 <free@plt>
+        """
+        if line.endswith(findme):
+            list_to.append(line2addr(line))
+
+    if main_loc is not None and main_ended is False:
+        # We want to know where main ends. An empty line in objdump.
+        if len(line) == 0:
+            main_ends.append(line2addr(last_line))
+            main_ended = True
+        elif "ret" in line:
+            main_ends.append(line2addr(line))
+
+    if "<main>:" in line:
+        if main_loc is not None:
+            raise Exception("Found multiple main functions, odd target!")
+        # main_loc is the label, so it's parsed differntly (i.e. `0000000000001220 <main>:`)
+        main_loc = "0x" + line.strip().split(" ", 1)[0].strip()
+    else:
+        [
+            read_addr_if_endswith(*x)
+            for x in [
+                ("<free@plt>", free_calls),
+                ("<malloc@plt>", malloc_calls),
+                ("<strlen@plt>", strlen_calls),
+                ("<magicfn>", magicfn_calls),
+            ]
+        ]
+
+    last_line = line
+
+if main_loc is None:
+    raise Exception(
+        "Could not find main in ./target! Make sure objdump is installed and the target is compiled."
+    )
+# One file per address kind; multiple entries are written newline-separated.
+with open("target.offsets.main", "w") as f:
+    f.write(main_loc)
+with open("target.offsets.main_ends", "w") as f:
+    f.write("\n".join(main_ends))
+with open("target.offsets.magicfn", "w") as f:
+    f.write("\n".join(magicfn_calls))
+with open("target.offsets.malloc", "w") as f:
+    f.write("\n".join(malloc_calls))
+with open("target.offsets.free", "w") as f:
+    f.write("\n".join(free_calls))
+with open("target.offsets.strlen", "w") as f:
+    f.write("\n".join(strlen_calls))
diff --git a/unicorn_mode/samples/speedtest/python/Makefile b/unicorn_mode/samples/speedtest/python/Makefile
new file mode 100644
index 00000000..4282c6cb
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/python/Makefile
@@ -0,0 +1,8 @@
+.PHONY: all fuzz
+all: fuzz
+# Build the target binary in the parent folder if it is missing.
+../target:
+	$(MAKE) -C ..
+fuzz: ../target
+	rm -rf ./output
+	../../../../afl-fuzz -s 1 -U -i ../sample_inputs -o ./output -- python3 harness.py @@
diff --git a/unicorn_mode/samples/speedtest/python/harness.py b/unicorn_mode/samples/speedtest/python/harness.py
new file mode 100644
index 00000000..f72eb32b
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/python/harness.py
@@ -0,0 +1,277 @@
+#!/usr/bin/env python3
+""" 
+    Simple test harness for AFL's Unicorn Mode.
+
+    This loads the speedtest target binary (precompiled X64 code) into
+    Unicorn's memory map for emulation, places the specified input into
+    Argv, and executes main.
+    There should not be any crashes - it's a speedtest against Rust and c.
+
+    Before running this harness, call make in the parent folder.
+
+    Run under AFL as follows:
+
+    $ cd <afl_path>/unicorn_mode/samples/speedtest/python
+    $ ../../../../afl-fuzz -U -i ../sample_inputs -o ./output -- python3 harness.py @@
+"""
+
+import argparse
+import os
+import struct
+
+from unicornafl import *
+from unicornafl.unicorn_const import UC_ARCH_X86, UC_HOOK_CODE, UC_MODE_64
+from unicornafl.x86_const import (
+    UC_X86_REG_RAX,
+    UC_X86_REG_RDI,
+    UC_X86_REG_RIP,
+    UC_X86_REG_RSI,
+    UC_X86_REG_RSP,
+)
+
+# Memory map for the code to be tested
+BASE_ADDRESS = 0x0  # Arbitrary address where the (PIE) target binary will be loaded to
+CODE_SIZE_MAX = 0x00010000  # Max size for the code (64kb)
+INPUT_ADDRESS = 0x00100000  # where we put our stuff
+INPUT_MAX = 0x00100000  # max size for our input
+HEAP_ADDRESS = 0x00200000  # Heap addr
+HEAP_SIZE_MAX = 0x000F0000  # Maximum allowable size for the heap
+STACK_ADDRESS = 0x00400000  # Address of the stack (arbitrarily chosen)
+STACK_SIZE = 0x000F0000  # Size of the stack (arbitrarily chosen)
+
+target_path = os.path.abspath(
+    os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
+)
+target_bin = os.path.join(target_path, "target")
+
+
+def get_offsets_for(name):
+    full_path = os.path.join(target_path, f"target.offsets.{name}")
+    with open(full_path) as f:
+        return [int(x, 16) + BASE_ADDRESS for x in f.readlines()]
+
+
+# Read all offsets from our objdump file
+main_offset = get_offsets_for("main")[0]
+main_ends = get_offsets_for("main_ends")
+malloc_callsites = get_offsets_for("malloc")
+free_callsites = get_offsets_for("free")
+magicfn_callsites = get_offsets_for("magicfn")
+# Joke's on me: strlen got inlined by my compiler
+strlen_callsites = get_offsets_for("strlen")
+
+try:
+    # If Capstone is installed the trace hook prints disassembly, otherwise only address and size.
+    from capstone import *
+
+    cs = Cs(CS_ARCH_X86, CS_MODE_64)  # must match the Uc(UC_ARCH_X86, UC_MODE_64) engine used below
+
+    def unicorn_debug_instruction(uc, address, size, user_data):
+        mem = uc.mem_read(address, size)
+        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(
+            bytes(mem), size
+        ):
+            print("    Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
+
+
+except ImportError:
+
+    def unicorn_debug_instruction(uc, address, size, user_data):
+        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
+
+
+def unicorn_debug_block(uc, address, size, user_data):
+    print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
+
+
+def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
+    if access == UC_MEM_WRITE:
+        print(
+            "        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
+    else:
+        print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))
+
+
+def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
+    if access == UC_MEM_WRITE_UNMAPPED:
+        print(
+            "        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
+    else:
+        print(
+            "        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size)
+        )
+
+
+already_allocated = False
+
+
+def malloc_hook(uc, address, size, user_data):
+    """
+    We use a very simple malloc/free stub here, that only works for exactly one allocation at a time.
+    """
+    global already_allocated
+    if already_allocated:
+        print("Double malloc, not supported right now!")
+        os.abort()
+    # read the first param
+    malloc_size = uc.reg_read(UC_X86_REG_RDI)
+    if malloc_size > HEAP_SIZE_MAX:
+        print(
+            f"Tried to allocate {malloc_size} bytes, aint't nobody got space for that! (We may only allocate up to {HEAP_SIZE_MAX})"
+        )
+        os.abort()
+    uc.reg_write(UC_X86_REG_RAX, HEAP_ADDRESS)
+    uc.reg_write(UC_X86_REG_RIP, address + size)
+    already_allocated = True
+
+
+def free_hook(uc, address, size, user_data):
+    """
+    No real free, just set the "used"-flag to false.
+    """
+    global already_allocated
+    if not already_allocated:
+        print("Double free detected. Real bug?")
+        os.abort()
+    # read the first param
+    free_ptr = uc.reg_read(UC_X86_REG_RDI)
+    if free_ptr != HEAP_ADDRESS:
+        print(
+            f"Tried to free wrong mem region: {hex(free_ptr)} at code loc {hex(address)}"
+        )
+        os.abort()
+    uc.reg_write(UC_X86_REG_RIP, address + size)
+    already_allocated = False
+
+
+# def strlen_hook(uc, address, size, user_data):
+#     """
+#     No real strlen, we know the len is == our input.
+#     This completely ignores '\0', but for this target, do we really care?
+#     """
+#     global input_len
+#     print(f"Returning len {input_len}")
+#     uc.reg_write(UC_X86_REG_RAX, input_len)
+#     uc.reg_write(UC_X86_REG_RIP, address + size)
+
+
+def magicfn_hook(uc, address, size, user_data):
+    """
+    This is a fancy print function that we're just going to skip for fuzzing.
+    """
+    uc.reg_write(UC_X86_REG_RIP, address + size)
+
+
+def main():
+    """Set up the x86_64 emulator, map target/stack/input, install the hooks and start uc.afl_fuzz."""
+    parser = argparse.ArgumentParser(description="Test harness for simple_target.bin")
+    parser.add_argument(
+        "input_file",
+        type=str,
+        help="Path to the file containing the mutated input to load",
+    )
+    parser.add_argument(
+        "-t",
+        "--trace",
+        default=False,
+        action="store_true",
+        help="Enables debug tracing",
+    )
+    args = parser.parse_args()
+
+    # Instantiate an x86_64 Unicorn Engine instance
+    uc = Uc(UC_ARCH_X86, UC_MODE_64)
+
+    if args.trace:
+        uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
+        uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
+        uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
+        uc.hook_add(
+            UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID,
+            unicorn_debug_mem_invalid_access,
+        )
+
+    print("The input testcase is set to {}".format(args.input_file))
+
+    # ---------------------------------------------------
+    # Load the binary to emulate and map it into memory
+    with open(target_bin, "rb") as f:
+        binary_code = f.read()
+
+    # Make sure the binary fits into the region we map for code
+    if len(binary_code) > CODE_SIZE_MAX:
+        print("Binary code is too large (> {} bytes)".format(CODE_SIZE_MAX))
+        return
+
+    # Write the binary to its place in mem
+    uc.mem_map(BASE_ADDRESS, CODE_SIZE_MAX)
+    uc.mem_write(BASE_ADDRESS, binary_code)
+
+    # Set the program counter to the start of the code
+    uc.reg_write(UC_X86_REG_RIP, main_offset)
+
+    # Setup the stack.
+    uc.mem_map(STACK_ADDRESS, STACK_SIZE)
+    # Setup the stack pointer, but allocate two pointers for the pointers to input.
+    uc.reg_write(UC_X86_REG_RSP, STACK_ADDRESS + STACK_SIZE - 16)
+
+    # Setup our input space, and push the pointer to it in the function params
+    uc.mem_map(INPUT_ADDRESS, INPUT_MAX)
+    # We have argc = 2
+    uc.reg_write(UC_X86_REG_RDI, 2)
+    # RSI points to our little 2 QWORD space at the beginning of the stack...
+    uc.reg_write(UC_X86_REG_RSI, STACK_ADDRESS + STACK_SIZE - 16)
+    # ... which points to the Input. Write the ptr to mem in little endian.
+    uc.mem_write(STACK_ADDRESS + STACK_SIZE - 16, struct.pack("<Q", INPUT_ADDRESS))
+
+    for addr in malloc_callsites:
+        uc.hook_add(UC_HOOK_CODE, malloc_hook, begin=addr, end=addr)
+
+    for addr in free_callsites:
+        uc.hook_add(UC_HOOK_CODE, free_hook, begin=addr, end=addr)
+
+    if len(strlen_callsites):
+        # strlen got inlined for my compiler.
+        print(
+            "Oops, your compiler emitted strlen as function. You may have to change the harness."
+        )
+    # for addr in strlen_callsites:
+    #     uc.hook_add(UC_HOOK_CODE, strlen_hook, begin=addr, end=addr)
+
+    for addr in magicfn_callsites:
+        uc.hook_add(UC_HOOK_CODE, magicfn_hook, begin=addr, end=addr + 1)
+
+    # -----------------------------------------------------
+    # Set up a callback to place input data (do little work here, it's called for every single iteration! This code is *HOT*)
+    # The callback ignores persistent_round and data; afl_fuzz below is started with persistent_iters=1000.
+    # Be sure to check out the docstrings for the uc.afl_* functions.
+    def place_input_callback(uc, input, persistent_round, data):
+        # Apply constraints to the mutated input
+        input_len = len(input)
+        # global input_len
+        if input_len > INPUT_MAX:
+            #print("Test input is too long (> {} bytes)")
+            return False
+
+        # print(f"Placing input: {input} in round {persistent_round}")
+
+        # Make sure the string is always 0-terminated (as it would be "in the wild")
+        input[-1] = b'\0'
+
+        # Write the mutated input into the emulated input region at INPUT_ADDRESS
+        uc.mem_write(INPUT_ADDRESS, input)
+        #uc.reg_write(UC_X86_REG_RIP, main_offset)
+
+    print(f"Starting to fuzz. Running from addr {main_offset} to one of {main_ends}")
+    # Start the fuzzer.
+    uc.afl_fuzz(args.input_file, place_input_callback, main_ends, persistent_iters=1000)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/unicorn_mode/samples/speedtest/rust/.gitignore b/unicorn_mode/samples/speedtest/rust/.gitignore
new file mode 100644
index 00000000..eb5a316c
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/rust/.gitignore
@@ -0,0 +1 @@
+target
diff --git a/unicorn_mode/samples/speedtest/rust/Cargo.lock b/unicorn_mode/samples/speedtest/rust/Cargo.lock
new file mode 100644
index 00000000..5887facf
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/rust/Cargo.lock
@@ -0,0 +1,80 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+[[package]]
+name = "bitflags"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
+
+[[package]]
+name = "build-helper"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bdce191bf3fa4995ce948c8c83b4640a1745457a149e73c6db75b4ffe36aad5f"
+dependencies = [
+ "semver",
+]
+
+[[package]]
+name = "capstone"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "031ba51c39151a1d6336ec859646153187204b0147c7b3f6fe2de636f1b8dbb3"
+dependencies = [
+ "capstone-sys",
+]
+
+[[package]]
+name = "capstone-sys"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fae25eddcb80e24f98c35952c37a91ff7f8d0f60dbbdafb9763e8d5cc566b8d7"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "cc"
+version = "1.0.66"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
+
+[[package]]
+name = "libc"
+version = "0.2.82"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89203f3fba0a3795506acaad8ebce3c80c0af93f994d5a1d7a0b1eeb23271929"
+
+[[package]]
+name = "semver"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a3186ec9e65071a2095434b1f5bb24838d4e8e130f584c790f6033c79943537"
+dependencies = [
+ "semver-parser",
+]
+
+[[package]]
+name = "semver-parser"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
+
+[[package]]
+name = "unicornafl"
+version = "1.0.0"
+dependencies = [
+ "bitflags",
+ "build-helper",
+ "capstone",
+ "libc",
+]
+
+[[package]]
+name = "unicornafl_harness"
+version = "0.1.0"
+dependencies = [
+ "capstone",
+ "libc",
+ "unicornafl",
+]
diff --git a/unicorn_mode/samples/speedtest/rust/Cargo.toml b/unicorn_mode/samples/speedtest/rust/Cargo.toml
new file mode 100644
index 00000000..c19ee0a1
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/rust/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "unicornafl_harness"
+version = "0.1.0"
+authors = ["Dominik Maier <domenukk@gmail.com>"]
+edition = "2018"
+
+[profile.release]
+lto = true
+opt-level = 3
+panic = "abort"
+
+[dependencies]
+unicornafl = { path = "../../../unicornafl/bindings/rust/", version="1.0.0" }
+capstone="0.6.0"
+libc="0.2.66"
\ No newline at end of file
diff --git a/unicorn_mode/samples/speedtest/rust/Makefile b/unicorn_mode/samples/speedtest/rust/Makefile
new file mode 100644
index 00000000..fe18d6ee
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/rust/Makefile
@@ -0,0 +1,17 @@
+.PHONY: all clean fuzz
+all: fuzz
+clean:
+	cargo clean
+
+./target/release/unicornafl_harness: ./src/main.rs
+	cargo build --release
+./target/debug/unicornafl_harness: ./src/main.rs
+	cargo build
+
+# Build the emulated target via the parent Makefile; -C (capital) changes into that directory first.
+../target:
+	$(MAKE) -C ..
+
+fuzz: ../target ./target/release/unicornafl_harness
+	rm -rf ./output
+	SKIP_BINCHECK=1 ../../../../afl-fuzz -s 1 -i ../sample_inputs -o ./output -- ./target/release/unicornafl_harness @@
diff --git a/unicorn_mode/samples/speedtest/rust/src/main.rs b/unicorn_mode/samples/speedtest/rust/src/main.rs
new file mode 100644
index 00000000..8e31d2e2
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/rust/src/main.rs
@@ -0,0 +1,231 @@
+extern crate capstone;
+extern crate libc;
+
+use core::cell::Cell;
+use std::{
+    env,
+    fs::File,
+    io::{self, Read},
+    process::abort,
+    str,
+};
+
+use unicornafl::{
+    unicorn_const::{uc_error, Arch, Mode, Permission},
+    RegisterX86::{self, *},
+    Unicorn, UnicornHandle,
+};
+
+const BINARY: &str = &"../target"; // path of the target binary that `fuzz` loads into the emulator
+
+// Memory map for the code to be tested
+// Arbitrary address where code to test will be loaded
+const BASE_ADDRESS: u64 = 0x0;
+// Max size for the code (64 KiB)
+const CODE_SIZE_MAX: u64 = 0x00010000;
+// Location where the input will be placed (make sure the emulated program knows this somehow, too ;) )
+const INPUT_ADDRESS: u64 = 0x00100000;
+// Maximum size for our input (1 MiB)
+const INPUT_MAX: u64 = 0x00100000;
+// Where our pseudo-heap is at
+const HEAP_ADDRESS: u64 = 0x00200000;
+// Maximum allowable size for the heap
+const HEAP_SIZE_MAX: u64 = 0x000F0000;
+// Address of the stack (Some random address again)
+const STACK_ADDRESS: u64 = 0x00400000;
+// Size of the stack (arbitrarily chosen, just make it big enough)
+const STACK_SIZE: u64 = 0x000F0000;
+
+/// Reads the whole file at `filename` into memory; any open/read failure is returned as `io::Error`.
+fn read_file(filename: &str) -> Result<Vec<u8>, io::Error> {
+    let mut contents = Vec::new();
+    File::open(filename)?.read_to_end(&mut contents)?;
+    Ok(contents)
+}
+
+/// Parses `../target.offsets.<loc_name>`: one hex address (optionally `0x`-prefixed) per line.
+/// Blank lines (e.g. after a trailing newline) are skipped; malformed numbers yield `InvalidData`.
+fn parse_locs(loc_name: &str) -> Result<Vec<u64>, io::Error> {
+    let contents = read_file(&format!("../target.offsets.{}", loc_name))?;
+    str_from_u8_unchecked(&contents)
+        .lines()
+        .filter(|line| !line.trim().is_empty())
+        .map(|line| {
+            u64::from_str_radix(line.trim().trim_start_matches("0x"), 16)
+                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
+        })
+        .collect()
+}
+
+/// Views the bytes up to the first NUL (or the whole slice if there is none) as `&str`, skipping UTF-8 validation.
+pub fn str_from_u8_unchecked(utf8_src: &[u8]) -> &str {
+    let text_len = match utf8_src.iter().position(|&byte| byte == b'\0') {
+        Some(nul_index) => nul_index,
+        None => utf8_src.len(),
+    };
+    unsafe { str::from_utf8_unchecked(&utf8_src[..text_len]) }
+}
+
+/// Rounds `size` up to the next multiple of 0x1000; sizes that are already aligned are returned unchanged.
+fn align(size: u64) -> u64 {
+    const ALIGNMENT: u64 = 0x1000;
+    match size % ALIGNMENT {
+        0 => size,
+        remainder => size - remainder + ALIGNMENT,
+    }
+}
+
+/// Entry point: fuzzes with the testcase file named by the first argument (AFL substitutes `@@`).
+fn main() {
+    // `nth(1)` skips the program name and yields `None` when no argument was passed.
+    let input_file = match env::args().nth(1) {
+        Some(path) => path,
+        None => return println!("Missing parameter <emulation_input> (@@ for AFL)"),
+    };
+    println!("The input testcase is set to {}", input_file);
+    fuzz(&input_file).unwrap();
+}
+
+/// Sets up the emulator (code, stack, input region, call hooks) and starts the AFL fuzzing loop.
+fn fuzz(input_file: &str) -> Result<(), uc_error> {
+    let mut unicorn = Unicorn::new(Arch::X86, Mode::MODE_64, 0)?;
+    let mut uc: UnicornHandle<'_, _> = unicorn.borrow();
+
+    let binary = read_file(BINARY)
+        .unwrap_or_else(|e| panic!("Could not read target binary {}: {}", BINARY, e));
+    let _aligned_binary_size = align(binary.len() as u64);
+    // The code region has a fixed size: refuse binaries that cannot fit instead of failing later.
+    if binary.len() as u64 > CODE_SIZE_MAX {
+        panic!("Binary code is too large (> {} bytes)", CODE_SIZE_MAX);
+    }
+
+    // Write the binary to its place in mem
+    uc.mem_map(BASE_ADDRESS, CODE_SIZE_MAX as usize, Permission::ALL)?;
+    uc.mem_write(BASE_ADDRESS, &binary)?;
+
+    // Set the program counter to the start of the code
+    let main_locs = parse_locs("main").unwrap();
+    uc.reg_write(RegisterX86::RIP as i32, main_locs[0])?;
+
+    // Setup the stack.
+    uc.mem_map(
+        STACK_ADDRESS,
+        STACK_SIZE as usize,
+        Permission::READ | Permission::WRITE,
+    )?;
+    // Setup the stack pointer, but allocate two pointers for the pointers to input.
+    uc.reg_write(RSP as i32, STACK_ADDRESS + STACK_SIZE - 16)?;
+
+    // Setup our input space, and push the pointer to it in the function params
+    uc.mem_map(INPUT_ADDRESS, INPUT_MAX as usize, Permission::READ)?;
+    // We have argc = 2
+    uc.reg_write(RDI as i32, 2)?;
+    // RSI points to our little 2 QWORD space at the beginning of the stack...
+    uc.reg_write(RSI as i32, STACK_ADDRESS + STACK_SIZE - 16)?;
+    // ... which points to the Input. Write the ptr to mem in little endian.
+    uc.mem_write(
+        STACK_ADDRESS + STACK_SIZE - 16,
+        &(INPUT_ADDRESS as u32).to_le_bytes(),
+    )?;
+
+    let already_allocated = Cell::new(false);
+
+    let already_allocated_malloc = already_allocated.clone();
+    // Very simple malloc/free stub that only supports one live allocation at a time.
+    // NOTE(review): `Cell::clone()` yields an independent cell, so the malloc and free hooks (and every
+    // per-callsite closure clone below) each track their own flag -- confirm shared state was not intended.
+    // NOTE(review): no `mem_map` for HEAP_ADDRESS is visible in this function -- confirm the heap is usable.
+    let hook_malloc = move |mut uc: UnicornHandle<'_, _>, addr: u64, size: u32| {
+        if already_allocated_malloc.get() {
+            println!("Double malloc, not supported right now!");
+            abort();
+        }
+        // read the first param
+        let malloc_size = uc.reg_read(RDI as i32).unwrap();
+        if malloc_size > HEAP_SIZE_MAX {
+            println!(
+                "Tried to allocate {} bytes, but we may only allocate up to {}",
+                malloc_size, HEAP_SIZE_MAX
+            );
+            abort();
+        }
+        uc.reg_write(RAX as i32, HEAP_ADDRESS).unwrap();
+        uc.reg_write(RIP as i32, addr + size as u64).unwrap();
+        already_allocated_malloc.set(true);
+    };
+
+    let already_allocated_free = already_allocated.clone();
+    // No real free, just set the "used"-flag to false.
+    // NOTE(review): the double-free check fires while the flag is *set*, which looks inverted -- confirm.
+    let hook_free = move |mut uc: UnicornHandle<'_, _>, addr, size| {
+        if already_allocated_free.get() {
+            println!("Double free detected. Real bug?");
+            abort();
+        }
+        // read the first param
+        let free_ptr = uc.reg_read(RDI as i32).unwrap();
+        if free_ptr != HEAP_ADDRESS {
+            println!(
+                "Tried to free wrong mem region {:x} at code loc {:x}",
+                free_ptr, addr
+            );
+            abort();
+        }
+        uc.reg_write(RIP as i32, addr + size as u64).unwrap();
+        already_allocated_free.set(false);
+    };
+
+    // BEGIN FUNCTION HOOKS
+
+    // This is a fancy print function that we're just going to skip for fuzzing.
+    let hook_magicfn = move |mut uc: UnicornHandle<'_, _>, addr, size| {
+        uc.reg_write(RIP as i32, addr + size as u64).unwrap();
+    };
+
+    for addr in parse_locs("malloc").unwrap() {
+        uc.add_code_hook(addr, addr, Box::new(hook_malloc.clone()))?;
+    }
+
+    for addr in parse_locs("free").unwrap() {
+        uc.add_code_hook(addr, addr, Box::new(hook_free.clone()))?;
+    }
+
+    for addr in parse_locs("magicfn").unwrap() {
+        uc.add_code_hook(addr, addr, Box::new(hook_magicfn.clone()))?;
+    }
+
+    let place_input_callback =
+        |mut uc: UnicornHandle<'_, _>, afl_input: &[u8], _persistent_round| {
+            // apply constraints to the mutated input
+            if afl_input.len() > INPUT_MAX as usize {
+                return false;
+            }
+
+            // TODO: afl_input[-1] = b'\0'
+            uc.mem_write(INPUT_ADDRESS, afl_input).unwrap();
+            true
+        };
+
+    let crash_validation_callback =
+        |_uc: UnicornHandle<'_, _>, result, _input: &[u8], _persistent_round| {
+            result != uc_error::OK
+        };
+
+    let end_addrs = parse_locs("main_ends").unwrap();
+
+    let ret = uc.afl_fuzz(
+        input_file,
+        Box::new(place_input_callback),
+        &end_addrs,
+        Box::new(crash_validation_callback),
+        false,
+        1000,
+    );
+
+    match ret {
+        Ok(_) => {}
+        Err(e) => panic!("found non-ok unicorn exit: {:?}", e),
+    }
+
+    Ok(())
+}
diff --git a/unicorn_mode/samples/speedtest/sample_inputs/a b/unicorn_mode/samples/speedtest/sample_inputs/a
new file mode 100644
index 00000000..78981922
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/sample_inputs/a
@@ -0,0 +1 @@
+a
diff --git a/unicorn_mode/samples/speedtest/target.c b/unicorn_mode/samples/speedtest/target.c
new file mode 100644
index 00000000..8359a110
--- /dev/null
+++ b/unicorn_mode/samples/speedtest/target.c
@@ -0,0 +1,77 @@
+/*
+ * Sample target file to test afl-unicorn fuzzing capabilities.
+ * This is a very trivial example that will, however, never crash.
+ * Crashing would change the execution speed.
+ *
+ */
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Random print function we can hook in our harness to test hook speeds; returns `to_print` unchanged.
+char magicfn(char to_print) {
+  puts("Printing a char, just minding my own business: ");  // puts() appends a newline itself
+  putchar(to_print);
+  putchar('\n');
+  return to_print;  // callers compare this against their argument to check the hook
+}
+
+// Entry point: exercises malloc/free/magicfn on argv[1]; returns -1 without input, -2 if it is shorter than 20 chars.
+int main(int argc, char** argv) {
+  if (argc < 2) {
+    printf("Gimme input pl0x!\n");
+    return -1;
+  }
+  // Make sure the hooks work...
+  char *test = malloc(1024);
+  if (!test) {
+    printf("Uh-Oh, malloc doesn't work!");
+    abort();
+  }
+  free(test);
+
+  char *data_buf = argv[1];
+  // We can start the unicorn hooking here.
+  uint64_t data_len = strlen(data_buf);
+  if (data_len < 20) return -2;
+
+  while (data_len-- > 0) {  // prefix lengths from the original length - 1 down to 0
+    char *buf_cpy = NULL;
+    if (data_len) {
+      buf_cpy = malloc(data_len);
+      if (!buf_cpy) {
+        puts("Oof, malloc failed! :/");
+        abort();
+      }
+      memcpy(buf_cpy, data_buf, data_len);
+    }
+    if (data_len >= 18) {
+      free(buf_cpy);
+      continue;
+    }
+    if (data_len > 2 && data_len < 18) {
+      buf_cpy[data_len - 1] = (char) 0x90;
+    } else if (data_buf[9] == (char) 0x90 && data_buf[10] != 0x00 && data_buf[11] == (char) 0x90) {
+      // Only prefixes of at most 2 bytes get here: read data_buf (>= 20 chars), as buf_cpy is too short (or NULL).
+      char valid_read = data_buf[10];
+      if (magicfn(valid_read) != valid_read) {
+        puts("Oof, the hook for data_buf[10] is broken?");
+        abort();
+      }
+    }
+    free(buf_cpy);
+  }
+  if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) {
+    // Extra magicfn call if (0x10 < data[0] < 0x20) and data[1] > data[2]; this must not crash either.
+    char valid_read = data_buf[0];
+    if (magicfn(valid_read) != valid_read) {
+      puts("Oof, the hook for data_buf[0] is broken?");
+      abort();
+    }
+  }
+
+  magicfn('q');
+
+  return 0;
+}
diff --git a/unicorn_mode/unicornafl b/unicorn_mode/unicornafl
-Subproject 83d1b426be5d373edcc81576f58a10f617df143
+Subproject 0dd17c58d51ed6dc69a367adbe8d2dca4d224c4
diff --git a/unicorn_mode/update_uc_ref.sh b/unicorn_mode/update_uc_ref.sh
index a2613942..7c1c7778 100755
--- a/unicorn_mode/update_uc_ref.sh
+++ b/unicorn_mode/update_uc_ref.sh
@@ -19,7 +19,7 @@ if [ "$NEW_VERSION" = "-h" ]; then
   exit 1
 fi
 
-git submodule init && git submodule update || exit 1
+git submodule init && git submodule update unicornafl || exit 1
 cd ./unicornafl || exit 1
 git fetch origin dev 1>/dev/null || exit 1
 git stash 1>/dev/null 2>/dev/null