15 files changed, 277 insertions, 54 deletions
diff --git a/frida_mode/GNUmakefile b/frida_mode/GNUmakefile
index 582cf8d6..44dfafe3 100644
--- a/frida_mode/GNUmakefile
+++ b/frida_mode/GNUmakefile
@@ -21,7 +21,7 @@ CFLAGS+=-fPIC \
 		-funroll-loops \
 		-ffunction-sections \
 
-RT_CFLAGS:=-Wno-unused-parameter \
+AFL_CFLAGS:=-Wno-unused-parameter \
 		   -Wno-sign-compare \
 		   -Wno-unused-function \
 		   -Wno-unused-result \
@@ -60,10 +60,10 @@ endif
 
 ifeq "$(shell uname)" "Darwin"
  OS:=macos
- RT_CFLAGS:=$(RT_CFLAGS) -Wno-deprecated-declarations
+ AFL_CFLAGS:=$(AFL_CFLAGS) -Wno-deprecated-declarations
 else
 ifdef DEBUG
- RT_CFLAGS:=$(RT_CFLAGS) -Wno-prio-ctor-dtor
+ AFL_CFLAGS:=$(AFL_CFLAGS) -Wno-prio-ctor-dtor
 endif
 LDFLAGS+=	-z noexecstack \
 			-Wl,--gc-sections \
@@ -79,7 +79,12 @@ ifndef OS
  $(error "Operating system unsupported")
 endif
 
+ifeq "$(ARCH)" "arm64"
+# 15.0.0 Not released for aarch64 yet
+GUM_DEVKIT_VERSION=14.2.18
+else
 GUM_DEVKIT_VERSION=15.0.0
+endif
 GUM_DEVKIT_FILENAME=frida-gumjs-devkit-$(GUM_DEVKIT_VERSION)-$(OS)-$(ARCH).tar.xz
 GUM_DEVKIT_URL="https://github.com/frida/frida/releases/download/$(GUM_DEVKIT_VERSION)/$(GUM_DEVKIT_FILENAME)"
 
@@ -98,6 +103,9 @@ FRIDA_GUM_DEVKIT_COMPRESSED_TARBALL:=$(FRIDA_DIR)build/$(GUM_DEVKIT_FILENAME)
 AFL_COMPILER_RT_SRC:=$(ROOT)instrumentation/afl-compiler-rt.o.c
 AFL_COMPILER_RT_OBJ:=$(OBJ_DIR)afl-compiler-rt.o
 
+AFL_PERFORMANCE_SRC:=$(ROOT)src/afl-performance.c
+AFL_PERFORMANCE_OBJ:=$(OBJ_DIR)afl-performance.o
+
 HOOK_DIR:=$(PWD)hook/
 AFLPP_FRIDA_DRIVER_HOOK_SRC=$(HOOK_DIR)frida_hook.c
 AFLPP_FRIDA_DRIVER_HOOK_OBJ=$(BUILD_DIR)frida_hook.so
@@ -163,7 +171,16 @@ $(GUM_DEVIT_HEADER): $(GUM_DEVKIT_TARBALL)
 $(AFL_COMPILER_RT_OBJ): $(AFL_COMPILER_RT_SRC)
 	$(CC) \
 		$(CFLAGS) \
-		$(RT_CFLAGS) \
+		$(AFL_CFLAGS) \
+		-I $(ROOT) \
+		-I $(ROOT)include \
+		-o $@ \
+		-c $<
+
+$(AFL_PERFORMANCE_OBJ): $(AFL_PERFORMANCE_SRC)
+	$(CC) \
+		$(CFLAGS) \
+		$(AFL_CFLAGS) \
 		-I $(ROOT) \
 		-I $(ROOT)include \
 		-o $@ \
@@ -172,7 +189,7 @@ $(AFL_COMPILER_RT_OBJ): $(AFL_COMPILER_RT_SRC)
 ############################### JS #############################################
 
 $(BIN2C): $(BIN2C_SRC)
-	$(CC) -o $@ $<
+	$(CC) -D_GNU_SOURCE -o $@ $<
 
 $(JS_SRC): $(JS) $(BIN2C)| $(BUILD_DIR)
 	cd $(JS_DIR) && $(BIN2C) api_js $(JS) $@
@@ -203,12 +220,13 @@ $(foreach src,$(SOURCES),$(eval $(call BUILD_SOURCE,$(src),$(OBJ_DIR)$(notdir $(
 
 ######################## AFL-FRIDA-TRACE #######################################
 
-$(FRIDA_TRACE): $(GUM_DEVIT_LIBRARY) $(GUM_DEVIT_HEADER) $(OBJS) $(JS_OBJ) $(AFL_COMPILER_RT_OBJ) GNUmakefile | $(BUILD_DIR)
+$(FRIDA_TRACE): $(GUM_DEVIT_LIBRARY) $(GUM_DEVIT_HEADER) $(OBJS) $(JS_OBJ) $(AFL_COMPILER_RT_OBJ) $(AFL_PERFORMANCE_OBJ) GNUmakefile | $(BUILD_DIR)
 	$(CXX) \
 		$(OBJS) \
 		$(JS_OBJ) \
 		$(GUM_DEVIT_LIBRARY) \
 		$(AFL_COMPILER_RT_OBJ) \
+		$(AFL_PERFORMANCE_OBJ) \
 		$(LDFLAGS) \
 		$(LDSCRIPT) \
 		-o $@ \
diff --git a/frida_mode/MapDensity.md b/frida_mode/MapDensity.md
new file mode 100644
index 00000000..f4ae3ace
--- /dev/null
+++ b/frida_mode/MapDensity.md
@@ -0,0 +1,147 @@
+# Map Density
+
+# How Coverage Works
+The coverage in AFL++ works by assigning each basic block of code a unique ID
+and during execution when transitioning between blocks (e.g. by calls or jumps)
+assigning each of these edges an ID based upon the source and destination block
+ID.
+
+For each individual execution of the target, a single dimensional byte array
+indexed by the edge ID is used to count how many times each edge is traversed.
+
+A single dimensional cumulative byte array is also constructed where each byte
+again represents an individual edge ID, but this time, the value of the byte
+represents a range of how many times that edge has been traversed.
+
+```1, 2, 3, 4-7, 8-15, 16-31, 32-127, 128+```
+
+The theory is that a new path isn't particularly interesting if an edge has been
+traversed `23` instead of `24` times for example, but is interesting if an edge
+has been traversed for the very first time, or the number of times fits within a different bucket.
+
+After each run, the count of times each edge is hit is compared to the values in
+the cumulative map and if it is different, then the input is kept as a new seed
+and the cumulative map is updated.
+
+This mechanism is described in greater detail in the seminal
+[paper](https://lcamtuf.coredump.cx/afl/technical_details.txt) on AFL by
+[lcamtuf](https://github.com/lcamtuf).
+
+# Collisions
+In black-box fuzzing, we must assume that control may flow from any block to any
+other block, since we don't know any better. Thus for a target with `n` basic
+blocks of code, there are `n * n` potential edges. As we can see, even with a
+small number of blocks, a very large map will be required so that we have space
+to fit them all. Even if our target only had `1024` blocks, this would require a
+map containing `1048576` entries (or 1MiB in size).
+
+Whilst this may not seem like a lot of memory, it causes problems for two reasons. Firstly, the processing step after each execution must now process much more
+data, and secondly a map this size is unlikely to fit within the L2 cache of the processor. Since this is a very hot code path, we are likely to pay a very heavy
+performance cost.
+
+Therefore, we must accept that not all edges can have a unique ID and that
+therefore there will be collisions. This means that if the fuzzer finds a new
+path by uncovering an edge which was not previously found, but that the same
+edge ID is used by another edge, then it may go completely unnoticed. This is
+obviously undesirable, but equally if our map is too large, then we will not be
+able to process as many potential inputs in the same time and hence not uncover
+edges for that reason. Thus a careful trade-off of map size must be made.
+
+# Block & Edge Numbering
+Since the original AFL, blocks and edges have always been numbered in the same
+way as we can see from the following C snippet from the whitepaper.
+
+```c
+    cur_location = (block_address >> 4) ^ (block_address << 8);
+    shared_mem[cur_location ^ prev_location]++;
+    prev_location = cur_location >> 1;
+
+```
+
+Each block ID is generated by performing a shift and XOR on its address. Then
+the edge ID is calculated as `E = B ^ (B' >> 1)`, where `B` is the current block
+ID and `B'` the previous one. The edge ID is also masked to ensure it is less
+than the size of the map being used. Here, we can make two observations.
+
+## Block IDs
+Firstly, the block ID doesn't have very good entropy. If we consider the address
+of the block, then whilst each block has a unique ID, it isn't necessarily very
+evenly distributed.
+
+We start with a large address, and need to discard a large number of the bits to
+generate a block ID which is within range. But how do we choose the unique bits
+of the address versus those which are the same for every block? The high bits of
+the address may simply be all `0s` or all `1s` to make the address canonical,
+the middle portion of the address may be the same for all blocks (since if they
+are all within the same binary, then they will all be adjacent in memory), and
+on some systems, even the low bits may have poor entropy as some use fixed
+length aligned instructions. Then we need to consider that a portion of each
+binary may contain the `.data` or `.bss` sections and so may not contain any
+blocks of code at all.
+
+## Edge IDs
+Secondly, we can observe that when we generate an edge ID from the source and
+destination block IDs, we perform a right shift on the source block ID. Whilst
+there are good reasons as set out in the whitepaper why such a transform is
+applied, in so doing, we dispose of `1` bit of precious entropy in our source
+block ID.
+
+All together, this means that some edge IDs may be more popular than others.
+This means that some portions of the map may be very densely populated with large
+numbers of edges, whilst others may be very sparsely populated, or not populated
+at all.
+
+# Improvements
+One of the main reasons why this algorithm was selected is performance. All of
+the operations are very quick to perform and given we may be carrying this out
+for every block of code we execute, performance is critical.
+
+However, the design of the binary instrumentation modes of AFL++ has moved on.
+Both QEMU and FRIDA modes use a two stage process when executing a target
+application. Each block is first compiled or instrumented, and then it is
+executed. The compiled blocks can be re-used each time the target executes them.
+
+Since a block's ID is based on its address, and this is known at compile time, we
+only need to generate this ID once per block and so this ID generation no longer
+needs to be as performant. We can therefore use a hash algorithm to generate
+this ID and therefore ensure that the block IDs are more evenly distributed.
+
+Edge IDs, however, can only be determined at run-time, since we don't know which
+blocks a given input will traverse until we run it. However, given our block IDs
+are now evenly distributed, generating an evenly distributed edge ID becomes
+simple. Here, the only change we make is to use a rotate operation rather than
+a shift operation so we don't lose a bit of entropy from the source ID.
+
+So our new algorithm becomes:
+```c
+    cur_location = hash(block_address);
+    shared_mem[cur_location ^ prev_location]++;
+    prev_location = rotate(cur_location, 1);
+```
+
+Lastly, in the original design, the `prev_location` was always set to `0` at the
+beginning of a run; we instead set the value of `prev_location` to `hash(0)`.
+
+# Parallel Fuzzing
+Another sub-optimal aspect of the original design is that no matter how many
+instances of the fuzzer you ran in parallel, each instance numbered each block
+and so each edge with the same ID. Each instance would therefore find the same
+subset of edges collide with each other. In the event of a collision, all
+instances will hit the same road block.
+
+However, if we instead use a different seed for our hashing function for each
+instance, then each will ascribe each block a different ID and hence each edge
+will be given a different edge ID. This means that whilst one instance of the
+fuzzer may find a given pair of edges collide, it is very unlikely that another
+instance will find the same pair also collide.
+
+Due to the collaborative nature of parallel fuzzing, this means that whilst one
+instance may struggle to find a particular new path because the new edge
+collides, another instance will likely not encounter the same collision and thus
+be able to differentiate this new path and share it with the other instances.
+
+If only a single new edge is found, and the new path is shared with an instance
+for which that edge collides, that instance may disregard it as irrelevant. In
+practice, however, the discovery of a single new edge likely leads to several
+more edges beneath it also being found and therefore the likelihood of all of
+these being collisions is very slim.
diff --git a/frida_mode/README.md b/frida_mode/README.md
index 024fc140..6cbb4c4c 100644
--- a/frida_mode/README.md
+++ b/frida_mode/README.md
@@ -293,6 +293,10 @@ FASAN then adds instrumentation for any instrucutions which use memory operands
 then calls into the `__asan_loadN` and `__asan_storeN` functions provided by the DSO
 to validate memory accesses against the shadow memory.
 
+# Collisions
+FRIDA mode has also introduced some improvements to reduce collisions in the map.
+See [here](MapDensity.md) for details.
+
 ## TODO
 
 The next features to be added are Aarch32 support as well as looking at
diff --git a/frida_mode/hook/qemu_hook.c b/frida_mode/hook/qemu_hook.c
index 5b4f65b1..56e787e3 100644
--- a/frida_mode/hook/qemu_hook.c
+++ b/frida_mode/hook/qemu_hook.c
@@ -36,6 +36,7 @@ struct x86_64_regs {
 void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base,
                          uint8_t *input_buf, uint32_t input_buf_len) {
 
+  (void)guest_base; /* unused */
   memcpy((void *)regs->rdi, input_buf, input_buf_len);
   regs->rsi = input_buf_len;
 
@@ -75,6 +76,7 @@ struct x86_regs {
 void afl_persistent_hook(struct x86_regs *regs, uint64_t guest_base,
                          uint8_t *input_buf, uint32_t input_buf_len) {
 
+  (void)guest_base; /* unused */
   void **esp = (void **)regs->esp;
   void * arg1 = esp[1];
   void **arg2 = &esp[2];
@@ -175,6 +177,7 @@ struct arm64_regs {
 void afl_persistent_hook(struct arm64_regs *regs, uint64_t guest_base,
                          uint8_t *input_buf, uint32_t input_buf_len) {
 
+  (void)guest_base; /* unused */
   memcpy((void *)regs->x0, input_buf, input_buf_len);
   regs->x1 = input_buf_len;
 }
diff --git a/frida_mode/include/instrument.h b/frida_mode/include/instrument.h
index 9c8d3a5d..695b46af 100644
--- a/frida_mode/include/instrument.h
+++ b/frida_mode/include/instrument.h
@@ -5,11 +5,12 @@
 
 #include "config.h"
 
-extern char *            instrument_debug_filename;
-extern gboolean          instrument_tracing;
-extern gboolean          instrument_optimize;
-extern gboolean          instrument_unique;
-extern __thread uint64_t instrument_previous_pc;
+extern char *           instrument_debug_filename;
+extern gboolean         instrument_tracing;
+extern gboolean         instrument_optimize;
+extern gboolean         instrument_unique;
+extern __thread guint64 instrument_previous_pc;
+extern guint64          instrument_hash_zero;
 
 extern uint8_t *__afl_area_ptr;
 extern uint32_t __afl_map_size;
@@ -33,5 +34,10 @@ void     instrument_debug_instruction(uint64_t address, uint16_t size);
 void     instrument_debug_end(GumStalkerOutput *output);
 void     instrument_flush(GumStalkerOutput *output);
 gpointer instrument_cur(GumStalkerOutput *output);
+
+void instrument_on_fork(void);
+
+guint64 instrument_get_offset_hash(GumAddress current_rip);
+
 #endif
 
diff --git a/frida_mode/many-linux/Dockerfile b/frida_mode/many-linux/Dockerfile
index 1d39c356..2cd56bc8 100644
--- a/frida_mode/many-linux/Dockerfile
+++ b/frida_mode/many-linux/Dockerfile
@@ -11,7 +11,7 @@ RUN git clone https://github.com/AFLplusplus/AFLplusplus.git
 
 WORKDIR /AFLplusplus
 RUN mkdir -p /AFLplusplus/frida_mode/build/frida/
-RUN curl -L -o /AFLplusplus/frida_mode/build/frida/frida-gumjs-devkit-14.2.18-linux-x86_64.tar.xz "https://github.com/frida/frida/releases/download/14.2.18/frida-gumjs-devkit-14.2.18-linux-x86_64.tar.xz"
+RUN curl -L -o /AFLplusplus/frida_mode/build/frida/frida-gumjs-devkit-15.0.0-linux-x86_64.tar.xz "https://github.com/frida/frida/releases/download/15.0.0/frida-gumjs-devkit-15.0.0-linux-x86_64.tar.xz"
 
 WORKDIR /AFLplusplus
 RUN git checkout dev
diff --git a/frida_mode/src/entry.c b/frida_mode/src/entry.c
index f70e21fc..a0ffd028 100644
--- a/frida_mode/src/entry.c
+++ b/frida_mode/src/entry.c
@@ -21,7 +21,7 @@ static void entry_launch(void) {
   __afl_manual_init();
 
   /* Child here */
-  instrument_previous_pc = 0;
+  instrument_on_fork();
   stats_on_fork();
 
 }
diff --git a/frida_mode/src/instrument/instrument.c b/frida_mode/src/instrument/instrument.c
index 2d857716..81d14013 100644
--- a/frida_mode/src/instrument/instrument.c
+++ b/frida_mode/src/instrument/instrument.c
@@ -6,6 +6,7 @@
 
 #include "config.h"
 #include "debug.h"
+#include "hash.h"
 
 #include "asan.h"
 #include "entry.h"
@@ -22,10 +23,12 @@
 gboolean instrument_tracing = false;
 gboolean instrument_optimize = false;
 gboolean instrument_unique = false;
+guint64  instrument_hash_zero = 0;
+guint64  instrument_hash_seed = 0;
 
 static GumStalkerTransformer *transformer = NULL;
 
-__thread uint64_t instrument_previous_pc = 0;
+__thread guint64 instrument_previous_pc = 0;
 
 static GumAddress previous_rip = 0;
 static u8 *       edges_notified = NULL;
@@ -49,21 +52,18 @@ static void trace_debug(char *format, ...) {
 
 }
 
-__attribute__((hot)) static void on_basic_block(GumCpuContext *context,
-                                                gpointer       user_data) {
+guint64 instrument_get_offset_hash(GumAddress current_rip) {
 
-  UNUSED_PARAMETER(context);
+  guint64 area_offset = hash64((unsigned char *)&current_rip,
+                               sizeof(GumAddress), instrument_hash_seed);
+  return area_offset &= MAP_SIZE - 1;
 
-  GumAddress current_rip = GUM_ADDRESS(user_data);
-  GumAddress current_pc;
-  GumAddress edge;
-  uint8_t *  cursor;
-  uint64_t   value;
+}
 
-  current_pc = (current_rip >> 4) ^ (current_rip << 8);
-  current_pc &= MAP_SIZE - 1;
+__attribute__((hot)) static void instrument_increment_map(GumAddress edge) {
 
-  edge = current_pc ^ instrument_previous_pc;
+  uint8_t *cursor;
+  uint64_t value;
 
   cursor = &__afl_area_ptr[edge];
   value = *cursor;
@@ -79,7 +79,21 @@ __attribute__((hot)) static void on_basic_block(GumCpuContext *context,
   }
 
   *cursor = value;
-  instrument_previous_pc = current_pc >> 1;
+
+}
+
+__attribute__((hot)) static void on_basic_block(GumCpuContext *context,
+                                                gpointer       user_data) {
+
+  UNUSED_PARAMETER(context);
+
+  GumAddress current_rip = GUM_ADDRESS(user_data);
+  guint64    current_pc = instrument_get_offset_hash(current_rip);
+  guint64    edge;
+
+  edge = current_pc ^ instrument_previous_pc;
+
+  instrument_increment_map(edge);
 
   if (unlikely(instrument_tracing)) {
 
@@ -98,6 +112,9 @@ __attribute__((hot)) static void on_basic_block(GumCpuContext *context,
 
   }
 
+  instrument_previous_pc = /* rotate right by 1 (bit 0 moves to bit 15) */
+      ((current_pc & (MAP_SIZE - 1)) >> 1) | ((current_pc & 0x1) << 15);
+
 }
 
 static void instrument_basic_block(GumStalkerIterator *iterator,
@@ -265,6 +282,19 @@ void instrument_init(void) {
 
   }
 
+  /*
+   * By using a different seed value for the hash, we can make different
+   * instances have edge collisions in different places when carrying out
+   * parallel fuzzing. The seed itself, doesn't have to be random, it just
+   * needs to be different for each instance.
+   */
+  instrument_hash_seed =
+      g_get_monotonic_time() ^ (((guint64)getpid()) << 32) ^ gettid();
+
+  OKF("Instrumentation - seed [0x%016" G_GINT64_MODIFIER "x]",
+      instrument_hash_seed);
+  instrument_hash_zero = instrument_get_offset_hash(0);
+
   instrument_debug_init();
   asan_init();
   cmplog_init();
@@ -278,3 +308,9 @@ GumStalkerTransformer *instrument_get_transformer(void) {
 
 }
 
+void instrument_on_fork(void) {
+  /* Start the edge chain from the cached hash of address 0, not literal 0. */
+  instrument_previous_pc = instrument_hash_zero;
+
+}
+
diff --git a/frida_mode/src/instrument/instrument_arm64.c b/frida_mode/src/instrument/instrument_arm64.c
index 17f97c97..cf37e048 100644
--- a/frida_mode/src/instrument/instrument_arm64.c
+++ b/frida_mode/src/instrument/instrument_arm64.c
@@ -12,15 +12,15 @@ static GumAddress current_log_impl = GUM_ADDRESS(0);
 static const guint8 afl_log_code[] = {
 
     // __afl_area_ptr[current_pc ^ previous_pc]++;
-    // previous_pc = current_pc >> 1;
+    // previous_pc = current_pc ROR 1;
     0xE1, 0x0B, 0xBF, 0xA9,  // stp x1, x2, [sp, -0x10]!
     0xE3, 0x13, 0xBF, 0xA9,  // stp x3, x4, [sp, -0x10]!
 
     // x0 = current_pc
-    0xe1, 0x01, 0x00, 0x58,  // ldr x1, #0x3c, =&__afl_area_ptr
+    0x21, 0x02, 0x00, 0x58,  // ldr x1, #0x44, =&__afl_area_ptr
     0x21, 0x00, 0x40, 0xf9,  // ldr x1, [x1] (=__afl_area_ptr)
 
-    0xe2, 0x01, 0x00, 0x58,  // ldr x2, #0x3c, =&previous_pc
+    0x22, 0x02, 0x00, 0x58,  // ldr x2, #0x44, =&previous_pc
     0x42, 0x00, 0x40, 0xf9,  // ldr x2, [x2] (=previous_pc)
 
     // __afl_area_ptr[current_pc ^ previous_pc]++;
@@ -30,8 +30,11 @@ static const guint8 afl_log_code[] = {
     0x63, 0x00, 0x1f, 0x9a,  // adc x3, x3, xzr
     0x23, 0x68, 0x22, 0xf8,  // str x3, [x1, x2]
 
-    // previous_pc = current_pc >> 1;
-    0xe0, 0x07, 0x40, 0x8b,  // add x0, xzr, x0, LSR #1
+    // previous_pc = current_pc ROR 1;
+    0xe4, 0x07, 0x40, 0x8b,  // add x4, xzr, x0, LSR #1
+    0xe0, 0xff, 0x00, 0x8b,  // add x0, xzr, x0, LSL #63
+    0x80, 0xc0, 0x40, 0x8b,  // add x0, x4, x0, LSR #48
+
     0xe2, 0x00, 0x00, 0x58,  // ldr x2, #0x1c, =&previous_pc
     0x40, 0x00, 0x00, 0xf9,  // str x0, [x2]
 
@@ -54,8 +57,7 @@ void instrument_coverage_optimize(const cs_insn *   instr,
                                   GumStalkerOutput *output) {
 
   guint64 current_pc = instr->address;
-  guint64 area_offset = (current_pc >> 4) ^ (current_pc << 8);
-  area_offset &= MAP_SIZE - 1;
+  guint64 area_offset = instrument_get_offset_hash(GUM_ADDRESS(instr->address));
   GumArm64Writer *cw = output->writer.arm64;
 
   if (current_log_impl == 0 ||
diff --git a/frida_mode/src/instrument/instrument_x64.c b/frida_mode/src/instrument/instrument_x64.c
index a2b54369..fec8afbb 100644
--- a/frida_mode/src/instrument/instrument_x64.c
+++ b/frida_mode/src/instrument/instrument_x64.c
@@ -24,7 +24,7 @@ static const guint8 afl_log_code[] = {
 
     0x80, 0x02, 0x01,                              /* add byte ptr [rdx], 1 */
     0x80, 0x12, 0x00,                              /* adc byte ptr [rdx], 0 */
-    0x48, 0xd1, 0xef,                                         /* shr rdi, 1 */
+    0x66, 0xd1, 0xcf,                                          /* ror di, 1 */
     0x48, 0x89, 0x39,                               /* mov qword [rcx], rdi */
 
     0x5a,                                                        /* pop rdx */
@@ -49,13 +49,9 @@ gboolean instrument_is_coverage_optimize_supported(void) {
 
 static guint8 align_pad[] = {0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90};
 
-void instrument_coverage_optimize(const cs_insn *   instr,
-                                  GumStalkerOutput *output) {
+static void instrument_coverate_write_function(GumStalkerOutput *output) {
 
-  guint64 current_pc = instr->address;
-  guint64 area_offset = (current_pc >> 4) ^ (current_pc << 8);
-  guint64 misalign = 0;
-  area_offset &= MAP_SIZE - 1;
+  guint64       misalign = 0;
   GumX86Writer *cw = output->writer.x86;
 
   if (current_log_impl == 0 ||
@@ -87,6 +83,15 @@ void instrument_coverage_optimize(const cs_insn *   instr,
 
   }
 
+}
+
+void instrument_coverage_optimize(const cs_insn *   instr,
+                                  GumStalkerOutput *output) {
+
+  GumX86Writer *cw = output->writer.x86;
+  guint64 area_offset = instrument_get_offset_hash(GUM_ADDRESS(instr->address));
+  instrument_coverate_write_function(output);
+
   gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP,
                                         -GUM_RED_ZONE_SIZE);
   gum_x86_writer_put_push_reg(cw, GUM_REG_RDI);
diff --git a/frida_mode/src/instrument/instrument_x86.c b/frida_mode/src/instrument/instrument_x86.c
index 3c3dc272..7bf48f96 100644
--- a/frida_mode/src/instrument/instrument_x86.c
+++ b/frida_mode/src/instrument/instrument_x86.c
@@ -30,7 +30,8 @@ static void instrument_coverage_function(GumX86Writer *cw) {
   uint8_t adc_byte_ptr_edx_0[] = {0x80, 0x12, 0x00};
   gum_x86_writer_put_bytes(cw, adc_byte_ptr_edx_0, sizeof(adc_byte_ptr_edx_0));
 
-  gum_x86_writer_put_shr_reg_u8(cw, GUM_REG_EDI, 1);
+  uint8_t ror_di_1[] = {0x66, 0xd1, 0xcf};
+  gum_x86_writer_put_bytes(cw, ror_di_1, sizeof(ror_di_1));
   gum_x86_writer_put_mov_reg_ptr_reg(cw, GUM_REG_ECX, GUM_REG_EDI);
 
   gum_x86_writer_put_pop_reg(cw, GUM_REG_EDX);
@@ -46,15 +47,8 @@ gboolean instrument_is_coverage_optimize_supported(void) {
 
 }
 
-void instrument_coverage_optimize(const cs_insn *   instr,
-                                  GumStalkerOutput *output) {
-
-  UNUSED_PARAMETER(instr);
-  UNUSED_PARAMETER(output);
+static void instrument_coverate_write_function(GumStalkerOutput *output) {
 
-  guint64 current_pc = instr->address;
-  guint64 area_offset = (current_pc >> 4) ^ (current_pc << 8);
-  area_offset &= MAP_SIZE - 1;
   GumX86Writer *cw = output->writer.x86;
 
   if (current_log_impl == 0 ||
@@ -73,7 +67,15 @@ void instrument_coverage_optimize(const cs_insn *   instr,
 
   }
 
-  // gum_x86_writer_put_breakpoint(cw);
+}
+
+void instrument_coverage_optimize(const cs_insn *   instr,
+                                  GumStalkerOutput *output) {
+
+  GumX86Writer *cw = output->writer.x86;
+  guint64 area_offset = instrument_get_offset_hash(GUM_ADDRESS(instr->address));
+  instrument_coverate_write_function(output);
+
   gum_x86_writer_put_push_reg(cw, GUM_REG_EDI);
   gum_x86_writer_put_mov_reg_address(cw, GUM_REG_EDI, area_offset);
   gum_x86_writer_put_call_address(cw, current_log_impl);
diff --git a/frida_mode/src/persistent/persistent_arm64.c b/frida_mode/src/persistent/persistent_arm64.c
index 4ab7b283..3cd61cd5 100644
--- a/frida_mode/src/persistent/persistent_arm64.c
+++ b/frida_mode/src/persistent/persistent_arm64.c
@@ -237,7 +237,7 @@ static void instrument_exit(GumArm64Writer *cw) {
 static int instrument_afl_persistent_loop_func(void) {
 
   int ret = __afl_persistent_loop(persistent_count);
-  instrument_previous_pc = 0;
+  instrument_previous_pc = instrument_hash_zero;
   return ret;
 
 }
diff --git a/frida_mode/src/persistent/persistent_x64.c b/frida_mode/src/persistent/persistent_x64.c
index ce3017e4..c0bd9a09 100644
--- a/frida_mode/src/persistent/persistent_x64.c
+++ b/frida_mode/src/persistent/persistent_x64.c
@@ -174,7 +174,7 @@ static void instrument_exit(GumX86Writer *cw) {
 static int instrument_afl_persistent_loop_func(void) {
 
   int ret = __afl_persistent_loop(persistent_count);
-  instrument_previous_pc = 0;
+  instrument_previous_pc = instrument_hash_zero;
   return ret;
 
 }
diff --git a/frida_mode/src/persistent/persistent_x86.c b/frida_mode/src/persistent/persistent_x86.c
index cc1f1a4f..b911676a 100644
--- a/frida_mode/src/persistent/persistent_x86.c
+++ b/frida_mode/src/persistent/persistent_x86.c
@@ -130,7 +130,7 @@ static void instrument_exit(GumX86Writer *cw) {
 static int instrument_afl_persistent_loop_func(void) {
 
   int ret = __afl_persistent_loop(persistent_count);
-  instrument_previous_pc = 0;
+  instrument_previous_pc = instrument_hash_zero;
   return ret;
 
 }
diff --git a/frida_mode/util/get_symbol_addr.sh b/frida_mode/util/get_symbol_addr.sh
index 7f9b7d22..f5d8df91 100755
--- a/frida_mode/util/get_symbol_addr.sh
+++ b/frida_mode/util/get_symbol_addr.sh
@@ -26,7 +26,7 @@ file "$target" | grep -q executable && {
   exit 0
 }
 
-hex_base=$(echo "$3" | awk '{sub("^0x","");print $0}')
+hex_base=$(echo "$3" | awk '{sub("^0x","");print $0}' | tr a-f A-F )
 nm "$target" | grep -i "T $symbol" | awk '{print$1}' | tr a-f A-F | \
   xargs echo "ibase=16;obase=10;$hex_base + " | bc | tr A-F a-f | awk '{print "0x"$0}'
 exit 0