about summary refs log tree commit diff
path: root/frida_mode/src
diff options
context:
space:
mode:
Diffstat (limited to 'frida_mode/src')
-rw-r--r-- frida_mode/src/asan/asan_arm32.c (renamed from frida_mode/src/asan/asan_arm.c) | 0
-rw-r--r-- frida_mode/src/asan/asan_arm64.c | 76
-rw-r--r-- frida_mode/src/cmplog/cmplog_arm32.c (renamed from frida_mode/src/cmplog/cmplog_arm.c) | 0
-rw-r--r-- frida_mode/src/cmplog/cmplog_arm64.c | 295
-rw-r--r-- frida_mode/src/ctx/ctx_arm32.c | 16
-rw-r--r-- frida_mode/src/ctx/ctx_arm64.c | 303
-rw-r--r-- frida_mode/src/instrument/instrument.c | 5
-rw-r--r-- frida_mode/src/instrument/instrument_arm32.c | 12
-rw-r--r-- frida_mode/src/instrument/instrument_arm64.c | 12
-rw-r--r-- frida_mode/src/instrument/instrument_debug.c | 58
-rw-r--r-- frida_mode/src/instrument/instrument_x64.c | 12
-rw-r--r-- frida_mode/src/instrument/instrument_x86.c | 12
-rw-r--r-- frida_mode/src/persistent/persistent_arm64.c | 354
-rw-r--r-- frida_mode/src/persistent/persistent_x64.c | 2
-rw-r--r-- frida_mode/src/persistent/persistent_x86.c | 2
-rw-r--r-- frida_mode/src/stats/stats.c | 1
-rw-r--r-- frida_mode/src/stats/stats_arm32.c (renamed from frida_mode/src/stats/stats_arm.c) | 0
17 files changed, 1114 insertions, 46 deletions
diff --git a/frida_mode/src/asan/asan_arm.c b/frida_mode/src/asan/asan_arm32.c
index 79475ced..79475ced 100644
--- a/frida_mode/src/asan/asan_arm.c
+++ b/frida_mode/src/asan/asan_arm32.c
diff --git a/frida_mode/src/asan/asan_arm64.c b/frida_mode/src/asan/asan_arm64.c
index 6262ee18..66138e42 100644
--- a/frida_mode/src/asan/asan_arm64.c
+++ b/frida_mode/src/asan/asan_arm64.c
@@ -1,18 +1,80 @@
+#include <dlfcn.h>
 #include "frida-gum.h"
 
 #include "debug.h"
 
 #include "asan.h"
+#include "ctx.h"
 #include "util.h"
 
 #if defined(__aarch64__)
+
+typedef struct {
+  /* State handed to asan_callout() for one instrumented memory operand. */
+  size_t      size;
+  cs_arm64_op operand;
+
+} asan_ctx_t;
+
+typedef void (*asan_loadN_t)(gsize address, uint8_t size);
+typedef void (*asan_storeN_t)(gsize address, uint8_t size);
+/* Assigned in asan_arch_init() from dlsym() lookups; NULL until then. */
+asan_loadN_t  asan_loadN = NULL;
+asan_storeN_t asan_storeN = NULL;
+
+static void asan_callout(GumCpuContext *ctx, gpointer user_data) {
+  /* Stalker callout: rebuild the effective address of the recorded memory
+   * operand from the live registers and pass it to the ASAN load/store hooks. */
+  asan_ctx_t *  asan_ctx = (asan_ctx_t *)user_data;
+  cs_arm64_op * operand = &asan_ctx->operand;
+  arm64_op_mem *mem = &operand->mem;
+  gsize         base = 0;
+  gsize         index = 0;
+  gsize         address;
+
+  if (mem->base != ARM64_REG_INVALID) { base = ctx_read_reg(ctx, mem->base); }
+
+  if (mem->index != ARM64_REG_INVALID) {
+
+    index = ctx_read_reg(ctx, mem->index);
+    /* Register-offset forms may sign-extend a W index and scale it (LSL #n). */
+    if (operand->ext == ARM64_EXT_SXTW) { index = (gsize)(gint64)(gint32)index; }
+    if (operand->shift.type == ARM64_SFT_LSL) { index <<= operand->shift.value; }
+
+  }
+
+  address = base + index + mem->disp;
+
+  if ((operand->access & CS_AC_READ) == CS_AC_READ) {
+    asan_loadN(address, asan_ctx->size);
+  }
+
+  if ((operand->access & CS_AC_WRITE) == CS_AC_WRITE) {
+    asan_storeN(address, asan_ctx->size);
+  }
+
+}
+
 void asan_instrument(const cs_insn *instr, GumStalkerIterator *iterator) {
 
-  UNUSED_PARAMETER(instr);
   UNUSED_PARAMETER(iterator);
-  if (asan_initialized) {
 
-    FATAL("ASAN mode not supported on this architecture");
+  cs_arm64     arm64 = instr->detail->arm64;
+  cs_arm64_op *operand;
+  asan_ctx_t * ctx;
+
+  if (!asan_initialized) return;
+
+  for (uint8_t i = 0; i < arm64.op_count; i++) {
+
+    operand = &arm64.operands[i];
+
+    if (operand->type != ARM64_OP_MEM) { continue; }
+
+    ctx = g_malloc0(sizeof(asan_ctx_t));
+    ctx->size = ctx_get_size(instr, &arm64.operands[0]);
+    memcpy(&ctx->operand, operand, sizeof(cs_arm64_op));
+    gum_stalker_iterator_put_callout(iterator, asan_callout, ctx, g_free);
 
   }
 
@@ -20,7 +82,13 @@ void asan_instrument(const cs_insn *instr, GumStalkerIterator *iterator) {
 
 void asan_arch_init(void) {
-
-  FATAL("ASAN mode not supported on this architecture");
+  /* Look up "__asan_loadN" and "__asan_storeN" with dlsym(RTLD_DEFAULT). */
+  asan_loadN = (asan_loadN_t)dlsym(RTLD_DEFAULT, "__asan_loadN");
+  asan_storeN = (asan_storeN_t)dlsym(RTLD_DEFAULT, "__asan_storeN");
+  if (asan_loadN == NULL || asan_storeN == NULL) {
+    /* asan_callout() calls both pointers, so neither may be missing. */
+    FATAL("Frida ASAN failed to find '__asan_loadN' or '__asan_storeN'");
+
+  }
 
 }
 
diff --git a/frida_mode/src/cmplog/cmplog_arm.c b/frida_mode/src/cmplog/cmplog_arm32.c
index 5af28f3f..5af28f3f 100644
--- a/frida_mode/src/cmplog/cmplog_arm.c
+++ b/frida_mode/src/cmplog/cmplog_arm32.c
diff --git a/frida_mode/src/cmplog/cmplog_arm64.c b/frida_mode/src/cmplog/cmplog_arm64.c
index 187d0162..04631ff8 100644
--- a/frida_mode/src/cmplog/cmplog_arm64.c
+++ b/frida_mode/src/cmplog/cmplog_arm64.c
@@ -1,17 +1,304 @@
 #include "frida-gum.h"
 
 #include "debug.h"
+#include "cmplog.h"
 
+#include "ctx.h"
 #include "frida_cmplog.h"
 #include "util.h"
 
 #if defined(__aarch64__)
+
+typedef struct {
+  /* One captured operand. cmplog_instrument_put_operand() does not set size. */
+  arm64_op_type type;
+  uint8_t       size;
+
+  union {
+    /* The member to read is selected by `type`. */
+    arm64_op_mem mem;
+    arm64_reg    reg;
+    int64_t      imm;
+
+  };
+
+} cmplog_ctx_t;
+
+typedef struct {
+  /* The two operands of one comparison plus the comparison size. */
+  cmplog_ctx_t operand1;
+  cmplog_ctx_t operand2;
+  size_t       size;
+
+} cmplog_pair_ctx_t;
+
+static gboolean cmplog_read_mem(GumCpuContext *ctx, uint8_t size,
+                                arm64_op_mem *mem, gsize *val) {
+  /* Load `size` bytes at base + index + disp into *val; FALSE if unreadable. */
+  gsize base = 0;
+  gsize index = 0;
+  gsize address;
+
+  if (mem->base != ARM64_REG_INVALID) { base = ctx_read_reg(ctx, mem->base); }
+
+  if (mem->index != ARM64_REG_INVALID) {
+
+    index = ctx_read_reg(ctx, mem->index);
+
+  }
+
+  address = base + index + mem->disp;
+  /* Do not dereference an address that cmplog_is_readable() rejects. */
+  if (!cmplog_is_readable(address, size)) { return FALSE; }
+
+  switch (size) {
+    /* Only 1, 2, 4 and 8 byte loads are handled; anything else is fatal. */
+    case 1:
+      *val = *((guint8 *)GSIZE_TO_POINTER(address));
+      return TRUE;
+    case 2:
+      *val = *((guint16 *)GSIZE_TO_POINTER(address));
+      return TRUE;
+    case 4:
+      *val = *((guint32 *)GSIZE_TO_POINTER(address));
+      return TRUE;
+    case 8:
+      *val = *((guint64 *)GSIZE_TO_POINTER(address));
+      return TRUE;
+    default:
+      FATAL("Invalid operand size: %d\n", size);
+
+  }
+
+  return FALSE;
+
+}
+
+static gboolean cmplog_get_operand_value(GumCpuContext *context,
+                                         cmplog_ctx_t *ctx, gsize *val) {
+  /* Resolve a captured operand to its current value; FALSE if it can't be read. */
+  if (ctx->type == ARM64_OP_MEM) {
+
+    return cmplog_read_mem(context, ctx->size, &ctx->mem, val);
+
+  }
+
+  if (ctx->type == ARM64_OP_REG) {
+    *val = ctx_read_reg(context, ctx->reg);
+  } else if (ctx->type == ARM64_OP_IMM) {
+    *val = ctx->imm;
+  } else {
+    FATAL("Invalid operand type: %d\n", ctx->type);
+    return FALSE;
+  }
+
+  return TRUE;
+
+}
+
+static void cmplog_call_callout(GumCpuContext *context, gpointer user_data) {
+  /* Log 32 bytes behind each of x0 and x1 as a CMP_TYPE_RTN cmp map entry. */
+  UNUSED_PARAMETER(user_data);
+
+  gsize address = context->pc;
+  gsize x0 = ctx_read_reg(context, ARM64_REG_X0);
+  gsize x1 = ctx_read_reg(context, ARM64_REG_X1);
+  /* Skip values so close to G_MAXULONG that a 32 byte read would wrap. */
+  if (((G_MAXULONG - x0) < 32) || ((G_MAXULONG - x1) < 32)) return;
+
+  if (!cmplog_is_readable(x0, 32) || !cmplog_is_readable(x1, 32)) return;
+
+  void *ptr1 = GSIZE_TO_POINTER(x0);
+  void *ptr2 = GSIZE_TO_POINTER(x1);
+
+  uintptr_t k = address;
+  /* Fold the callout pc into an index below CMP_MAP_W. */
+  k = (k >> 4) ^ (k << 8);
+  k &= CMP_MAP_W - 1;
+
+  __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+
+  u32 hits = __afl_cmp_map->headers[k].hits;
+  __afl_cmp_map->headers[k].hits = hits + 1;
+
+  __afl_cmp_map->headers[k].shape = 31;
+  /* The pre-increment hit count, masked, selects the log entry to fill. */
+  hits &= CMP_MAP_RTN_H - 1;
+  gum_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0, ptr1,
+             32);
+  gum_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1, ptr2,
+             32);
+
+}
+
+static void cmplog_instrument_put_operand(cmplog_ctx_t *ctx,
+                                          cs_arm64_op * operand) {
+  /* Copy the operand's type and the payload matching that type into ctx. */
+  arm64_op_type kind = operand->type;
+
+  ctx->type = kind;
+  if (kind == ARM64_OP_REG) {
+
+    ctx->reg = operand->reg;
+
+  } else if (kind == ARM64_OP_IMM) {
+
+    ctx->imm = operand->imm;
+
+  } else if (kind == ARM64_OP_MEM) {
+
+    ctx->mem = operand->mem;
+
+  } else { FATAL("Invalid operand type: %d\n", kind); }
+
+}
+
+static void cmplog_instrument_call(const cs_insn *     instr,
+                                   GumStalkerIterator *iterator) {
+  /* Attach cmplog_call_callout to the BL/BLR-family instruction ids below. */
+  cs_arm64     arm64 = instr->detail->arm64;
+  cs_arm64_op *operand;
+
+  switch (instr->id) {
+
+    case ARM64_INS_BL:
+    case ARM64_INS_BLR:
+    case ARM64_INS_BLRAA:
+    case ARM64_INS_BLRAAZ:
+    case ARM64_INS_BLRAB:
+    case ARM64_INS_BLRABZ:
+      break;
+    default:
+      return;
+
+  }
+  /* Only instructions decoded with exactly one valid operand get a callout. */
+  if (arm64.op_count != 1) return;
+
+  operand = &arm64.operands[0];
+
+  if (operand->type == ARM64_OP_INVALID) return;
+
+  gum_stalker_iterator_put_callout(iterator, cmplog_call_callout, NULL, NULL);
+
+}
+
+static void cmplog_handle_cmp_sub(GumCpuContext *context, gsize operand1,
+                                  gsize operand2, uint8_t size) {
+  /* Record one operand pair in the cmp map slot derived from context->pc. */
+  gsize address = context->pc;
+
+  register uintptr_t k = (uintptr_t)address;
+
+  k = (k >> 4) ^ (k << 8);
+  k &= CMP_MAP_W - 1;
+
+  __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+
+  u32 hits = __afl_cmp_map->headers[k].hits;
+  __afl_cmp_map->headers[k].hits = hits + 1;
+
+  __afl_cmp_map->headers[k].shape = (size - 1);
+  /* The pre-increment hit count, masked, selects the log entry to fill. */
+  hits &= CMP_MAP_H - 1;
+  __afl_cmp_map->log[k][hits].v0 = operand1;
+  __afl_cmp_map->log[k][hits].v1 = operand2;
+
+}
+
+static void cmplog_cmp_sub_callout(GumCpuContext *context, gpointer user_data) {
+  /* Callout: evaluate both captured operands and log them if both resolve. */
+  cmplog_pair_ctx_t *pair = (cmplog_pair_ctx_t *)user_data;
+  gsize              lhs, rhs;
+
+  if (cmplog_get_operand_value(context, &pair->operand1, &lhs) &&
+      cmplog_get_operand_value(context, &pair->operand2, &rhs)) {
+
+    cmplog_handle_cmp_sub(context, lhs, rhs, pair->size);
+
+  }
+}
+
+static void cmplog_instrument_cmp_sub_put_callout(GumStalkerIterator *iterator,
+                                                  cs_arm64_op *       operand1,
+                                                  cs_arm64_op *       operand2,
+                                                  size_t              size) {
+  /* Capture both operands now; the callout (which frees ctx) reads them later. */
+  cmplog_pair_ctx_t *ctx = g_malloc(sizeof(cmplog_pair_ctx_t));
+  if (ctx == NULL) return;
+
+  cmplog_instrument_put_operand(&ctx->operand1, operand1);
+  cmplog_instrument_put_operand(&ctx->operand2, operand2);
+  ctx->size = size;
+  /* g_malloc() does not zero; cmplog_read_mem() is given the operand's size. */
+  ctx->operand1.size = ctx->operand2.size = (uint8_t)size;
+  gum_stalker_iterator_put_callout(iterator, cmplog_cmp_sub_callout, ctx,
+                                   g_free);
+}
+
+static void cmplog_instrument_cmp_sub(const cs_insn *     instr,
+                                      GumStalkerIterator *iterator) {
+  /* Queue an operand-logging callout for the instruction ids listed below. */
+  cs_arm64     arm64 = instr->detail->arm64;
+  cs_arm64_op *operand1;
+  cs_arm64_op *operand2;
+  size_t       size;
+
+  switch (instr->id) {
+
+    case ARM64_INS_ADCS:
+    case ARM64_INS_ADDS:
+    case ARM64_INS_ANDS:
+    case ARM64_INS_BICS:
+    case ARM64_INS_CMN:
+    case ARM64_INS_CMP:
+    case ARM64_INS_CMPEQ:
+    case ARM64_INS_CMPGE:
+    case ARM64_INS_CMPGT:
+    case ARM64_INS_CMPHI:
+    case ARM64_INS_CMPHS:
+    case ARM64_INS_CMPLE:
+    case ARM64_INS_CMPLO:
+    case ARM64_INS_CMPLS:
+    case ARM64_INS_CMPLT:
+    case ARM64_INS_CMPNE:
+    case ARM64_INS_EORS:
+    case ARM64_INS_NANDS:
+    case ARM64_INS_NEGS:
+    case ARM64_INS_NGCS:
+    case ARM64_INS_NORS:
+    case ARM64_INS_NOTS:
+    case ARM64_INS_ORNS:
+    case ARM64_INS_ORRS:
+    case ARM64_INS_SBCS:
+    case ARM64_INS_SUBS:
+      break;
+
+    default:
+      return;
+
+  }
+  /* Only instructions decoded with exactly two valid operands are logged. */
+  if (arm64.op_count != 2) return;
+
+  operand1 = &arm64.operands[0];
+  operand2 = &arm64.operands[1];
+
+  if (operand1->type == ARM64_OP_INVALID) return;
+  if (operand2->type == ARM64_OP_INVALID) return;
+  /* The comparison size is taken from the first operand. */
+  size = ctx_get_size(instr, &arm64.operands[0]);
+
+  cmplog_instrument_cmp_sub_put_callout(iterator, operand1, operand2, size);
+
+}
+
 void cmplog_instrument(const cs_insn *instr, GumStalkerIterator *iterator) {
 
-  UNUSED_PARAMETER(instr);
-  UNUSED_PARAMETER(iterator);
-  if (__afl_cmp_map == NULL) { return; }
-  FATAL("CMPLOG mode not supported on this architecture");
+  if (__afl_cmp_map == NULL) return;
+  /* Each helper filters on instr->id and returns early if it does not apply. */
+  cmplog_instrument_call(instr, iterator);
+  cmplog_instrument_cmp_sub(instr, iterator);
 
 }
 
diff --git a/frida_mode/src/ctx/ctx_arm32.c b/frida_mode/src/ctx/ctx_arm32.c
new file mode 100644
index 00000000..a5c6f6d4
--- /dev/null
+++ b/frida_mode/src/ctx/ctx_arm32.c
@@ -0,0 +1,16 @@
+#include "frida-gum.h"
+
+#include "debug.h"
+
+#include "ctx.h"
+
+#if defined(__arm__)
+
+gsize ctx_read_reg(GumIA32CpuContext *ctx, x86_reg reg) {
+  /* NOTE(review): compiled under __arm__ but uses IA32/x86 types; check ctx.h. */
+  FATAL("ctx_read_reg unimplemented for this architecture");
+
+}
+
+#endif
+
diff --git a/frida_mode/src/ctx/ctx_arm64.c b/frida_mode/src/ctx/ctx_arm64.c
new file mode 100644
index 00000000..d09896af
--- /dev/null
+++ b/frida_mode/src/ctx/ctx_arm64.c
@@ -0,0 +1,303 @@
+#include "frida-gum.h"
+
+#include "debug.h"
+
+#include "ctx.h"
+
+#if defined(__aarch64__)
+
+  #define ARM64_REG_8(LABEL, REG) \
+    case LABEL: {                 \
+      /* emits a switch case */   \
+      return REG & GUM_INT8_MASK; \
+                                  \
+    }
+
+  #define ARM64_REG_16(LABEL, REG)   \
+    case LABEL: {                    \
+      /* emits a switch case */      \
+      return (REG & GUM_INT16_MASK); \
+                                     \
+    }
+
+  #define ARM64_REG_32(LABEL, REG)   \
+    case LABEL: {                    \
+      /* emits a switch case */      \
+      return (REG & GUM_INT32_MASK); \
+                                     \
+    }
+
+  #define ARM64_REG_64(LABEL, REG) \
+    case LABEL: {                  \
+      /* emits a switch case */    \
+      return (REG);                \
+                                   \
+    }
+
+gsize ctx_read_reg(GumArm64CpuContext *ctx, arm64_reg reg) {
+  /* Return the value of capstone register `reg` taken from the saved context. */
+  switch (reg) {
+
+    case ARM64_REG_WZR:
+    case ARM64_REG_XZR:
+      return 0;
+      /* NOTE(review): B<n>/H<n> are served from x[n]/fp/lr/sp here - confirm. */
+      ARM64_REG_8(ARM64_REG_B0, ctx->x[0])
+      ARM64_REG_8(ARM64_REG_B1, ctx->x[1])
+      ARM64_REG_8(ARM64_REG_B2, ctx->x[2])
+      ARM64_REG_8(ARM64_REG_B3, ctx->x[3])
+      ARM64_REG_8(ARM64_REG_B4, ctx->x[4])
+      ARM64_REG_8(ARM64_REG_B5, ctx->x[5])
+      ARM64_REG_8(ARM64_REG_B6, ctx->x[6])
+      ARM64_REG_8(ARM64_REG_B7, ctx->x[7])
+      ARM64_REG_8(ARM64_REG_B8, ctx->x[8])
+      ARM64_REG_8(ARM64_REG_B9, ctx->x[9])
+      ARM64_REG_8(ARM64_REG_B10, ctx->x[10])
+      ARM64_REG_8(ARM64_REG_B11, ctx->x[11])
+      ARM64_REG_8(ARM64_REG_B12, ctx->x[12])
+      ARM64_REG_8(ARM64_REG_B13, ctx->x[13])
+      ARM64_REG_8(ARM64_REG_B14, ctx->x[14])
+      ARM64_REG_8(ARM64_REG_B15, ctx->x[15])
+      ARM64_REG_8(ARM64_REG_B16, ctx->x[16])
+      ARM64_REG_8(ARM64_REG_B17, ctx->x[17])
+      ARM64_REG_8(ARM64_REG_B18, ctx->x[18])
+      ARM64_REG_8(ARM64_REG_B19, ctx->x[19])
+      ARM64_REG_8(ARM64_REG_B20, ctx->x[20])
+      ARM64_REG_8(ARM64_REG_B21, ctx->x[21])
+      ARM64_REG_8(ARM64_REG_B22, ctx->x[22])
+      ARM64_REG_8(ARM64_REG_B23, ctx->x[23])
+      ARM64_REG_8(ARM64_REG_B24, ctx->x[24])
+      ARM64_REG_8(ARM64_REG_B25, ctx->x[25])
+      ARM64_REG_8(ARM64_REG_B26, ctx->x[26])
+      ARM64_REG_8(ARM64_REG_B27, ctx->x[27])
+      ARM64_REG_8(ARM64_REG_B28, ctx->x[28])
+      ARM64_REG_8(ARM64_REG_B29, ctx->fp)
+      ARM64_REG_8(ARM64_REG_B30, ctx->lr)
+      ARM64_REG_8(ARM64_REG_B31, ctx->sp)
+
+      ARM64_REG_16(ARM64_REG_H0, ctx->x[0])
+      ARM64_REG_16(ARM64_REG_H1, ctx->x[1])
+      ARM64_REG_16(ARM64_REG_H2, ctx->x[2])
+      ARM64_REG_16(ARM64_REG_H3, ctx->x[3])
+      ARM64_REG_16(ARM64_REG_H4, ctx->x[4])
+      ARM64_REG_16(ARM64_REG_H5, ctx->x[5])
+      ARM64_REG_16(ARM64_REG_H6, ctx->x[6])
+      ARM64_REG_16(ARM64_REG_H7, ctx->x[7])
+      ARM64_REG_16(ARM64_REG_H8, ctx->x[8])
+      ARM64_REG_16(ARM64_REG_H9, ctx->x[9])
+      ARM64_REG_16(ARM64_REG_H10, ctx->x[10])
+      ARM64_REG_16(ARM64_REG_H11, ctx->x[11])
+      ARM64_REG_16(ARM64_REG_H12, ctx->x[12])
+      ARM64_REG_16(ARM64_REG_H13, ctx->x[13])
+      ARM64_REG_16(ARM64_REG_H14, ctx->x[14])
+      ARM64_REG_16(ARM64_REG_H15, ctx->x[15])
+      ARM64_REG_16(ARM64_REG_H16, ctx->x[16])
+      ARM64_REG_16(ARM64_REG_H17, ctx->x[17])
+      ARM64_REG_16(ARM64_REG_H18, ctx->x[18])
+      ARM64_REG_16(ARM64_REG_H19, ctx->x[19])
+      ARM64_REG_16(ARM64_REG_H20, ctx->x[20])
+      ARM64_REG_16(ARM64_REG_H21, ctx->x[21])
+      ARM64_REG_16(ARM64_REG_H22, ctx->x[22])
+      ARM64_REG_16(ARM64_REG_H23, ctx->x[23])
+      ARM64_REG_16(ARM64_REG_H24, ctx->x[24])
+      ARM64_REG_16(ARM64_REG_H25, ctx->x[25])
+      ARM64_REG_16(ARM64_REG_H26, ctx->x[26])
+      ARM64_REG_16(ARM64_REG_H27, ctx->x[27])
+      ARM64_REG_16(ARM64_REG_H28, ctx->x[28])
+      ARM64_REG_16(ARM64_REG_H29, ctx->fp)
+      ARM64_REG_16(ARM64_REG_H30, ctx->lr)
+      ARM64_REG_16(ARM64_REG_H31, ctx->sp)
+      /* W registers, including WSP (sp masked with GUM_INT32_MASK). */
+      ARM64_REG_32(ARM64_REG_W0, ctx->x[0])
+      ARM64_REG_32(ARM64_REG_W1, ctx->x[1])
+      ARM64_REG_32(ARM64_REG_W2, ctx->x[2])
+      ARM64_REG_32(ARM64_REG_W3, ctx->x[3])
+      ARM64_REG_32(ARM64_REG_W4, ctx->x[4])
+      ARM64_REG_32(ARM64_REG_W5, ctx->x[5])
+      ARM64_REG_32(ARM64_REG_W6, ctx->x[6])
+      ARM64_REG_32(ARM64_REG_W7, ctx->x[7])
+      ARM64_REG_32(ARM64_REG_W8, ctx->x[8])
+      ARM64_REG_32(ARM64_REG_W9, ctx->x[9])
+      ARM64_REG_32(ARM64_REG_W10, ctx->x[10])
+      ARM64_REG_32(ARM64_REG_W11, ctx->x[11])
+      ARM64_REG_32(ARM64_REG_W12, ctx->x[12])
+      ARM64_REG_32(ARM64_REG_W13, ctx->x[13])
+      ARM64_REG_32(ARM64_REG_W14, ctx->x[14])
+      ARM64_REG_32(ARM64_REG_W15, ctx->x[15])
+      ARM64_REG_32(ARM64_REG_W16, ctx->x[16])
+      ARM64_REG_32(ARM64_REG_W17, ctx->x[17])
+      ARM64_REG_32(ARM64_REG_W18, ctx->x[18])
+      ARM64_REG_32(ARM64_REG_W19, ctx->x[19])
+      ARM64_REG_32(ARM64_REG_W20, ctx->x[20])
+      ARM64_REG_32(ARM64_REG_W21, ctx->x[21])
+      ARM64_REG_32(ARM64_REG_W22, ctx->x[22])
+      ARM64_REG_32(ARM64_REG_W23, ctx->x[23])
+      ARM64_REG_32(ARM64_REG_W24, ctx->x[24])
+      ARM64_REG_32(ARM64_REG_W25, ctx->x[25])
+      ARM64_REG_32(ARM64_REG_W26, ctx->x[26])
+      ARM64_REG_32(ARM64_REG_W27, ctx->x[27])
+      ARM64_REG_32(ARM64_REG_W28, ctx->x[28])
+      ARM64_REG_32(ARM64_REG_W29, ctx->fp)
+      ARM64_REG_32(ARM64_REG_W30, ctx->lr)
+      ARM64_REG_32(ARM64_REG_WSP, ctx->sp)
+      ARM64_REG_64(ARM64_REG_X0, ctx->x[0])
+      ARM64_REG_64(ARM64_REG_X1, ctx->x[1])
+      ARM64_REG_64(ARM64_REG_X2, ctx->x[2])
+      ARM64_REG_64(ARM64_REG_X3, ctx->x[3])
+      ARM64_REG_64(ARM64_REG_X4, ctx->x[4])
+      ARM64_REG_64(ARM64_REG_X5, ctx->x[5])
+      ARM64_REG_64(ARM64_REG_X6, ctx->x[6])
+      ARM64_REG_64(ARM64_REG_X7, ctx->x[7])
+      ARM64_REG_64(ARM64_REG_X8, ctx->x[8])
+      ARM64_REG_64(ARM64_REG_X9, ctx->x[9])
+      ARM64_REG_64(ARM64_REG_X10, ctx->x[10])
+      ARM64_REG_64(ARM64_REG_X11, ctx->x[11])
+      ARM64_REG_64(ARM64_REG_X12, ctx->x[12])
+      ARM64_REG_64(ARM64_REG_X13, ctx->x[13])
+      ARM64_REG_64(ARM64_REG_X14, ctx->x[14])
+      ARM64_REG_64(ARM64_REG_X15, ctx->x[15])
+      ARM64_REG_64(ARM64_REG_X16, ctx->x[16])
+      ARM64_REG_64(ARM64_REG_X17, ctx->x[17])
+      ARM64_REG_64(ARM64_REG_X18, ctx->x[18])
+      ARM64_REG_64(ARM64_REG_X19, ctx->x[19])
+      ARM64_REG_64(ARM64_REG_X20, ctx->x[20])
+      ARM64_REG_64(ARM64_REG_X21, ctx->x[21])
+      ARM64_REG_64(ARM64_REG_X22, ctx->x[22])
+      ARM64_REG_64(ARM64_REG_X23, ctx->x[23])
+      ARM64_REG_64(ARM64_REG_X24, ctx->x[24])
+      ARM64_REG_64(ARM64_REG_X25, ctx->x[25])
+      ARM64_REG_64(ARM64_REG_X26, ctx->x[26])
+      ARM64_REG_64(ARM64_REG_X27, ctx->x[27])
+      ARM64_REG_64(ARM64_REG_X28, ctx->x[28])
+      ARM64_REG_64(ARM64_REG_FP, ctx->fp)
+      ARM64_REG_64(ARM64_REG_LR, ctx->lr)
+      ARM64_REG_64(ARM64_REG_SP, ctx->sp)
+
+    default:
+      FATAL("Failed to read register: %d", reg);
+      return 0;
+
+  }
+
+}
+
+size_t ctx_get_size(const cs_insn *instr, cs_arm64_op *operand) {
+  /* Bytes accessed by instr, judged from its mnemonic and the given operand. */
+  uint8_t num_registers;
+  uint8_t count_byte;
+  char    vas_digit;
+  size_t  mnemonic_len;
+
+  switch (instr->id) {
+
+    case ARM64_INS_STP:
+    case ARM64_INS_STXP:
+    case ARM64_INS_STNP:
+    case ARM64_INS_STLXP:
+    case ARM64_INS_LDP:
+    case ARM64_INS_LDXP:
+    case ARM64_INS_LDNP:
+      num_registers = 2;
+      break;
+    default:
+      num_registers = 1;
+      break;
+
+  }
+
+  mnemonic_len = strlen(instr->mnemonic);
+  if (mnemonic_len == 0) { FATAL("No mnemonic found"); }
+  /* A trailing b/h/w in the mnemonic is taken as the access width. */
+  char last = instr->mnemonic[mnemonic_len - 1];
+  switch (last) {
+
+    case 'b':
+      return 1;
+    case 'h':
+      return 2;
+    case 'w':
+      return 4 * num_registers;
+
+  }
+  /* No vector arrangement: size follows the register class of the operand. */
+  if (operand->vas == ARM64_VAS_INVALID) {
+
+    if (operand->type == ARM64_OP_REG) {
+
+      switch (operand->reg) {
+
+        case ARM64_REG_WZR:
+        case ARM64_REG_WSP:
+        case ARM64_REG_W0 ... ARM64_REG_W30:
+        case ARM64_REG_S0 ... ARM64_REG_S31:
+          return 4 * num_registers;
+        case ARM64_REG_D0 ... ARM64_REG_D31:
+          return 8 * num_registers;
+        /* Q registers are 16 bytes each, so a pair transfers 32. */
+        case ARM64_REG_Q0 ... ARM64_REG_Q31:
+          return 16 * num_registers;
+        default:
+          return 8 * num_registers;
+
+      }
+
+    }
+
+    return 8 * num_registers;
+
+  }
+  /* Vector operand: arrangement size times the digit of an st<N>/ld<N>. */
+  if (g_str_has_prefix(instr->mnemonic, "st") ||
+      g_str_has_prefix(instr->mnemonic, "ld")) {
+
+    if (mnemonic_len < 3) {
+
+      FATAL("VAS Mnemonic too short: %s\n", instr->mnemonic);
+
+    }
+
+    vas_digit = instr->mnemonic[2];
+    if (vas_digit < '0' || vas_digit > '9') {
+
+      FATAL("VAS Mnemonic digit out of range: %s\n", instr->mnemonic);
+
+    }
+
+    count_byte = vas_digit - '0';
+
+  } else {
+
+    count_byte = 1;
+
+  }
+
+  switch (operand->vas) {
+
+    case ARM64_VAS_1B:
+      return 1 * count_byte;
+    case ARM64_VAS_1H:
+      return 2 * count_byte;
+    case ARM64_VAS_4B:
+    case ARM64_VAS_1S:
+    case ARM64_VAS_2H:
+      return 4 * count_byte;
+    case ARM64_VAS_8B:
+    case ARM64_VAS_4H:
+    case ARM64_VAS_2S:
+    case ARM64_VAS_1D:
+      return 8 * count_byte;
+    case ARM64_VAS_8H:
+    case ARM64_VAS_4S:
+    case ARM64_VAS_2D:
+    case ARM64_VAS_1Q:
+    case ARM64_VAS_16B:
+      return 16 * count_byte;
+    default:
+      FATAL("Unexpected VAS type: %s %d", instr->mnemonic, operand->vas);
+
+  }
+
+}
+
+#endif
+
diff --git a/frida_mode/src/instrument/instrument.c b/frida_mode/src/instrument/instrument.c
index cd1ac0be..f261e79a 100644
--- a/frida_mode/src/instrument/instrument.c
+++ b/frida_mode/src/instrument/instrument.c
@@ -84,6 +84,8 @@ static void instr_basic_block(GumStalkerIterator *iterator,
 
   while (gum_stalker_iterator_next(iterator, &instr)) {
 
+    if (unlikely(begin)) { instrument_debug_start(instr->address, output); }
+
     if (instr->address == entry_start) { entry_prologue(iterator, output); }
     if (instr->address == persistent_start) { persistent_prologue(output); }
     if (instr->address == persistent_ret) { persistent_epilogue(output); }
@@ -119,8 +121,6 @@ static void instr_basic_block(GumStalkerIterator *iterator,
 
     if (unlikely(begin)) {
 
-      instrument_debug_start(instr->address, output);
-
       prefetch_write(GSIZE_TO_POINTER(instr->address));
 
       if (likely(!excluded)) {
@@ -155,6 +155,7 @@ static void instr_basic_block(GumStalkerIterator *iterator,
 
   }
 
+  instrument_flush(output);
   instrument_debug_end(output);
 
 }
diff --git a/frida_mode/src/instrument/instrument_arm32.c b/frida_mode/src/instrument/instrument_arm32.c
index 1a3c40bb..450a69a3 100644
--- a/frida_mode/src/instrument/instrument_arm32.c
+++ b/frida_mode/src/instrument/instrument_arm32.c
@@ -22,5 +22,17 @@ void instrument_coverage_optimize(const cs_insn *   instr,
 
 }
 
+void instrument_flush(GumStalkerOutput *output) {
+  /* Calls gum_arm_writer_flush() on this output's ARM writer. */
+  gum_arm_writer_flush(output->writer.arm);
+
+}
+
+gpointer instrument_cur(GumStalkerOutput *output) {
+  /* Returns gum_arm_writer_cur() for this output's ARM writer. */
+  return gum_arm_writer_cur(output->writer.arm);
+
+}
+
 #endif
 
diff --git a/frida_mode/src/instrument/instrument_arm64.c b/frida_mode/src/instrument/instrument_arm64.c
index fa3afb48..49ee86a2 100644
--- a/frida_mode/src/instrument/instrument_arm64.c
+++ b/frida_mode/src/instrument/instrument_arm64.c
@@ -93,5 +93,17 @@ void instrument_coverage_optimize(const cs_insn *   instr,
 
 }
 
+void instrument_flush(GumStalkerOutput *output) {
+  /* Calls gum_arm64_writer_flush() on this output's ARM64 writer. */
+  gum_arm64_writer_flush(output->writer.arm64);
+
+}
+
+gpointer instrument_cur(GumStalkerOutput *output) {
+  /* Returns gum_arm64_writer_cur() for this output's ARM64 writer. */
+  return gum_arm64_writer_cur(output->writer.arm64);
+
+}
+
 #endif
 
diff --git a/frida_mode/src/instrument/instrument_debug.c b/frida_mode/src/instrument/instrument_debug.c
index f8c1df77..0ce26a1c 100644
--- a/frida_mode/src/instrument/instrument_debug.c
+++ b/frida_mode/src/instrument/instrument_debug.c
@@ -7,6 +7,7 @@
 
 #include "debug.h"
 
+#include "instrument.h"
 #include "util.h"
 
 static int      debugging_fd = -1;
@@ -31,44 +32,50 @@ static void instrument_debug(char *format, ...) {
 
 }
 
-static void instrument_disasm(guint8 *code, guint size) {
+static void instrument_disasm(guint8 *start, guint8 *end) {
-
+  /* Disassemble [start, end) to the debug log; dump undecodable words raw. */
   csh      capstone;
   cs_err   err;
+  uint16_t size;
   cs_insn *insn;
-  size_t   count, i;
+  size_t   count = 0;
+  size_t   i;
+  uint16_t len = 0;
 
   err = cs_open(GUM_DEFAULT_CS_ARCH,
                 GUM_DEFAULT_CS_MODE | GUM_DEFAULT_CS_ENDIAN, &capstone);
   g_assert(err == CS_ERR_OK);
 
-  count = cs_disasm(capstone, code, size, GPOINTER_TO_SIZE(code), 0, &insn);
-  g_assert(insn != NULL);
+  size = GPOINTER_TO_SIZE(end) - GPOINTER_TO_SIZE(start);
 
-  for (i = 0; i != count; i++) {
+  for (guint8 *curr = start; curr < end; curr += len, size -= len, len = 0) {
 
-    instrument_debug("\t0x%" G_GINT64_MODIFIER "x\t%s %s\n", insn[i].address,
-                     insn[i].mnemonic, insn[i].op_str);
+    count = cs_disasm(capstone, curr, size, GPOINTER_TO_SIZE(curr), 0, &insn);
+    if (count == 0) {
 
-  }
+      instrument_debug("\t0x%" G_GINT64_MODIFIER "x\t* 0x%016" G_GSIZE_MODIFIER
+                       "x\n",
+                       (guint64)GPOINTER_TO_SIZE(curr), *(size_t *)curr);
 
-  cs_free(insn, count);
+      len += sizeof(size_t);
+      continue;
 
-  cs_close(&capstone);
+    }
 
-}
+    for (i = 0; i != count; i++) {
+
+      instrument_debug("\t0x%" G_GINT64_MODIFIER "x\t%s %s\n", insn[i].address,
+                       insn[i].mnemonic, insn[i].op_str);
+
+      len += insn[i].size;
 
-static gpointer instrument_cur(GumStalkerOutput *output) {
+    }
 
-#if defined(__i386__) || defined(__x86_64__)
-  return gum_x86_writer_cur(output->writer.x86);
-#elif defined(__aarch64__)
-  return gum_arm64_writer_cur(output->writer.arm64);
-#elif defined(__arm__)
-  return gum_arm_writer_cur(output->writer.arm);
-#else
-  #error "Unsupported architecture"
-#endif
+    /* Release this pass's instructions; the next cs_disasm() allocates anew. */
+    cs_free(insn, count);
+  }
+
+  cs_close(&capstone);
 
 }
 
@@ -111,7 +118,7 @@ void instrument_debug_instruction(uint64_t address, uint16_t size) {
 
   if (likely(debugging_fd < 0)) { return; }
   uint8_t *start = (uint8_t *)GSIZE_TO_POINTER(address);
-  instrument_disasm(start, size);
+  instrument_disasm(start, start + size);
 
 }
 
@@ -119,11 +126,10 @@ void instrument_debug_end(GumStalkerOutput *output) {
 
   if (likely(debugging_fd < 0)) { return; }
   gpointer instrument_gen_end = instrument_cur(output);
-  uint16_t size = GPOINTER_TO_SIZE(instrument_gen_end) -
-                  GPOINTER_TO_SIZE(instrument_gen_start);
 
-  instrument_debug("\nGenerated block %p\n", instrument_gen_start);
-  instrument_disasm(instrument_gen_start, size);
+  instrument_debug("\nGenerated block %p-%p\n", instrument_gen_start,
+                   instrument_gen_end);
+  instrument_disasm(instrument_gen_start, instrument_gen_end);
 
 }
 
diff --git a/frida_mode/src/instrument/instrument_x64.c b/frida_mode/src/instrument/instrument_x64.c
index 901f3bd0..7000e65d 100644
--- a/frida_mode/src/instrument/instrument_x64.c
+++ b/frida_mode/src/instrument/instrument_x64.c
@@ -89,5 +89,17 @@ void instrument_coverage_optimize(const cs_insn *   instr,
 
 }
 
+void instrument_flush(GumStalkerOutput *output) {
+  /* Calls gum_x86_writer_flush() on this output's x86 writer. */
+  gum_x86_writer_flush(output->writer.x86);
+
+}
+
+gpointer instrument_cur(GumStalkerOutput *output) {
+  /* Returns gum_x86_writer_cur() for this output's x86 writer. */
+  return gum_x86_writer_cur(output->writer.x86);
+
+}
+
 #endif
 
diff --git a/frida_mode/src/instrument/instrument_x86.c b/frida_mode/src/instrument/instrument_x86.c
index 585bb5b8..04a19e08 100644
--- a/frida_mode/src/instrument/instrument_x86.c
+++ b/frida_mode/src/instrument/instrument_x86.c
@@ -81,5 +81,17 @@ void instrument_coverage_optimize(const cs_insn *   instr,
 
 }
 
+void instrument_flush(GumStalkerOutput *output) {
+  /* Calls gum_x86_writer_flush() on this output's x86 writer. */
+  gum_x86_writer_flush(output->writer.x86);
+
+}
+
+gpointer instrument_cur(GumStalkerOutput *output) {
+  /* Returns gum_x86_writer_cur() for this output's x86 writer. */
+  return gum_x86_writer_cur(output->writer.x86);
+
+}
+
 #endif
 
diff --git a/frida_mode/src/persistent/persistent_arm64.c b/frida_mode/src/persistent/persistent_arm64.c
index 1215d8da..b23693fe 100644
--- a/frida_mode/src/persistent/persistent_arm64.c
+++ b/frida_mode/src/persistent/persistent_arm64.c
@@ -1,9 +1,11 @@
+#include <unistd.h>
 #include "frida-gum.h"
 
 #include "config.h"
 #include "debug.h"
 
 #include "instrument.h"
+#include "persistent.h"
 #include "util.h"
 
 #if defined(__aarch64__)
@@ -98,23 +100,365 @@ struct arm64_regs {
 
 typedef struct arm64_regs arch_api_regs;
 
+static arch_api_regs saved_regs = {0}; /* filled by the emitted prologue */
+static gpointer      saved_lr = NULL;  /* LR on entry to the original func */
+
 gboolean persistent_is_supported(void) {
 
-  return false;
+  return true; /* the arm64 prologue/epilogue below are now implemented */
+
+}
+
+static void instrument_persitent_save_regs(GumArm64Writer *   cw,
+                                           struct arm64_regs *regs) {
+  /* Emit code that stores x0-x30, SP, NZCV and q0-q7 into *regs. */
+  GumAddress    regs_address = GUM_ADDRESS(regs);
+  const guint32 mrs_x1_nzcv = 0xd53b4201; /* mrs x1, nzcv */
+  /* Push x0-x3, leaving GUM_RED_ZONE_SIZE bytes below the old SP untouched */
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(
+      cw, ARM64_REG_X0, ARM64_REG_X1, ARM64_REG_SP, -(16 + GUM_RED_ZONE_SIZE),
+      GUM_INDEX_PRE_ADJUST);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X3,
+                                              ARM64_REG_SP, -(16),
+                                              GUM_INDEX_PRE_ADJUST);
+  /* x1 = NZCV; it is stored in the CPSR slot further down */
+  gum_arm64_writer_put_instruction(cw, mrs_x1_nzcv);
+  /* x0 = base address of the save area */
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X0,
+                                       GUM_ADDRESS(regs_address));
+
+  /* x0 & x1 are stored last, from their copies on the stack */
+
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X3,
+                                              ARM64_REG_X0, (16 * 1),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X4, ARM64_REG_X5,
+                                              ARM64_REG_X0, (16 * 2),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X6, ARM64_REG_X7,
+                                              ARM64_REG_X0, (16 * 3),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X8, ARM64_REG_X9,
+                                              ARM64_REG_X0, (16 * 4),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X10, ARM64_REG_X11,
+                                              ARM64_REG_X0, (16 * 5),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X12, ARM64_REG_X13,
+                                              ARM64_REG_X0, (16 * 6),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X14, ARM64_REG_X15,
+                                              ARM64_REG_X0, (16 * 7),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X16, ARM64_REG_X17,
+                                              ARM64_REG_X0, (16 * 8),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X18, ARM64_REG_X19,
+                                              ARM64_REG_X0, (16 * 9),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X20, ARM64_REG_X21,
+                                              ARM64_REG_X0, (16 * 10),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X22, ARM64_REG_X23,
+                                              ARM64_REG_X0, (16 * 11),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X24, ARM64_REG_X25,
+                                              ARM64_REG_X0, (16 * 12),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X26, ARM64_REG_X27,
+                                              ARM64_REG_X0, (16 * 13),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X28, ARM64_REG_X29,
+                                              ARM64_REG_X0, (16 * 14),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* LR & SP as it was before the two pushes above */
+  gum_arm64_writer_put_add_reg_reg_imm(cw, ARM64_REG_X2, ARM64_REG_SP,
+                                       (GUM_RED_ZONE_SIZE + 32));
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X30, ARM64_REG_X2,
+                                              ARM64_REG_X0, (16 * 15),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* PC slot (persistent_start) & CPSR slot (NZCV held in x1) */
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X2,
+                                       GUM_ADDRESS(persistent_start));
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X1,
+                                              ARM64_REG_X0, (16 * 16),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  /* NOTE(review): a Q pair is 32 bytes, offsets step by 16 - pairs overlap? */
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_Q0, ARM64_REG_Q1,
+                                              ARM64_REG_X0, (16 * 17),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_Q2, ARM64_REG_Q3,
+                                              ARM64_REG_X0, (16 * 18),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_Q4, ARM64_REG_Q5,
+                                              ARM64_REG_X0, (16 * 19),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_Q6, ARM64_REG_Q7,
+                                              ARM64_REG_X0, (16 * 20),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* x0 & x1: fetch the pushed originals via x2/x3 and store them */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X3,
+                                              ARM64_REG_SP, 16,
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X3,
+                                              ARM64_REG_X0, (16 * 0),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* Pop x2/x3, then x0/x1 together with the red-zone gap */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(
+      cw, ARM64_REG_X2, ARM64_REG_X3, ARM64_REG_SP, 16, GUM_INDEX_POST_ADJUST);
+
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(
+      cw, ARM64_REG_X0, ARM64_REG_X1, ARM64_REG_SP, 16 + GUM_RED_ZONE_SIZE,
+      GUM_INDEX_POST_ADJUST);
+
+}
+
+static void instrument_persitent_restore_regs(GumArm64Writer *   cw,
+                                              struct arm64_regs *regs) {
+  /* Emit code reloading x0-x30, NZCV and q0-q7 from *regs (not SP or PC). */
+  GumAddress    regs_address = GUM_ADDRESS(regs);
+  const guint32 msr_nzcv_x1 = 0xd51b4201; /* msr nzcv, x1 */
+  /* x0 = base address of the save area */
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X0,
+                                       GUM_ADDRESS(regs_address));
+
+  /* Skip x0 - x3 for now: x0 is the base and x1/x2 are scratch below */
+
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X4, ARM64_REG_X5,
+                                              ARM64_REG_X0, (16 * 2),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X6, ARM64_REG_X7,
+                                              ARM64_REG_X0, (16 * 3),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X8, ARM64_REG_X9,
+                                              ARM64_REG_X0, (16 * 4),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X10, ARM64_REG_X11,
+                                              ARM64_REG_X0, (16 * 5),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X12, ARM64_REG_X13,
+                                              ARM64_REG_X0, (16 * 6),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X14, ARM64_REG_X15,
+                                              ARM64_REG_X0, (16 * 7),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X16, ARM64_REG_X17,
+                                              ARM64_REG_X0, (16 * 8),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X18, ARM64_REG_X19,
+                                              ARM64_REG_X0, (16 * 9),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X20, ARM64_REG_X21,
+                                              ARM64_REG_X0, (16 * 10),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X22, ARM64_REG_X23,
+                                              ARM64_REG_X0, (16 * 11),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X24, ARM64_REG_X25,
+                                              ARM64_REG_X0, (16 * 12),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X26, ARM64_REG_X27,
+                                              ARM64_REG_X0, (16 * 13),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X28, ARM64_REG_X29,
+                                              ARM64_REG_X0, (16 * 14),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* Don't restore PC or SP; x1 & x2 merely absorb those slots */
+
+  /* LR & Adjusted SP (SP is discarded into x1) */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X30, ARM64_REG_X1,
+                                              ARM64_REG_X0, (16 * 15),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* PC (discarded into x2) & CPSR (x1, then written to NZCV) */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X1,
+                                              ARM64_REG_X0, (16 * 16),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_instruction(cw, msr_nzcv_x1);
+  /* NOTE(review): a Q pair is 32 bytes, offsets step by 16 - pairs overlap? */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_Q0, ARM64_REG_Q1,
+                                              ARM64_REG_X0, (16 * 17),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_Q2, ARM64_REG_Q3,
+                                              ARM64_REG_X0, (16 * 18),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_Q4, ARM64_REG_Q5,
+                                              ARM64_REG_X0, (16 * 19),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_Q6, ARM64_REG_Q7,
+                                              ARM64_REG_X0, (16 * 20),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* x2 & x3 */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X3,
+                                              ARM64_REG_X0, (16 * 1),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  /* x0 & x1, last because x0 is the base register */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X0, ARM64_REG_X1,
+                                              ARM64_REG_X0, (16 * 0),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+}
+
+static void instrument_exit(GumArm64Writer *cw) {
+  /* Emit a call to _exit() with x0 zeroed first, i.e. _exit(0). */
+  gum_arm64_writer_put_mov_reg_reg(cw, ARM64_REG_X0, ARM64_REG_XZR);
+  gum_arm64_writer_put_call_address_with_arguments(
+      cw, GUM_ADDRESS(_exit), 1, GUM_ARG_REGISTER, ARM64_REG_X0);
+
+}
+
+static int instrument_afl_persistent_loop_func(void) {
+  /* Forward to __afl_persistent_loop(), clear previous_pc, return its result */
+  int ret = __afl_persistent_loop(persistent_count);
+  previous_pc = 0;
+  return ret;
+
+}
+
+static void instrument_afl_persistent_loop(GumArm64Writer *cw) {
+  /* Emit a call to the loop function, stepping SP over the red zone first */
+  gum_arm64_writer_put_sub_reg_reg_imm(cw, ARM64_REG_SP, ARM64_REG_SP,
+                                       GUM_RED_ZONE_SIZE);
+  gum_arm64_writer_put_call_address_with_arguments(
+      cw, GUM_ADDRESS(instrument_afl_persistent_loop_func), 0);
+  gum_arm64_writer_put_add_reg_reg_imm(cw, ARM64_REG_SP, ARM64_REG_SP,
+                                       GUM_RED_ZONE_SIZE);
+
+}
+
+static void persistent_prologue_hook(GumArm64Writer *   cw,
+                                     struct arm64_regs *regs) {
+  /* Emit hook(regs, 0, __afl_fuzz_ptr, *__afl_fuzz_len); no-op without hook */
+  if (hook == NULL) return;
+
+  gum_arm64_writer_put_sub_reg_reg_imm(cw, ARM64_REG_SP, ARM64_REG_SP,
+                                       GUM_RED_ZONE_SIZE);
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X3,
+                                       GUM_ADDRESS(&__afl_fuzz_len));
+  gum_arm64_writer_put_ldr_reg_reg_offset(cw, ARM64_REG_X3, ARM64_REG_X3, 0);
+  gum_arm64_writer_put_ldr_reg_reg_offset(cw, ARM64_REG_X3, ARM64_REG_X3, 0);
+  /* 64-bit load of a (presumably) 32-bit length: keep the low 32 bits only */
+  gum_arm64_writer_put_and_reg_reg_imm(cw, ARM64_REG_X3, ARM64_REG_X3,
+                                       G_MAXUINT32);
+
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X2,
+                                       GUM_ADDRESS(&__afl_fuzz_ptr));
+  gum_arm64_writer_put_ldr_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X2, 0);
+
+  gum_arm64_writer_put_call_address_with_arguments(
+      cw, GUM_ADDRESS(hook), 4, GUM_ARG_ADDRESS, GUM_ADDRESS(regs),
+      GUM_ARG_ADDRESS, GUM_ADDRESS(0), GUM_ARG_REGISTER, ARM64_REG_X2,
+      GUM_ARG_REGISTER, ARM64_REG_X3);
+
+  gum_arm64_writer_put_add_reg_reg_imm(cw, ARM64_REG_SP, ARM64_REG_SP,
+                                       GUM_RED_ZONE_SIZE);
+
+}
+
+static void instrument_persitent_save_lr(GumArm64Writer *cw) {
+  /* Emit code storing LR into saved_lr; x0/x1 are preserved via the stack. */
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(
+      cw, ARM64_REG_X0, ARM64_REG_X1, ARM64_REG_SP, -(16 + GUM_RED_ZONE_SIZE),
+      GUM_INDEX_PRE_ADJUST);
+
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X0,
+                                       GUM_ADDRESS(&saved_lr));
+
+  gum_arm64_writer_put_str_reg_reg_offset(cw, ARM64_REG_LR, ARM64_REG_X0, 0);
+
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(
+      cw, ARM64_REG_X0, ARM64_REG_X1, ARM64_REG_SP, 16 + GUM_RED_ZONE_SIZE,
+      GUM_INDEX_POST_ADJUST);
 
 }
 
 void persistent_prologue(GumStalkerOutput *output) {
 
-  UNUSED_PARAMETER(output);
-  FATAL("Persistent mode not supported on this architecture");
+  /*
+   *  save regs
+   * loop:
+   *  call instrument_afl_persistent_loop_func
+   *  cmp  x0, xzr
+   *  b.eq done
+   *  call hook (optionally)
+   *  restore regs
+   *  bl   original
+   *  b    loop
+   *
+   * done:
+   *  call _exit(0)
+   *
+   * original:
+   *  save LR (persistent_epilogue branches back to it)
+   *  brk #0 (only if persistent_debug)
+   *  INSTRUMENTED PERSISTENT FUNC
+   */
+
+  GumArm64Writer *cw = output->writer.arm64;
+  /* Label ids: writer position + 1, presumably just to get unique values */
+  gconstpointer loop = cw->code + 1;
+
+  /* Stack must be 16-byte aligned per ABI */
+  instrument_persitent_save_regs(cw, &saved_regs);
+
+  /* loop: */
+  gum_arm64_writer_put_label(cw, loop);
+
+  /* call instrument_afl_persistent_loop_func */
+  instrument_afl_persistent_loop(cw);
+
+  /* b.eq done (taken when the loop function returned 0) */
+  gconstpointer done = cw->code + 1;
+  gum_arm64_writer_put_cmp_reg_reg(cw, ARM64_REG_X0, ARM64_REG_XZR);
+  gum_arm64_writer_put_b_cond_label(cw, ARM64_CC_EQ, done);
+
+  /* Optionally call the persistent hook */
+  persistent_prologue_hook(cw, &saved_regs);
+
+  instrument_persitent_restore_regs(cw, &saved_regs);
+  gconstpointer original = cw->code + 1;
+  /* bl original: LR then points at the 'b loop' emitted next */
+
+  gum_arm64_writer_put_bl_label(cw, original);
+
+  /* b loop */
+  gum_arm64_writer_put_b_label(cw, loop);
+
+  /* done: */
+  gum_arm64_writer_put_label(cw, done);
+  /* No more iterations: terminate the process with _exit(0) */
+  instrument_exit(cw);
+
+  /* original: */
+  gum_arm64_writer_put_label(cw, original);
+  /* Record LR so persistent_epilogue can branch back into the loop */
+  instrument_persitent_save_lr(cw);
+
+  if (persistent_debug) { gum_arm64_writer_put_brk_imm(cw, 0); }
 
 }
 
 void persistent_epilogue(GumStalkerOutput *output) {
 
-  UNUSED_PARAMETER(output);
-  FATAL("Persistent mode not supported on this architecture");
+  GumArm64Writer *cw = output->writer.arm64;
+
+  if (persistent_debug) { gum_arm64_writer_put_brk_imm(cw, 0); }
+  /* Step SP up by persistent_ret_offset before leaving the function */
+  gum_arm64_writer_put_add_reg_reg_imm(cw, ARM64_REG_SP, ARM64_REG_SP,
+                                       persistent_ret_offset);
+  /* Branch to the address held in saved_lr; this clobbers x0 */
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X0,
+                                       GUM_ADDRESS(&saved_lr));
+
+  gum_arm64_writer_put_ldr_reg_reg_offset(cw, ARM64_REG_X0, ARM64_REG_X0, 0);
+
+  gum_arm64_writer_put_br_reg(cw, ARM64_REG_X0);
 
 }
 
diff --git a/frida_mode/src/persistent/persistent_x64.c b/frida_mode/src/persistent/persistent_x64.c
index 4cb960fc..858ad38e 100644
--- a/frida_mode/src/persistent/persistent_x64.c
+++ b/frida_mode/src/persistent/persistent_x64.c
@@ -306,8 +306,6 @@ void persistent_prologue(GumStalkerOutput *output) {
 
   if (persistent_debug) { gum_x86_writer_put_breakpoint(cw); }
 
-  gum_x86_writer_flush(cw);
-
 }
 
 void persistent_epilogue(GumStalkerOutput *output) {
diff --git a/frida_mode/src/persistent/persistent_x86.c b/frida_mode/src/persistent/persistent_x86.c
index b30dfadf..0675edf4 100644
--- a/frida_mode/src/persistent/persistent_x86.c
+++ b/frida_mode/src/persistent/persistent_x86.c
@@ -246,8 +246,6 @@ void persistent_prologue(GumStalkerOutput *output) {
 
   if (persistent_debug) { gum_x86_writer_put_breakpoint(cw); }
 
-  gum_x86_writer_flush(cw);
-
 }
 
 void persistent_epilogue(GumStalkerOutput *output) {
diff --git a/frida_mode/src/stats/stats.c b/frida_mode/src/stats/stats.c
index 662fb6d5..0d7b9fb0 100644
--- a/frida_mode/src/stats/stats.c
+++ b/frida_mode/src/stats/stats.c
@@ -96,7 +96,6 @@ void stats_init(void) {
 void stats_vprint(int fd, char *format, va_list ap) {
 
   char buffer[4096] = {0};
-  int  ret;
   int  len;
 
   if (vsnprintf(buffer, sizeof(buffer) - 1, format, ap) < 0) { return; }
diff --git a/frida_mode/src/stats/stats_arm.c b/frida_mode/src/stats/stats_arm32.c
index 7eea7f91..7eea7f91 100644
--- a/frida_mode/src/stats/stats_arm.c
+++ b/frida_mode/src/stats/stats_arm32.c