From 405382cbddea8b99543c3fddcaa5738b1ed3ade3 Mon Sep 17 00:00:00 2001 From: WorksButNotTested <62701594+WorksButNotTested@users.noreply.github.com> Date: Tue, 6 Jul 2021 20:15:30 +0100 Subject: Frida build fixes (#1010) Co-authored-by: Your Name --- frida_mode/hook/qemu_hook.c | 192 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 frida_mode/hook/qemu_hook.c (limited to 'frida_mode/hook/qemu_hook.c') diff --git a/frida_mode/hook/qemu_hook.c b/frida_mode/hook/qemu_hook.c new file mode 100644 index 00000000..5b4f65b1 --- /dev/null +++ b/frida_mode/hook/qemu_hook.c @@ -0,0 +1,192 @@ +#include +#include + +#if defined(__x86_64__) + +struct x86_64_regs { + + uint64_t rax, rbx, rcx, rdx, rdi, rsi, rbp, r8, r9, r10, r11, r12, r13, r14, + r15; + + union { + + uint64_t rip; + uint64_t pc; + + }; + + union { + + uint64_t rsp; + uint64_t sp; + + }; + + union { + + uint64_t rflags; + uint64_t flags; + + }; + + uint8_t zmm_regs[32][64]; + +}; + +void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { + + memcpy((void *)regs->rdi, input_buf, input_buf_len); + regs->rsi = input_buf_len; + +} + +#elif defined(__i386__) + +struct x86_regs { + + uint32_t eax, ebx, ecx, edx, edi, esi, ebp; + + union { + + uint32_t eip; + uint32_t pc; + + }; + + union { + + uint32_t esp; + uint32_t sp; + + }; + + union { + + uint32_t eflags; + uint32_t flags; + + }; + + uint8_t xmm_regs[8][16]; + +}; + +void afl_persistent_hook(struct x86_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { + + void **esp = (void **)regs->esp; + void * arg1 = esp[1]; + void **arg2 = &esp[2]; + memcpy(arg1, input_buf, input_buf_len); + *arg2 = (void *)input_buf_len; + +} +#elif defined(__aarch64__) + +struct arm64_regs { + + uint64_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10; + + union { + + uint64_t x11; + uint32_t fp_32; + + }; + + union { + + uint64_t x12; + uint32_t ip_32; + + }; + + union { + + uint64_t x13; + uint32_t sp_32; + + }; + + union { + + uint64_t x14; + uint32_t lr_32; + + }; + + union { + + uint64_t x15; + uint32_t pc_32; + + }; + + union { + + uint64_t x16; + uint64_t ip0; + + }; + + union { + + uint64_t x17; + uint64_t ip1; + + }; + + uint64_t x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28; + + union { + + uint64_t x29; + uint64_t fp; + + }; + + union { + + uint64_t x30; + uint64_t lr; + + }; + + union { + + uint64_t x31; + uint64_t sp; + + }; + + // the zero register is not saved here ofc + + uint64_t pc; + + uint32_t cpsr; + + uint8_t vfp_zregs[32][16 * 16]; + uint8_t vfp_pregs[17][32]; + uint32_t vfp_xregs[16]; + +}; + +void afl_persistent_hook(struct arm64_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { + + memcpy((void *)regs->x0, input_buf, input_buf_len); + regs->x1 = input_buf_len; +} + +#else + #pragma error "Unsupported architecture" +#endif + +int afl_persistent_hook_init(void) { + + // 1 for shared memory input (faster), 0 for normal input (you have to use + // read(), input_buf will be NULL) + return 1; + +} -- cgit 1.4.1 From 94999782f1a3742e3e755a66f5d76e84573ae6ef Mon Sep 17 00:00:00 2001 From: WorksButNotTested <62701594+WorksButNotTested@users.noreply.github.com> Date: Wed, 14 Jul 2021 08:48:37 +0100 Subject: Improved block and edge numbering to reduce collisions (#1021) Co-authored-by: Your Name --- frida_mode/GNUmakefile | 30 ++++-- frida_mode/MapDensity.md | 147 +++++++++++++++++++++++++++ frida_mode/README.md | 4 + frida_mode/hook/qemu_hook.c | 3 + frida_mode/include/instrument.h | 16 ++- frida_mode/many-linux/Dockerfile | 2 +- frida_mode/src/entry.c | 2 +- frida_mode/src/instrument/instrument.c | 62 ++++++++--- frida_mode/src/instrument/instrument_arm64.c | 16 +-- frida_mode/src/instrument/instrument_x64.c | 19 ++-- frida_mode/src/instrument/instrument_x86.c | 22 ++-- frida_mode/src/persistent/persistent_arm64.c | 2 +- frida_mode/src/persistent/persistent_x64.c | 2 +- frida_mode/src/persistent/persistent_x86.c | 2 +- frida_mode/util/get_symbol_addr.sh | 2 +- 15 files changed, 277 insertions(+), 54 deletions(-) create mode 100644 frida_mode/MapDensity.md (limited to 'frida_mode/hook/qemu_hook.c') diff --git a/frida_mode/GNUmakefile b/frida_mode/GNUmakefile index 582cf8d6..44dfafe3 100644 --- a/frida_mode/GNUmakefile +++ b/frida_mode/GNUmakefile @@ -21,7 +21,7 @@ CFLAGS+=-fPIC \ -funroll-loops \ -ffunction-sections \ -RT_CFLAGS:=-Wno-unused-parameter \ +AFL_CFLAGS:=-Wno-unused-parameter \ -Wno-sign-compare \ -Wno-unused-function \ -Wno-unused-result \ @@ -60,10 +60,10 @@ endif ifeq "$(shell uname)" "Darwin" OS:=macos - RT_CFLAGS:=$(RT_CFLAGS) -Wno-deprecated-declarations + AFL_CFLAGS:=$(AFL_CFLAGS) -Wno-deprecated-declarations else ifdef DEBUG - RT_CFLAGS:=$(RT_CFLAGS) -Wno-prio-ctor-dtor + AFL_CFLAGS:=$(AFL_CFLAGS) -Wno-prio-ctor-dtor endif LDFLAGS+= -z noexecstack \ -Wl,--gc-sections \ @@ -79,7 +79,12 @@ ifndef OS $(error "Operating system unsupported") endif +ifeq "$(ARCH)" "arm64" +# 15.0.0 Not released for aarch64 yet +GUM_DEVKIT_VERSION=14.2.18 +else GUM_DEVKIT_VERSION=15.0.0 +endif GUM_DEVKIT_FILENAME=frida-gumjs-devkit-$(GUM_DEVKIT_VERSION)-$(OS)-$(ARCH).tar.xz GUM_DEVKIT_URL="https://github.com/frida/frida/releases/download/$(GUM_DEVKIT_VERSION)/$(GUM_DEVKIT_FILENAME)" @@ -98,6 +103,9 @@ FRIDA_GUM_DEVKIT_COMPRESSED_TARBALL:=$(FRIDA_DIR)build/$(GUM_DEVKIT_FILENAME) AFL_COMPILER_RT_SRC:=$(ROOT)instrumentation/afl-compiler-rt.o.c AFL_COMPILER_RT_OBJ:=$(OBJ_DIR)afl-compiler-rt.o +AFL_PERFORMANCE_SRC:=$(ROOT)src/afl-performance.c +AFL_PERFORMANCE_OBJ:=$(OBJ_DIR)afl-performance.o + HOOK_DIR:=$(PWD)hook/ AFLPP_FRIDA_DRIVER_HOOK_SRC=$(HOOK_DIR)frida_hook.c AFLPP_FRIDA_DRIVER_HOOK_OBJ=$(BUILD_DIR)frida_hook.so @@ -163,7 +171,16 @@ $(GUM_DEVIT_HEADER): $(GUM_DEVKIT_TARBALL) $(AFL_COMPILER_RT_OBJ): $(AFL_COMPILER_RT_SRC) $(CC) \ $(CFLAGS) \ - $(RT_CFLAGS) \ + $(AFL_CFLAGS) \ + -I $(ROOT) \ + -I $(ROOT)include \ + -o $@ \ + -c $< + +$(AFL_PERFORMANCE_OBJ): $(AFL_PERFORMANCE_SRC) + $(CC) \ + $(CFLAGS) \ + $(AFL_CFLAGS) \ -I $(ROOT) \ -I $(ROOT)include \ -o $@ \ @@ -172,7 +189,7 @@ $(AFL_COMPILER_RT_OBJ): $(AFL_COMPILER_RT_SRC) ############################### JS ############################################# $(BIN2C): $(BIN2C_SRC) - $(CC) -o $@ $< + $(CC) -D_GNU_SOURCE -o $@ $< $(JS_SRC): $(JS) $(BIN2C)| $(BUILD_DIR) cd $(JS_DIR) && $(BIN2C) api_js $(JS) $@ @@ -203,12 +220,13 @@ $(foreach src,$(SOURCES),$(eval $(call BUILD_SOURCE,$(src),$(OBJ_DIR)$(notdir $( ######################## AFL-FRIDA-TRACE ####################################### -$(FRIDA_TRACE): $(GUM_DEVIT_LIBRARY) $(GUM_DEVIT_HEADER) $(OBJS) $(JS_OBJ) $(AFL_COMPILER_RT_OBJ) GNUmakefile | $(BUILD_DIR) +$(FRIDA_TRACE): $(GUM_DEVIT_LIBRARY) $(GUM_DEVIT_HEADER) $(OBJS) $(JS_OBJ) $(AFL_COMPILER_RT_OBJ) $(AFL_PERFORMANCE_OBJ) GNUmakefile | $(BUILD_DIR) $(CXX) \ $(OBJS) \ $(JS_OBJ) \ $(GUM_DEVIT_LIBRARY) \ $(AFL_COMPILER_RT_OBJ) \ + $(AFL_PERFORMANCE_OBJ) \ $(LDFLAGS) \ $(LDSCRIPT) \ -o $@ \ diff --git a/frida_mode/MapDensity.md b/frida_mode/MapDensity.md new file mode 100644 index 00000000..f4ae3ace --- /dev/null +++ b/frida_mode/MapDensity.md @@ -0,0 +1,147 @@ +# Map Density + +# How Coverage Works +The coverage in AFL++ works by assigning each basic block of code a unique ID +and during execution when transitioning between blocks (e.g. by calls or jumps) +assigning each of these edges an ID based upon the source and destination block +ID. + +For each individual execution of the target, a single dimensional byte array +indexed by the edge ID is used to count how many times each edge is traversed. + +A single dimensional cumulative byte array is also constructed where each byte +again represents an individual edge ID, but this time, the value of the byte +represents a range of how many times that edge has been traversed. + +```1, 2, 3, 4-7, 8-15, 16-31, 32-127, 128+``` + +The theory is that a new path isn't particularly interesting if an edge has been +traversed `23` instead of `24` times for example, but is interesting if an edge +has been traversed for the very first time, or the number of times fits within a different bucket. + +After each run, the count of times each edge is hit is compared to the values in +the cumulative map and if it is different, then the input is kept as a new seed +and the cumulative map is updated. + +This mechanism is described in greater detail in the seminal +[paper](https://lcamtuf.coredump.cx/afl/technical_details.txt) on AFL by +[lcamtuf](https://github.com/lcamtuf). + +# Collisions +In black-box fuzzing, we must assume that control may flow from any block to any +other block, since we don't know any better. Thus for a target with `n` basic +blocks of code, there are `n * n` potential edges. As we can see, even with a +small number of edges, a very large map will be required so that we have space +to fit them all. Even if our target only had `1024` blocks, this would require a +map containing `1048576` entries (or 1Mb in size). + +Whilst this may not seem like a lot of memory, it causes problems for two reasons. Firstly, the processing step after each execution must now process much more +data, and secondly a map this size is unlikely to fit within the L2 cache of the processor. Since this is a very hot code path, we are likely to pay a very heavy +performance cost. + +Therefore, we must accept that not all edges can have a unique and that +therefore there will be collisions. This means that if the fuzzer finds a new +path by uncovering an edge which was not previously found, but that the same +edge ID is used by another edge, then it may go completely unnoticed. This is +obviously undesirable, but equally if our map is too large, then we will not be +able to process as many potential inputs in the same time and hence not uncover +edges for that reason. Thus a careful trade-off of map size must be made. + +# Block & Edge Numbering +Since the original AFL, blocks and edges have always been numbered in the same +way as we can see from the following C snippet from the whitepaper. + +```c + cur_location = (block_address >> 4) ^ (block_address << 8); + shared_mem[cur_location ^ prev_location]++; + prev_location = cur_location >> 1; + +``` + +Each block ID is generated by performing a shift and XOR on its address. Then +the edge ID is calculated as `E = B ^ (B' >> 1)`. Here, we can make two +observations. In fact, the edge ID is also masked to ensure it is less than the +size of the map being used. + +## Block IDs +Firstly, the block ID doesn't have very good entropy. If we consider the address +of the block, then whilst each block has a unique ID, it isn't necessarily very +evenly distributed. + +We start with a large address, and need to discard a large number of the bits to +generate a block ID which is within range. But how do we choose the unique bits +of the address verus those which are the same for every block? The high bits of +the address may simply be all `0s` or all `1s` to make the address cannonical, +the middle portion of the address may be the same for all blocks (since if they +are all within the same binary, then they will all be adjacent in memory), and +on some systems, even the low bits may have poor entropy as some use fixed +length aligned instructions. Then we need to consider that a portion of each +binary may contain the `.data` or `.bss` sections and so may not contain any +blocks of code at all. + +## Edge IDs +Secondly, we can observe that when we generate an edge ID from the source and +destination block IDs, we perform a right shift on the source block ID. Whilst +there are good reasons as set out in the whitepaper why such a transform is +applied, in so doing, we dispose of `1` bit of precious entropy in our source +block ID. + +All together, this means that some edge IDs may be more popular than others. +This means that some portions of the map may be very densly populated with large +numbers of edges, whilst others may be very sparsely populated, or not populated +at all. + +# Improvements +One of the main reaons why this algorithm selected, is performance. All of the +operations are very quick to perform and given we may be carrying this out for +every block of code we execute, performance is critical. + +However, the design of the binary instrumentation modes of AFL++ has moved on. +Both QEMU and FRIDA modes use a two stage process when executing a target +application. Each block is first compiled or instrumented, and then it is +executed. The compiled blocks can be re-used each time the target executes them. + +Since a blocks ID is based on its address, and this is known at compile time, we +only need to generate this ID once per block and so this ID generation no longer +needs to be as performant. We can therefore use a hash algorithm to generate +this ID and therefore ensure that the block IDs are more evenly distributed. + +Edge IDs however, can only be determined at run-time. Since we don't know which +blocks a given input will traverse until we run it. However, given our block IDs +are now evenly distributed, generating an evenly distributed edge ID becomes +simple. Here, the only change we make is to use a rotate operation rather than +a shift operation so we don't lose a bit of entropy from the source ID. + +So our new algorithm becomes: +```c + cur_location = hash(block_address) + shared_mem[cur_location ^ prev_location]++; + prev_location = rotate(cur_location, 1); +``` + +Lastly, in the original design, the `cur_location` was always set to `0`, at the +beginning of a run, we instead set the value of `cur_location` to `hash(0)`. + +# Parallel Fuzzing +Another sub-optimal aspect of the original design is that no matter how many +instances of the fuzzer you ran in parallel, each instance numbered each block +and so each edge with the same ID. Each instance would therefore find the same +subset of edges collide with each other. In the event of a collision, all +instances will hit the same road block. + +However, if we instead use a different seed for our hashing function for each +instance, then each will ascribe each block a different ID and hence each edge +will be given a different edge ID. This means that whilst one instance of the +fuzzer may find a given pair of edges collide, it is very unlikely that another +instance will find the same pair also collide. + +Due to the collaborative nature of parallel fuzzing, this means that whilst one +instance may struggle to find a particular new path because the new edge +collides, another instance will likely not encounter the same collision and thus +be able to differentiate this new path and share it with the other instances. + +If only a single new edge is found, and the new path is shared with an instance +for which that edge collides, that instance may disregard it as irrelevant. In +practice, however, the discovery of a single new edge, likely leads to several +more edges beneath it also being found and therefore the likelihood of all of +these being collisions is very slim. diff --git a/frida_mode/README.md b/frida_mode/README.md index 024fc140..6cbb4c4c 100644 --- a/frida_mode/README.md +++ b/frida_mode/README.md @@ -293,6 +293,10 @@ FASAN then adds instrumentation for any instrucutions which use memory operands then calls into the `__asan_loadN` and `__asan_storeN` functions provided by the DSO to validate memory accesses against the shadow memory. +# Collisions +FRIDA mode has also introduced some improvements to reduce collisions in the map. +See [here](MapDensity.md) for details. + ## TODO The next features to be added are Aarch32 support as well as looking at diff --git a/frida_mode/hook/qemu_hook.c b/frida_mode/hook/qemu_hook.c index 5b4f65b1..56e787e3 100644 --- a/frida_mode/hook/qemu_hook.c +++ b/frida_mode/hook/qemu_hook.c @@ -36,6 +36,7 @@ struct x86_64_regs { void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, uint8_t *input_buf, uint32_t input_buf_len) { + (void)guest_base; /* unused */ memcpy((void *)regs->rdi, input_buf, input_buf_len); regs->rsi = input_buf_len; @@ -75,6 +76,7 @@ struct x86_regs { void afl_persistent_hook(struct x86_regs *regs, uint64_t guest_base, uint8_t *input_buf, uint32_t input_buf_len) { + (void)guest_base; /* unused */ void **esp = (void **)regs->esp; void * arg1 = esp[1]; void **arg2 = &esp[2]; @@ -175,6 +177,7 @@ struct arm64_regs { void afl_persistent_hook(struct arm64_regs *regs, uint64_t guest_base, uint8_t *input_buf, uint32_t input_buf_len) { + (void)guest_base; /* unused */ memcpy((void *)regs->x0, input_buf, input_buf_len); regs->x1 = input_buf_len; } diff --git a/frida_mode/include/instrument.h b/frida_mode/include/instrument.h index 9c8d3a5d..695b46af 100644 --- a/frida_mode/include/instrument.h +++ b/frida_mode/include/instrument.h @@ -5,11 +5,12 @@ #include "config.h" -extern char * instrument_debug_filename; -extern gboolean instrument_tracing; -extern gboolean instrument_optimize; -extern gboolean instrument_unique; -extern __thread uint64_t instrument_previous_pc; +extern char * instrument_debug_filename; +extern gboolean instrument_tracing; +extern gboolean instrument_optimize; +extern gboolean instrument_unique; +extern __thread guint64 instrument_previous_pc; +extern guint64 instrument_hash_zero; extern uint8_t *__afl_area_ptr; extern uint32_t __afl_map_size; @@ -33,5 +34,10 @@ void instrument_debug_instruction(uint64_t address, uint16_t size); void instrument_debug_end(GumStalkerOutput *output); void instrument_flush(GumStalkerOutput *output); gpointer instrument_cur(GumStalkerOutput *output); + +void instrument_on_fork(); + +guint64 instrument_get_offset_hash(GumAddress current_rip); + #endif diff --git a/frida_mode/many-linux/Dockerfile b/frida_mode/many-linux/Dockerfile index 1d39c356..2cd56bc8 100644 --- a/frida_mode/many-linux/Dockerfile +++ b/frida_mode/many-linux/Dockerfile @@ -11,7 +11,7 @@ RUN git clone https://github.com/AFLplusplus/AFLplusplus.git WORKDIR /AFLplusplus RUN mkdir -p /AFLplusplus/frida_mode/build/frida/ -RUN curl -L -o /AFLplusplus/frida_mode/build/frida/frida-gumjs-devkit-14.2.18-linux-x86_64.tar.xz "https://github.com/frida/frida/releases/download/14.2.18/frida-gumjs-devkit-14.2.18-linux-x86_64.tar.xz" +RUN curl -L -o /AFLplusplus/frida_mode/build/frida/frida-gumjs-devkit-15.0.0-linux-x86_64.tar.xz "https://github.com/frida/frida/releases/download/15.0.0/frida-gumjs-devkit-15.0.0-linux-x86_64.tar.xz" WORKDIR /AFLplusplus RUN git checkout dev diff --git a/frida_mode/src/entry.c b/frida_mode/src/entry.c index f70e21fc..a0ffd028 100644 --- a/frida_mode/src/entry.c +++ b/frida_mode/src/entry.c @@ -21,7 +21,7 @@ static void entry_launch(void) { __afl_manual_init(); /* Child here */ - instrument_previous_pc = 0; + instrument_on_fork(); stats_on_fork(); } diff --git a/frida_mode/src/instrument/instrument.c b/frida_mode/src/instrument/instrument.c index 2d857716..81d14013 100644 --- a/frida_mode/src/instrument/instrument.c +++ b/frida_mode/src/instrument/instrument.c @@ -6,6 +6,7 @@ #include "config.h" #include "debug.h" +#include "hash.h" #include "asan.h" #include "entry.h" @@ -22,10 +23,12 @@ gboolean instrument_tracing = false; gboolean instrument_optimize = false; gboolean instrument_unique = false; +guint64 instrument_hash_zero = 0; +guint64 instrument_hash_seed = 0; static GumStalkerTransformer *transformer = NULL; -__thread uint64_t instrument_previous_pc = 0; +__thread guint64 instrument_previous_pc = 0; static GumAddress previous_rip = 0; static u8 * edges_notified = NULL; @@ -49,21 +52,18 @@ static void trace_debug(char *format, ...) { } -__attribute__((hot)) static void on_basic_block(GumCpuContext *context, - gpointer user_data) { +guint64 instrument_get_offset_hash(GumAddress current_rip) { - UNUSED_PARAMETER(context); + guint64 area_offset = hash64((unsigned char *)¤t_rip, + sizeof(GumAddress), instrument_hash_seed); + return area_offset &= MAP_SIZE - 1; - GumAddress current_rip = GUM_ADDRESS(user_data); - GumAddress current_pc; - GumAddress edge; - uint8_t * cursor; - uint64_t value; +} - current_pc = (current_rip >> 4) ^ (current_rip << 8); - current_pc &= MAP_SIZE - 1; +__attribute__((hot)) static void instrument_increment_map(GumAddress edge) { - edge = current_pc ^ instrument_previous_pc; + uint8_t *cursor; + uint64_t value; cursor = &__afl_area_ptr[edge]; value = *cursor; @@ -79,7 +79,21 @@ __attribute__((hot)) static void on_basic_block(GumCpuContext *context, } *cursor = value; - instrument_previous_pc = current_pc >> 1; + +} + +__attribute__((hot)) static void on_basic_block(GumCpuContext *context, + gpointer user_data) { + + UNUSED_PARAMETER(context); + + GumAddress current_rip = GUM_ADDRESS(user_data); + guint64 current_pc = instrument_get_offset_hash(current_rip); + guint64 edge; + + edge = current_pc ^ instrument_previous_pc; + + instrument_increment_map(edge); if (unlikely(instrument_tracing)) { @@ -98,6 +112,9 @@ __attribute__((hot)) static void on_basic_block(GumCpuContext *context, } + instrument_previous_pc = + ((current_pc & (MAP_SIZE - 1) >> 1)) | ((current_pc & 0x1) << 15); + } static void instrument_basic_block(GumStalkerIterator *iterator, @@ -265,6 +282,19 @@ void instrument_init(void) { } + /* + * By using a different seed value for the hash, we can make different + * instances have edge collisions in different places when carrying out + * parallel fuzzing. The seed itself, doesn't have to be random, it just + * needs to be different for each instance. + */ + instrument_hash_seed = + g_get_monotonic_time() ^ (((guint64)getpid()) << 32) ^ gettid(); + + OKF("Instrumentation - seed [0x%016" G_GINT64_MODIFIER "x]", + instrument_hash_seed); + instrument_hash_zero = instrument_get_offset_hash(0); + instrument_debug_init(); asan_init(); cmplog_init(); @@ -278,3 +308,9 @@ GumStalkerTransformer *instrument_get_transformer(void) { } +void instrument_on_fork() { + + instrument_previous_pc = instrument_hash_zero; + +} + diff --git a/frida_mode/src/instrument/instrument_arm64.c b/frida_mode/src/instrument/instrument_arm64.c index 17f97c97..cf37e048 100644 --- a/frida_mode/src/instrument/instrument_arm64.c +++ b/frida_mode/src/instrument/instrument_arm64.c @@ -12,15 +12,15 @@ static GumAddress current_log_impl = GUM_ADDRESS(0); static const guint8 afl_log_code[] = { // __afl_area_ptr[current_pc ^ previous_pc]++; - // previous_pc = current_pc >> 1; + // previous_pc = current_pc ROR 1; 0xE1, 0x0B, 0xBF, 0xA9, // stp x1, x2, [sp, -0x10]! 0xE3, 0x13, 0xBF, 0xA9, // stp x3, x4, [sp, -0x10]! // x0 = current_pc - 0xe1, 0x01, 0x00, 0x58, // ldr x1, #0x3c, =&__afl_area_ptr + 0x21, 0x02, 0x00, 0x58, // ldr x1, #0x44, =&__afl_area_ptr 0x21, 0x00, 0x40, 0xf9, // ldr x1, [x1] (=__afl_area_ptr) - 0xe2, 0x01, 0x00, 0x58, // ldr x2, #0x3c, =&previous_pc + 0x22, 0x02, 0x00, 0x58, // ldr x2, #0x44, =&previous_pc 0x42, 0x00, 0x40, 0xf9, // ldr x2, [x2] (=previous_pc) // __afl_area_ptr[current_pc ^ previous_pc]++; @@ -30,8 +30,11 @@ static const guint8 afl_log_code[] = { 0x63, 0x00, 0x1f, 0x9a, // adc x3, x3, xzr 0x23, 0x68, 0x22, 0xf8, // str x3, [x1, x2] - // previous_pc = current_pc >> 1; - 0xe0, 0x07, 0x40, 0x8b, // add x0, xzr, x0, LSR #1 + // previous_pc = current_pc ROR 1; + 0xe4, 0x07, 0x40, 0x8b, // add x4, xzr, x0, LSR #1 + 0xe0, 0xff, 0x00, 0x8b, // add x0, xzr, x0, LSL #63 + 0x80, 0xc0, 0x40, 0x8b, // add x0, x4, x0, LSR #48 + 0xe2, 0x00, 0x00, 0x58, // ldr x2, #0x1c, =&previous_pc 0x40, 0x00, 0x00, 0xf9, // str x0, [x2] @@ -54,8 +57,7 @@ void instrument_coverage_optimize(const cs_insn * instr, GumStalkerOutput *output) { guint64 current_pc = instr->address; - guint64 area_offset = (current_pc >> 4) ^ (current_pc << 8); - area_offset &= MAP_SIZE - 1; + guint64 area_offset = instrument_get_offset_hash(GUM_ADDRESS(instr->address)); GumArm64Writer *cw = output->writer.arm64; if (current_log_impl == 0 || diff --git a/frida_mode/src/instrument/instrument_x64.c b/frida_mode/src/instrument/instrument_x64.c index a2b54369..fec8afbb 100644 --- a/frida_mode/src/instrument/instrument_x64.c +++ b/frida_mode/src/instrument/instrument_x64.c @@ -24,7 +24,7 @@ static const guint8 afl_log_code[] = { 0x80, 0x02, 0x01, /* add byte ptr [rdx], 1 */ 0x80, 0x12, 0x00, /* adc byte ptr [rdx], 0 */ - 0x48, 0xd1, 0xef, /* shr rdi, 1 */ + 0x66, 0xd1, 0xcf, /* ror di, 1 */ 0x48, 0x89, 0x39, /* mov qword [rcx], rdi */ 0x5a, /* pop rdx */ @@ -49,13 +49,9 @@ gboolean instrument_is_coverage_optimize_supported(void) { static guint8 align_pad[] = {0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90}; -void instrument_coverage_optimize(const cs_insn * instr, - GumStalkerOutput *output) { +static void instrument_coverate_write_function(GumStalkerOutput *output) { - guint64 current_pc = instr->address; - guint64 area_offset = (current_pc >> 4) ^ (current_pc << 8); - guint64 misalign = 0; - area_offset &= MAP_SIZE - 1; + guint64 misalign = 0; GumX86Writer *cw = output->writer.x86; if (current_log_impl == 0 || @@ -87,6 +83,15 @@ void instrument_coverage_optimize(const cs_insn * instr, } +} + +void instrument_coverage_optimize(const cs_insn * instr, + GumStalkerOutput *output) { + + GumX86Writer *cw = output->writer.x86; + guint64 area_offset = instrument_get_offset_hash(GUM_ADDRESS(instr->address)); + instrument_coverate_write_function(output); + gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP, -GUM_RED_ZONE_SIZE); gum_x86_writer_put_push_reg(cw, GUM_REG_RDI); diff --git a/frida_mode/src/instrument/instrument_x86.c b/frida_mode/src/instrument/instrument_x86.c index 3c3dc272..7bf48f96 100644 --- a/frida_mode/src/instrument/instrument_x86.c +++ b/frida_mode/src/instrument/instrument_x86.c @@ -30,7 +30,8 @@ static void instrument_coverage_function(GumX86Writer *cw) { uint8_t adc_byte_ptr_edx_0[] = {0x80, 0x12, 0x00}; gum_x86_writer_put_bytes(cw, adc_byte_ptr_edx_0, sizeof(adc_byte_ptr_edx_0)); - gum_x86_writer_put_shr_reg_u8(cw, GUM_REG_EDI, 1); + uint8_t ror_di_1[] = {0x66, 0xd1, 0xcf}; + gum_x86_writer_put_bytes(cw, ror_di_1, sizeof(ror_di_1)); gum_x86_writer_put_mov_reg_ptr_reg(cw, GUM_REG_ECX, GUM_REG_EDI); gum_x86_writer_put_pop_reg(cw, GUM_REG_EDX); @@ -46,15 +47,8 @@ gboolean instrument_is_coverage_optimize_supported(void) { } -void instrument_coverage_optimize(const cs_insn * instr, - GumStalkerOutput *output) { - - UNUSED_PARAMETER(instr); - UNUSED_PARAMETER(output); +static void instrument_coverate_write_function(GumStalkerOutput *output) { - guint64 current_pc = instr->address; - guint64 area_offset = (current_pc >> 4) ^ (current_pc << 8); - area_offset &= MAP_SIZE - 1; GumX86Writer *cw = output->writer.x86; if (current_log_impl == 0 || @@ -73,7 +67,15 @@ void instrument_coverage_optimize(const cs_insn * instr, } - // gum_x86_writer_put_breakpoint(cw); +} + +void instrument_coverage_optimize(const cs_insn * instr, + GumStalkerOutput *output) { + + GumX86Writer *cw = output->writer.x86; + guint64 area_offset = instrument_get_offset_hash(GUM_ADDRESS(instr->address)); + instrument_coverate_write_function(output); + gum_x86_writer_put_push_reg(cw, GUM_REG_EDI); gum_x86_writer_put_mov_reg_address(cw, GUM_REG_EDI, area_offset); gum_x86_writer_put_call_address(cw, current_log_impl); diff --git a/frida_mode/src/persistent/persistent_arm64.c b/frida_mode/src/persistent/persistent_arm64.c index 4ab7b283..3cd61cd5 100644 --- a/frida_mode/src/persistent/persistent_arm64.c +++ b/frida_mode/src/persistent/persistent_arm64.c @@ -237,7 +237,7 @@ static void instrument_exit(GumArm64Writer *cw) { static int instrument_afl_persistent_loop_func(void) { int ret = __afl_persistent_loop(persistent_count); - instrument_previous_pc = 0; + instrument_previous_pc = instrument_hash_zero; return ret; } diff --git a/frida_mode/src/persistent/persistent_x64.c b/frida_mode/src/persistent/persistent_x64.c index ce3017e4..c0bd9a09 100644 --- a/frida_mode/src/persistent/persistent_x64.c +++ b/frida_mode/src/persistent/persistent_x64.c @@ -174,7 +174,7 @@ static void instrument_exit(GumX86Writer *cw) { static int instrument_afl_persistent_loop_func(void) { int ret = __afl_persistent_loop(persistent_count); - instrument_previous_pc = 0; + instrument_previous_pc = instrument_hash_zero; return ret; } diff --git a/frida_mode/src/persistent/persistent_x86.c b/frida_mode/src/persistent/persistent_x86.c index cc1f1a4f..b911676a 100644 --- a/frida_mode/src/persistent/persistent_x86.c +++ b/frida_mode/src/persistent/persistent_x86.c @@ -130,7 +130,7 @@ static void instrument_exit(GumX86Writer *cw) { static int instrument_afl_persistent_loop_func(void) { int ret = __afl_persistent_loop(persistent_count); - instrument_previous_pc = 0; + instrument_previous_pc = instrument_hash_zero; return ret; } diff --git a/frida_mode/util/get_symbol_addr.sh b/frida_mode/util/get_symbol_addr.sh index 7f9b7d22..f5d8df91 100755 --- a/frida_mode/util/get_symbol_addr.sh +++ b/frida_mode/util/get_symbol_addr.sh @@ -26,7 +26,7 @@ file "$target" | grep -q executable && { exit 0 } -hex_base=$(echo "$3" | awk '{sub("^0x","");print $0}') +hex_base=$(echo "$3" | awk '{sub("^0x","");print $0}' | tr a-f A-F ) nm "$target" | grep -i "T $symbol" | awk '{print$1}' | tr a-f A-F | \ xargs echo "ibase=16;obase=10;$hex_base + " | bc | tr A-F a-f | awk '{print "0x"$0}' exit 0 -- cgit 1.4.1