diff options
Diffstat (limited to 'utils')
-rw-r--r-- | utils/README.md | 17 | ||||
-rw-r--r-- | utils/afl_frida/GNUmakefile | 23 | ||||
-rw-r--r-- | utils/afl_frida/Makefile | 2 | ||||
-rw-r--r-- | utils/afl_frida/README.md | 34 | ||||
-rw-r--r-- | utils/afl_frida/afl-frida.c | 397 | ||||
-rw-r--r-- | utils/afl_frida/afl-frida.h | 53 | ||||
-rw-r--r-- | utils/afl_frida/android/README.md | 1 | ||||
-rw-r--r-- | utils/afl_frida/android/frida-gum-example.c | 130 | ||||
-rw-r--r-- | utils/afl_frida/libtestinstr.c | 35 | ||||
-rw-r--r-- | utils/afl_network_proxy/README.md | 2 | ||||
-rw-r--r-- | utils/aflpp_driver/aflpp_driver_test.c | 2 | ||||
-rw-r--r-- | utils/optimin/.gitignore | 11 | ||||
-rw-r--r-- | utils/optimin/CMakeLists.txt | 22 | ||||
-rw-r--r-- | utils/optimin/EVALMAXSAT_VERSION | 1 | ||||
m--------- | utils/optimin/EvalMaxSAT | 0 | ||||
-rw-r--r-- | utils/optimin/README.md | 94 | ||||
-rwxr-xr-x | utils/optimin/build_optimin.sh | 131 | ||||
-rw-r--r-- | utils/optimin/src/CMakeLists.txt | 12 | ||||
-rw-r--r-- | utils/optimin/src/OptiMin.cpp | 702 | ||||
-rwxr-xr-x | utils/qbdi_mode/README.md | 2 |
20 files changed, 992 insertions, 679 deletions
diff --git a/utils/README.md b/utils/README.md index 1c401237..7fd6177a 100644 --- a/utils/README.md +++ b/utils/README.md @@ -2,6 +2,9 @@ Here's a quick overview of the stuff you can find in this directory: + - aflpp_driver - easily instrument LLVMFuzzerTestOneInput() + harnesses. + - afl_network_proxy - fuzz a target over the network: afl-fuzz on a host, target on an embedded system. @@ -15,12 +18,16 @@ Here's a quick overview of the stuff you can find in this directory: - afl_untracer - fuzz binary-only libraries much faster but with less coverage than qemu_mode + - analysis_scripts - random -o out analysis scripts + - argv_fuzzing - a simple wrapper to allow cmdline to be fuzzed (e.g., to test setuid programs). - asan_cgroups - a contributed script to simplify fuzzing ASAN binaries with robust memory limits on Linux. + - autodict_ql - generate dictionary files from source code. + - bash_shellshock - a simple hack used to find a bunch of post-Shellshock bugs in bash. @@ -38,14 +45,24 @@ Here's a quick overview of the stuff you can find in this directory: C and Python. Note: They were moved to ../custom_mutators/examples/ + - defork - intercept fork() in targets + - distributed_fuzzing - a sample script for synchronizing fuzzer instances across multiple machines (see parallel_fuzzing.md). + - libdislocator - like ASAN but lightweight. + + - libtokencap - collect string tokens for a dictionary. + - libpng_no_checksum - a sample patch for removing CRC checks in libpng. + - optimin - An optimal corpus minimizer. + - persistent_mode - an example of how to use the LLVM persistent process mode to speed up certain fuzzing jobs. + - qemu_persistent_hook - persistent mode support module for qemu. 
+ - socket_fuzzing - a LD_PRELOAD library 'redirects' a socket to stdin for fuzzing access with afl++ diff --git a/utils/afl_frida/GNUmakefile b/utils/afl_frida/GNUmakefile deleted file mode 100644 index 8b56415b..00000000 --- a/utils/afl_frida/GNUmakefile +++ /dev/null @@ -1,23 +0,0 @@ -ifdef DEBUG - OPT=-O0 -D_DEBUG=\"1\" -else - OPT=-O3 -funroll-loops -endif - -all: afl-frida libtestinstr.so - -libfrida-gum.a: - @echo Download and extract frida-gum-devkit-VERSION-PLATFORM.tar.xz for your platform from https://github.com/frida/frida/releases/latest - @exit 1 - -afl-frida: afl-frida.c libfrida-gum.a - $(CC) -g $(OPT) -o afl-frida -Wno-format -Wno-pointer-sign -I. -fpermissive -fPIC afl-frida.c ../../afl-compiler-rt.o libfrida-gum.a -ldl -lresolv -pthread - -libtestinstr.so: libtestinstr.c - $(CC) -g -O0 -fPIC -o libtestinstr.so -shared libtestinstr.c - -clean: - rm -f afl-frida *~ core *.o libtestinstr.so - -deepclean: clean - rm -f libfrida-gum.a frida-gum* diff --git a/utils/afl_frida/Makefile b/utils/afl_frida/Makefile deleted file mode 100644 index 0b306dde..00000000 --- a/utils/afl_frida/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -all: - @echo please use GNU make, thanks! diff --git a/utils/afl_frida/README.md b/utils/afl_frida/README.md deleted file mode 100644 index 68b62009..00000000 --- a/utils/afl_frida/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# afl-frida - faster fuzzing of binary-only libraries - -## Introduction - -afl-frida is an example skeleton file which can easily be used to fuzz -a closed source library. - -It requires less memory and is x5-10 faster than qemu_mode but does not -provide interesting features like compcov or cmplog. - -## How-to - -### Modify afl-frida.c - -Read and modify afl-frida.c then `make`. -To adapt afl-frida.c to your needs, read the header of the file and then -search and edit the `STEP 1`, `STEP 2` and `STEP 3` locations. 
- -### Fuzzing - -Example (after modifying afl-frida.c to your needs and compile it): -``` -LD_LIBRARY_PATH=/path/to/the/target/library/ afl-fuzz -i in -o out -- ./afl-frida -``` -(or even remote via afl-network-proxy). - -# Speed and stability - -The speed is very good, about x12 of fork() qemu_mode. -However the stability is low. Reason is currently unknown. - -# Background - -This code is copied for a larger part from https://github.com/meme/hotwax diff --git a/utils/afl_frida/afl-frida.c b/utils/afl_frida/afl-frida.c deleted file mode 100644 index e49d6f42..00000000 --- a/utils/afl_frida/afl-frida.c +++ /dev/null @@ -1,397 +0,0 @@ -/* - american fuzzy lop++ - afl-frida skeleton example - ------------------------------------------------- - - Copyright 2020 AFLplusplus Project. All rights reserved. - - Written mostly by meme -> https://github.com/meme/hotwax - - Modifications by Marc Heuse <mh@mh-sec.de> - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at: - - http://www.apache.org/licenses/LICENSE-2.0 - - HOW-TO - ====== - - You only need to change the following: - - 1. set the defines and function call parameters. - 2. dl load the library you want to fuzz, lookup the functions you need - and setup the calls to these. - 3. in the while loop you call the functions in the necessary order - - incl the cleanup. the cleanup is important! 
- - Just look these steps up in the code, look for "// STEP x:" - -*/ - -#include <stdio.h> -#include <stdint.h> -#include <unistd.h> -#include <stdint.h> -#include <stddef.h> -#include <sys/shm.h> -#include <dlfcn.h> - -#ifdef __APPLE__ - #include <mach/mach.h> - #include <mach-o/dyld_images.h> -#else - #include <sys/wait.h> - #include <sys/personality.h> -#endif - -int debug = 0; - -// STEP 1: - -// The presets are for the example libtestinstr.so: - -/* What is the name of the library to fuzz */ -#define TARGET_LIBRARY "libtestinstr.so" - -/* What is the name of the function to fuzz */ -#define TARGET_FUNCTION "testinstr" - -/* here you need to specify the parameter for the target function */ -static void *(*o_function)(uint8_t *, int); - -// END STEP 1 - -#include "frida-gum.h" - -void instr_basic_block(GumStalkerIterator *iterator, GumStalkerOutput *output, - gpointer user_data); -void afl_setup(void); -void afl_start_forkserver(void); -int __afl_persistent_loop(unsigned int max_cnt); - -#include "../../config.h" - -// Shared memory fuzzing. -int __afl_sharedmem_fuzzing = 1; -extern unsigned int * __afl_fuzz_len; -extern unsigned char *__afl_fuzz_ptr; - -// Notify AFL about persistent mode. -static volatile char AFL_PERSISTENT[] = "##SIG_AFL_PERSISTENT##\0"; -int __afl_persistent_loop(unsigned int); - -// Notify AFL about deferred forkserver. -static volatile char AFL_DEFER_FORKSVR[] = "##SIG_AFL_DEFER_FORKSRV##\0"; -void __afl_manual_init(); - -// Because we do our own logging. -extern uint8_t * __afl_area_ptr; -static __thread guint64 previous_pc; - -// Frida stuff below. 
-typedef struct { - - GumAddress base_address; - guint64 code_start, code_end; - GumAddress current_log_impl; - uint64_t afl_prev_loc; - -} range_t; - -inline static void afl_maybe_log(guint64 current_pc) { - - // fprintf(stderr, "PC: %p ^ %p\n", current_pc, previous_pc); - - current_pc = (current_pc >> 4) ^ (current_pc << 8); - current_pc &= MAP_SIZE - 1; - - __afl_area_ptr[current_pc ^ previous_pc]++; - previous_pc = current_pc >> 1; - -} - -#ifdef __x86_64__ - -static const guint8 afl_maybe_log_code[] = { - - 0x9c, // pushfq - 0x50, // push rax - 0x51, // push rcx - 0x52, // push rdx - 0x56, // push rsi - - 0x89, 0xf8, // mov eax, edi - 0xc1, 0xe0, 0x08, // shl eax, 8 - 0xc1, 0xef, 0x04, // shr edi, 4 - 0x31, 0xc7, // xor edi, eax - 0x0f, 0xb7, 0xc7, // movzx eax, di - 0x48, 0x8d, 0x0d, 0x30, 0x00, 0x00, 0x00, // lea rcx, sym._afl_area_ptr_ptr - 0x48, 0x8b, 0x09, // mov rcx, qword [rcx] - 0x48, 0x8b, 0x09, // mov rcx, qword [rcx] - 0x48, 0x8d, 0x15, 0x1b, 0x00, 0x00, 0x00, // lea rdx, sym._afl_prev_loc_ptr - 0x48, 0x8b, 0x32, // mov rsi, qword [rdx] - 0x48, 0x8b, 0x36, // mov rsi, qword [rsi] - 0x48, 0x31, 0xc6, // xor rsi, rax - 0xfe, 0x04, 0x31, // inc byte [rcx + rsi] - - 0x48, 0xd1, 0xe8, // shr rax, 1 - 0x48, 0x8b, 0x0a, // mov rcx, qword [rdx] - 0x48, 0x89, 0x01, // mov qword [rcx], rax - - 0x5e, // pop rsi - 0x5a, // pop rdx - 0x59, // pop rcx - 0x58, // pop rax - 0x9d, // popfq - - 0xc3, // ret - // Read-only data goes here: - // uint64_t* afl_prev_loc_ptr - // uint8_t** afl_area_ptr_ptr - // unsigned int afl_instr_rms - -}; - -#else - -static void on_basic_block(GumCpuContext *context, gpointer user_data) { - - afl_maybe_log((guint64)user_data); - -} - -#endif - -void instr_basic_block(GumStalkerIterator *iterator, GumStalkerOutput *output, - gpointer user_data) { - - range_t *range = (range_t *)user_data; - - const cs_insn *instr; - gboolean begin = TRUE; - while (gum_stalker_iterator_next(iterator, &instr)) { - - if (begin) { - - if (instr->address >= 
range->code_start && - instr->address <= range->code_end) { - -#ifdef __x86_64__ - GumX86Writer *cw = output->writer.x86; - if (range->current_log_impl == 0 || - !gum_x86_writer_can_branch_directly_between( - cw->pc, range->current_log_impl) || - !gum_x86_writer_can_branch_directly_between( - cw->pc + 128, range->current_log_impl)) { - - gconstpointer after_log_impl = cw->code + 1; - - gum_x86_writer_put_jmp_near_label(cw, after_log_impl); - - range->current_log_impl = cw->pc; - gum_x86_writer_put_bytes(cw, afl_maybe_log_code, - sizeof(afl_maybe_log_code)); - - uint64_t *afl_prev_loc_ptr = &range->afl_prev_loc; - uint8_t **afl_area_ptr_ptr = &__afl_area_ptr; - gum_x86_writer_put_bytes(cw, (const guint8 *)&afl_prev_loc_ptr, - sizeof(afl_prev_loc_ptr)); - gum_x86_writer_put_bytes(cw, (const guint8 *)&afl_area_ptr_ptr, - sizeof(afl_area_ptr_ptr)); - gum_x86_writer_put_label(cw, after_log_impl); - - } - - gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP, - -GUM_RED_ZONE_SIZE); - gum_x86_writer_put_push_reg(cw, GUM_REG_RDI); - gum_x86_writer_put_mov_reg_address(cw, GUM_REG_RDI, - GUM_ADDRESS(instr->address)); - gum_x86_writer_put_call_address(cw, range->current_log_impl); - gum_x86_writer_put_pop_reg(cw, GUM_REG_RDI); - gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP, - GUM_RED_ZONE_SIZE); -#else - gum_stalker_iterator_put_callout(iterator, on_basic_block, - (gpointer)instr->address, NULL); -#endif - begin = FALSE; - - } - - } - - gum_stalker_iterator_keep(iterator); - - } - -} - -/* Because this CAN be called more than once, it will return the LAST range */ -static int enumerate_ranges(const GumRangeDetails *details, - gpointer user_data) { - - GumMemoryRange *code_range = (GumMemoryRange *)user_data; - memcpy(code_range, details->range, sizeof(*code_range)); - return 0; - -} - -int main(int argc, char **argv) { - -#ifndef __APPLE__ - (void)personality(ADDR_NO_RANDOMIZE); // disable ASLR -#endif - - // STEP 2: load the library you 
want to fuzz and lookup the functions, - // inclusive of the cleanup functions. - // If there is just one function, then there is nothing to change - // or add here. - - void *dl = NULL; - if (argc > 2) { - - dl = dlopen(argv[1], RTLD_LAZY); - - } else { - - dl = dlopen(TARGET_LIBRARY, RTLD_LAZY); - - } - - if (!dl) { - - if (argc > 2) - fprintf(stderr, "Could not load %s\n", argv[1]); - else - fprintf(stderr, "Could not load %s\n", TARGET_LIBRARY); - exit(-1); - - } - - if (argc > 2) - o_function = dlsym(dl, argv[2]); - else - o_function = dlsym(dl, TARGET_FUNCTION); - if (!o_function) { - - if (argc > 2) - fprintf(stderr, "Could not find function %s\n", argv[2]); - else - fprintf(stderr, "Could not find function %s\n", TARGET_FUNCTION); - exit(-1); - - } - - // END STEP 2 - - if (!getenv("AFL_FRIDA_TEST_INPUT")) { - - gum_init_embedded(); - if (!gum_stalker_is_supported()) { - - gum_deinit_embedded(); - return 1; - - } - - GumStalker *stalker = gum_stalker_new(); - - GumAddress base_address; - if (argc > 2) - base_address = gum_module_find_base_address(argv[1]); - else - base_address = gum_module_find_base_address(TARGET_LIBRARY); - GumMemoryRange code_range; - if (argc > 2) - gum_module_enumerate_ranges(argv[1], GUM_PAGE_RX, enumerate_ranges, - &code_range); - else - gum_module_enumerate_ranges(TARGET_LIBRARY, GUM_PAGE_RX, enumerate_ranges, - &code_range); - - guint64 code_start = code_range.base_address; - guint64 code_end = code_range.base_address + code_range.size; - range_t instr_range = {0, code_start, code_end, 0, 0}; - - printf("Frida instrumentation: base=0x%lx instrumenting=0x%lx-%lx\n", - base_address, code_start, code_end); - if (!code_start || !code_end) { - - if (argc > 2) - fprintf(stderr, "Error: no valid memory address found for %s\n", - argv[1]); - else - fprintf(stderr, "Error: no valid memory address found for %s\n", - TARGET_LIBRARY); - exit(-1); - - } - - GumStalkerTransformer *transformer = - 
gum_stalker_transformer_make_from_callback(instr_basic_block, - &instr_range, NULL); - - // to ensure that the signatures are not optimized out - memcpy(__afl_area_ptr, (void *)AFL_PERSISTENT, sizeof(AFL_PERSISTENT)); - memcpy(__afl_area_ptr + 32, (void *)AFL_DEFER_FORKSVR, - sizeof(AFL_DEFER_FORKSVR)); - __afl_manual_init(); - - // - // any expensive target library initialization that has to be done just once - // - put that here - // - - gum_stalker_follow_me(stalker, transformer, NULL); - - while (__afl_persistent_loop(UINT32_MAX) != 0) { - - previous_pc = 0; // Required! - -#ifdef _DEBUG - fprintf(stderr, "CLIENT crc: %016llx len: %u\n", - hash64(__afl_fuzz_ptr, *__afl_fuzz_len), *__afl_fuzz_len); - fprintf(stderr, "RECV:"); - for (int i = 0; i < *__afl_fuzz_len; i++) - fprintf(stderr, "%02x", __afl_fuzz_ptr[i]); - fprintf(stderr, "\n"); -#endif - - // STEP 3: ensure the minimum length is present and setup the target - // function to fuzz. - - if (*__afl_fuzz_len > 0) { - - __afl_fuzz_ptr[*__afl_fuzz_len] = 0; // if you need to null terminate - (*o_function)(__afl_fuzz_ptr, *__afl_fuzz_len); - - } - - // END STEP 3 - - } - - gum_stalker_unfollow_me(stalker); - - while (gum_stalker_garbage_collect(stalker)) - g_usleep(10000); - - g_object_unref(stalker); - g_object_unref(transformer); - gum_deinit_embedded(); - - } else { - - char buf[8 * 1024] = {0}; - int count = read(0, buf, sizeof(buf)); - buf[8 * 1024 - 1] = '\0'; - (*o_function)(buf, count); - - } - - return 0; - -} - diff --git a/utils/afl_frida/afl-frida.h b/utils/afl_frida/afl-frida.h deleted file mode 100644 index efa3440f..00000000 --- a/utils/afl_frida/afl-frida.h +++ /dev/null @@ -1,53 +0,0 @@ -extern int is_persistent; - -G_BEGIN_DECLS - -#define GUM_TYPE_FAKE_EVENT_SINK (gum_fake_event_sink_get_type()) - -G_DECLARE_FINAL_TYPE(GumFakeEventSink, gum_fake_event_sink, GUM, - FAKE_EVENT_SINK, GObject) - -struct _GumFakeEventSink { - - GObject parent; - GumEventType mask; - -}; - -GumEventSink 
*gum_fake_event_sink_new(void); -void gum_fake_event_sink_reset(GumFakeEventSink *self); - -G_END_DECLS - -typedef struct { - - GumAddress base_address; - guint64 code_start, code_end; - -} range_t; - -void instr_basic_block(GumStalkerIterator *iterator, GumStalkerOutput *output, - gpointer user_data); -#pragma once - -void afl_setup(void); -void afl_start_forkserver(void); -int __afl_persistent_loop(unsigned int max_cnt); - -inline static inline void afl_maybe_log(guint64 current_pc) { - - extern unsigned int afl_instr_rms; - extern uint8_t * afl_area_ptr; - - static __thread guint64 previous_pc; - - current_pc = (current_pc >> 4) ^ (current_pc << 8); - current_pc &= MAP_SIZE - 1; - - if (current_pc >= afl_instr_rms) return; - - afl_area_ptr[current_pc ^ previous_pc]++; - previous_pc = current_pc >> 1; - -} - diff --git a/utils/afl_frida/android/README.md b/utils/afl_frida/android/README.md deleted file mode 100644 index 044b48a1..00000000 --- a/utils/afl_frida/android/README.md +++ /dev/null @@ -1 +0,0 @@ -For android, frida-gum package (ex. https://github.com/frida/frida/releases/download/14.2.6/frida-gum-devkit-14.2.6-android-arm64.tar.xz) is needed to be extracted in the directory. diff --git a/utils/afl_frida/android/frida-gum-example.c b/utils/afl_frida/android/frida-gum-example.c deleted file mode 100644 index 14d98248..00000000 --- a/utils/afl_frida/android/frida-gum-example.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Compile with: - * - * clang -fPIC -DANDROID -ffunction-sections -fdata-sections -Os -pipe -g3 frida-gum-example.c -o frida-gum-example -L. -lfrida-gum -llog -ldl -lm -pthread -Wl,--gc-sections,-z,noexecstack,-z,relro,-z,now -fuse-ld=gold -fuse-ld=gold -Wl,--icf=all - * - * Visit https://frida.re to learn more about Frida. 
- */ - -#include "frida-gum.h" - -#include <fcntl.h> -#include <unistd.h> - -typedef struct _ExampleListener ExampleListener; -typedef enum _ExampleHookId ExampleHookId; - -struct _ExampleListener -{ - GObject parent; - - guint num_calls; -}; - -enum _ExampleHookId -{ - EXAMPLE_HOOK_OPEN, - EXAMPLE_HOOK_CLOSE -}; - -static void example_listener_iface_init (gpointer g_iface, gpointer iface_data); - -#define EXAMPLE_TYPE_LISTENER (example_listener_get_type ()) -G_DECLARE_FINAL_TYPE (ExampleListener, example_listener, EXAMPLE, LISTENER, GObject) -G_DEFINE_TYPE_EXTENDED (ExampleListener, - example_listener, - G_TYPE_OBJECT, - 0, - G_IMPLEMENT_INTERFACE (GUM_TYPE_INVOCATION_LISTENER, - example_listener_iface_init)) - -int -main (int argc, - char * argv[]) -{ - GumInterceptor * interceptor; - GumInvocationListener * listener; - - gum_init_embedded (); - - interceptor = gum_interceptor_obtain (); - listener = g_object_new (EXAMPLE_TYPE_LISTENER, NULL); - - gum_interceptor_begin_transaction (interceptor); - gum_interceptor_attach (interceptor, - GSIZE_TO_POINTER (gum_module_find_export_by_name (NULL, "open")), - listener, - GSIZE_TO_POINTER (EXAMPLE_HOOK_OPEN)); - gum_interceptor_attach (interceptor, - GSIZE_TO_POINTER (gum_module_find_export_by_name (NULL, "close")), - listener, - GSIZE_TO_POINTER (EXAMPLE_HOOK_CLOSE)); - gum_interceptor_end_transaction (interceptor); - - close (open ("/etc/hosts", O_RDONLY)); - close (open ("/etc/fstab", O_RDONLY)); - - g_print ("[*] listener got %u calls\n", EXAMPLE_LISTENER (listener)->num_calls); - - gum_interceptor_detach (interceptor, listener); - - close (open ("/etc/hosts", O_RDONLY)); - close (open ("/etc/fstab", O_RDONLY)); - - g_print ("[*] listener still has %u calls\n", EXAMPLE_LISTENER (listener)->num_calls); - - g_object_unref (listener); - g_object_unref (interceptor); - - gum_deinit_embedded (); - - return 0; -} - -static void -example_listener_on_enter (GumInvocationListener * listener, - GumInvocationContext * ic) -{ - 
ExampleListener * self = EXAMPLE_LISTENER (listener); - ExampleHookId hook_id = GUM_IC_GET_FUNC_DATA (ic, ExampleHookId); - - switch (hook_id) - { - case EXAMPLE_HOOK_OPEN: - g_print ("[*] open(\"%s\")\n", (const gchar *) gum_invocation_context_get_nth_argument (ic, 0)); - break; - case EXAMPLE_HOOK_CLOSE: - g_print ("[*] close(%d)\n", GPOINTER_TO_INT (gum_invocation_context_get_nth_argument (ic, 0))); - break; - } - - self->num_calls++; -} - -static void -example_listener_on_leave (GumInvocationListener * listener, - GumInvocationContext * ic) -{ -} - -static void -example_listener_class_init (ExampleListenerClass * klass) -{ - (void) EXAMPLE_IS_LISTENER; - (void) glib_autoptr_cleanup_ExampleListener; -} - -static void -example_listener_iface_init (gpointer g_iface, - gpointer iface_data) -{ - GumInvocationListenerInterface * iface = g_iface; - - iface->on_enter = example_listener_on_enter; - iface->on_leave = example_listener_on_leave; -} - -static void -example_listener_init (ExampleListener * self) -{ -} diff --git a/utils/afl_frida/libtestinstr.c b/utils/afl_frida/libtestinstr.c deleted file mode 100644 index 96b1cf21..00000000 --- a/utils/afl_frida/libtestinstr.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - american fuzzy lop++ - a trivial program to test the build - -------------------------------------------------------- - Originally written by Michal Zalewski - Copyright 2014 Google Inc. All rights reserved. - Copyright 2019-2020 AFLplusplus Project. All rights reserved. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at: - http://www.apache.org/licenses/LICENSE-2.0 - */ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> - -void testinstr(char *buf, int len) { - - if (len < 1) return; - buf[len] = 0; - - // we support three input cases - if (buf[0] == '0') - printf("Looks like a zero to me!\n"); - else if (buf[0] == '1') - printf("Pretty sure that is a one!\n"); - else - printf("Neither one or zero? How quaint!\n"); - -} - diff --git a/utils/afl_network_proxy/README.md b/utils/afl_network_proxy/README.md index a5ac3578..05659c45 100644 --- a/utils/afl_network_proxy/README.md +++ b/utils/afl_network_proxy/README.md @@ -32,7 +32,7 @@ e.g.: afl-network-server -i 1111 -m 25M -t 1000 -- /bin/target -f @@ ``` -### on the (afl-fuzz) master +### on the (afl-fuzz) main node Just run afl-fuzz with your normal options, however the target should be `afl-network-client` with the IP and PORT of the `afl-network-server` and diff --git a/utils/aflpp_driver/aflpp_driver_test.c b/utils/aflpp_driver/aflpp_driver_test.c index fe05b4f8..527ba57b 100644 --- a/utils/aflpp_driver/aflpp_driver_test.c +++ b/utils/aflpp_driver/aflpp_driver_test.c @@ -2,8 +2,6 @@ #include <stdlib.h> #include <stdint.h> -#include "hash.h" - void __attribute__((noinline)) crashme(const uint8_t *Data, size_t Size) { if (Size < 5) return; diff --git a/utils/optimin/.gitignore b/utils/optimin/.gitignore new file mode 100644 index 00000000..46f42f8f --- /dev/null +++ b/utils/optimin/.gitignore @@ -0,0 +1,11 @@ +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps diff --git a/utils/optimin/CMakeLists.txt b/utils/optimin/CMakeLists.txt new file mode 100644 index 00000000..b45dd004 --- /dev/null +++ b/utils/optimin/CMakeLists.txt @@ -0,0 +1,22 @@ 
+cmake_minimum_required(VERSION 3.10) + +project(optimin + LANGUAGES CXX + DESCRIPTION "MaxSAT-based fuzzing corpus minimizer" +) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") + +# Add LLVM +find_package(LLVM REQUIRED CONFIG) +message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") + +include_directories(${LLVM_INCLUDE_DIRS}) +add_definitions(${LLVM_DEFINITIONS} -DNDEBUG) + +add_subdirectory(EvalMaxSAT) +add_subdirectory(src) diff --git a/utils/optimin/EVALMAXSAT_VERSION b/utils/optimin/EVALMAXSAT_VERSION new file mode 100644 index 00000000..d836ff1c --- /dev/null +++ b/utils/optimin/EVALMAXSAT_VERSION @@ -0,0 +1 @@ +440bf90edf88f6ab940934129e3c5b3b93764295 diff --git a/utils/optimin/EvalMaxSAT b/utils/optimin/EvalMaxSAT new file mode 160000 +Subproject 440bf90edf88f6ab940934129e3c5b3b93764295 diff --git a/utils/optimin/README.md b/utils/optimin/README.md new file mode 100644 index 00000000..c6f2af06 --- /dev/null +++ b/utils/optimin/README.md @@ -0,0 +1,94 @@ +# OptiMin + +OptiMin is a corpus minimizer that uses a +[MaxSAT](https://en.wikipedia.org/wiki/Maximum_satisfiability_problem) solver +to identify a subset of functionally distinct files that exercise different code +paths in a target program. + +Unlike most corpus minimizers, such as `afl-cmin`, OptiMin does not rely on +heuristic and/or greedy algorithms to identify these functionally distinct +files. This means that minimized corpora are generally much smaller than those +produced by other tools. + +## Building + +To build `optimin`, just execute the `build_optimin.sh` script. + +## Running + +Running `optimin` is the same as running `afl-cmin`: + +``` +./optimin -h +OVERVIEW: Optimal corpus minimizer +USAGE: optimin [options] <target program> [target args...] 
+ +OPTIONS: + +Color Options: + + --color - Use colors in output (default=autodetect) + +General options: + + -C - Keep crashing inputs, reject everything else + -O - Use binary-only instrumentation (FRIDA mode) + -Q - Use binary-only instrumentation (QEMU mode) + -U - Use unicorn-based instrumentation (unicorn mode) + -f - Include edge hit counts + -i dir - Input directory + -m megs - Memory limit for child process (default=none) + -o dir - Output directory + -p - Display progress bar + -t msec - Run time limit for child process (default=5000) + -w csv - Weights file + +Generic Options: + + --help - Display available options (--help-hidden for more) + --help-list - Display list of available options (--help-list-hidden for more) + --version - Display the version of this program +``` + +Example: `optimin -i files -o seeds -- ./target @@` + +### Weighted Minimizations + +OptiMin allows for weighted minimizations. For example, seeds can be weighted +by file size (or execution time), thus favoring the selection of smaller (or +faster) seeds. + +To perform a weighted minimization, supply a CSV file with the `-w` option. This +CSV file is formatted as follows: + +``` +SEED_1,WEIGHT_1 +SEED_2,WEIGHT_2 +... +SEED_N,WEIGHT_N +``` + +Where `SEED_N` is the file name (**not** path) of a seed in the input directory, +and `WEIGHT_N` is an integer weight. + +## Further Details and Citation + +For more details, please see the paper [Seed Selection for Successful +Fuzzing](https://dl.acm.org/doi/10.1145/3460319.3464795). If you use OptiMin in +your research, please cite this paper. + +BibTeX: + +```bibtex +@inproceedings{Herrera:2021:FuzzSeedSelection, + author = {Adrian Herrera and Hendra Gunadi and Shane Magrath and Michael Norrish and Mathias Payer and Antony L. 
Hosking}, + title = {Seed Selection for Successful Fuzzing}, + booktitle = {30th ACM SIGSOFT International Symposium on Software Testing and Analysis}, + series = {ISSTA}, + year = {2021}, + pages = {230--243}, + numpages = {14}, + location = {Virtual, Denmark}, + publisher = {Association for Computing Machinery}, +} +``` diff --git a/utils/optimin/build_optimin.sh b/utils/optimin/build_optimin.sh new file mode 100755 index 00000000..9480f966 --- /dev/null +++ b/utils/optimin/build_optimin.sh @@ -0,0 +1,131 @@ +#!/bin/sh +# +# american fuzzy lop++ - optimin build script +# ------------------------------------------------ +# +# Originally written by Nathan Voss <njvoss99@gmail.com> +# +# Adapted from code by Andrew Griffiths <agriffiths@google.com> and +# Michal Zalewski +# +# Adapted for AFLplusplus by Dominik Maier <mail@dmnk.co> +# +# Copyright 2017 Battelle Memorial Institute. All rights reserved. +# Copyright 2019-2020 AFLplusplus Project. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# This script builds the OptiMin corpus minimizer. + +EVALMAXSAT_VERSION="$(cat ./EVALMAXSAT_VERSION)" +EVALMAXSAT_REPO="https://github.com/FlorentAvellaneda/EvalMaxSAT" + +echo "=================================================" +echo "OptiMin build script" +echo "=================================================" +echo + +echo "[*] Performing basic sanity checks..." + +PLT=`uname -s` + +if [ ! -f "../../config.h" ]; then + + echo "[-] Error: key files not found - wrong working directory?" 
+ exit 1 + +fi + +LLVM_CONFIG="${LLVM_CONFIG:-llvm-config}" +CMAKECMD=cmake +MAKECMD=make +TARCMD=tar + +if [ "$PLT" = "Darwin" ]; then + CORES=`sysctl -n hw.ncpu` + TARCMD=tar +fi + +if [ "$PLT" = "FreeBSD" ]; then + MAKECMD=gmake + CORES=`sysctl -n hw.ncpu` + TARCMD=gtar +fi + +if [ "$PLT" = "NetBSD" ] || [ "$PLT" = "OpenBSD" ]; then + MAKECMD=gmake + CORES=`sysctl -n hw.ncpu` + TARCMD=gtar +fi + +PREREQ_NOTFOUND= +for i in git $CMAKECMD $MAKECMD $TARCMD; do + + T=`command -v "$i" 2>/dev/null` + + if [ "$T" = "" ]; then + + echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i' or similar." + PREREQ_NOTFOUND=1 + + fi + +done + +if echo "$CC" | grep -qF /afl-; then + + echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool." + PREREQ_NOTFOUND=1 + +fi + +if [ "$PREREQ_NOTFOUND" = "1" ]; then + exit 1 +fi + +echo "[+] All checks passed!" + +echo "[*] Making sure EvalMaxSAT is checked out" + +git status 1>/dev/null 2>/dev/null +if [ $? -eq 0 ]; then + echo "[*] initializing EvalMaxSAT submodule" + git submodule init || exit 1 + git submodule update ./EvalMaxSAT 2>/dev/null # ignore errors +else + echo "[*] cloning EvalMaxSAT" + test -d EvalMaxSAT || { + CNT=1 + while [ '!' -d EvalMaxSAT -a "$CNT" -lt 4 ]; do + echo "Trying to clone EvalMaxSAT (attempt $CNT/3)" + git clone "$EVALMAXSAT_REPO" # was $GRAMMAR_REPO, which this script never defines + CNT=`expr "$CNT" + 1` + done + } +fi + +test -d EvalMaxSAT || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } +echo "[+] Got EvalMaxSAT." + +cd "EvalMaxSAT" || exit 1 +echo "[*] Checking out $EVALMAXSAT_VERSION" +sh -c 'git stash && git stash drop' 1>/dev/null 2>/dev/null +git checkout "$EVALMAXSAT_VERSION" || exit 1 +cd .. + +echo +echo +echo "[+] EvalMaxSAT successfully prepared!" +echo "[+] Building OptiMin now." +mkdir -p build +cd build || exit 1 +$CMAKECMD .. -DLLVM_DIR=`$LLVM_CONFIG --cmakedir` || exit 1 # use the tool that was prerequisite-checked above +$MAKECMD -j"${CORES:-$(nproc 2>/dev/null || echo 1)}" || exit 1 # CORES is only set in the Darwin/BSD branches above; a bare -j would be unbounded +cd .. +echo +cp -fv build/src/optimin . 
|| exit 1 +echo "[+] OptiMin successfully built!" diff --git a/utils/optimin/src/CMakeLists.txt b/utils/optimin/src/CMakeLists.txt new file mode 100644 index 00000000..f31ceeaf --- /dev/null +++ b/utils/optimin/src/CMakeLists.txt @@ -0,0 +1,12 @@ +add_executable(optimin OptiMin.cpp) + +foreach(LIB MaLib EvalMaxSAT glucose) + target_include_directories(optimin PRIVATE + "${CMAKE_SOURCE_DIR}/EvalMaxSAT/lib/${LIB}/src") + target_link_libraries(optimin ${LIB}) +endforeach(LIB) + +llvm_map_components_to_libnames(LLVM_LIBS support) +target_link_libraries(optimin ${LLVM_LIBS}) + +install(TARGETS optimin RUNTIME DESTINATION bin) diff --git a/utils/optimin/src/OptiMin.cpp b/utils/optimin/src/OptiMin.cpp new file mode 100644 index 00000000..ce1fb850 --- /dev/null +++ b/utils/optimin/src/OptiMin.cpp @@ -0,0 +1,702 @@ +/* + * OptiMin, an optimal fuzzing corpus minimizer. + * + * Author: Adrian Herrera + */ + +#include <cstdint> +#include <cstdlib> +#include <vector> + +#include <llvm/ADT/DenseSet.h> +#include <llvm/ADT/DenseMap.h> +#include <llvm/ADT/SmallVector.h> +#include <llvm/ADT/StringExtras.h> +#include <llvm/ADT/StringMap.h> +#include <llvm/Support/Chrono.h> +#include <llvm/Support/CommandLine.h> +#include <llvm/Support/Error.h> +#include <llvm/Support/FileSystem.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/Support/Path.h> +#include <llvm/Support/Program.h> +#include <llvm/Support/WithColor.h> + +#include "EvalMaxSAT.h" + +using namespace llvm; + +namespace { + +// -------------------------------------------------------------------------- // +// Classes +// -------------------------------------------------------------------------- // + +/// Ensure seed weights default to 1 +class Weight { + + public: + Weight() : Weight(1){}; + Weight(uint32_t V) : Value(V){}; + + operator unsigned() const { + + return Value; + + } + + private: + const unsigned Value; + +}; + +// -------------------------------------------------------------------------- // +// Typedefs 
/// Begin an error message: constructs a bold red `WithColor` over `errs()`,
/// writes the "[-] " prefix and hands the result back so the caller can keep
/// streaming the message text into it.
static const auto ErrMsg = [] {

  return WithColor(errs(), raw_ostream::RED, /*Bold=*/true) << "[-] ";

};

/// Begin a warning message: same as `ErrMsg` (also on `errs()`, also with the
/// "[-] " prefix) but in bold magenta.
static const auto WarnMsg = [] {

  return WithColor(errs(), raw_ostream::MAGENTA, /*Bold=*/true) << "[-] ";

};

/// Begin a success message: bold green `WithColor` over `outs()` with the
/// "[+] " prefix already written.
static const auto SuccMsg = [] {

  return WithColor(outs(), raw_ostream::GREEN, /*Bold=*/true) << "[+] ";

};

/// Begin a status/progress message: bold blue `WithColor` over `outs()` with
/// the "[*] " prefix already written.
static const auto StatMsg = [] {

  return WithColor(outs(), raw_ostream::BLUE, /*Bold=*/true) << "[*] ";

};
+ + } else { + + if (FailureCount >= MAX_WEIGHT_FAILURES) { + ErrMsg() << "Too many failures. Aborting\n"; + std::exit(1); + } + + WarnMsg() << "Failed to read weight for '" << Seed << "'. Skipping...\n"; + FailureCount++; + + } + + } + +} + +static std::error_code readCov(const StringRef Trace, AFLCoverageVector &Cov) { + + const auto CovOrErr = MemoryBuffer::getFile(Trace); + if (const auto EC = CovOrErr.getError()) return EC; + + SmallVector<StringRef, 0> Lines; + CovOrErr.get()->getBuffer().trim().split(Lines, '\n'); + + AFLTupleID Edge = 0; + unsigned Freq = 0; + + for (const auto &Line : Lines) { + + const auto &[EdgeStr, FreqStr] = Line.split(':'); + + to_integer(EdgeStr, Edge, 10); + to_integer(FreqStr, Freq, 10); + Cov.push_back({Edge, Freq}); + + } + + return std::error_code(); + +} + +static Error runShowmap(AFLCoverageMap &CovMap, const StringRef Input, + bool BinCheck = false) { + + const bool InputIsFile = !sys::fs::is_directory(Input); + Optional<StringRef> Redirects[] = {None, None, None}; + + SmallString<32> TraceDir{OutputDir}; + sys::path::append(TraceDir, ".traces"); + + SmallString<32> Output{TraceDir}; + SmallString<32> StdinFile{TraceDir}; + + // ------------------------------------------------------------------------ // + // Prepare afl-showmap arguments + // + // If the given input is a file, then feed this directly into stdin. + // Otherwise, if it is a directory, specify this on the afl-showmap command + // line. + // ------------------------------------------------------------------------ // + + SmallVector<StringRef, 12> ShowmapArgs{ShowmapPath, "-q", + "-m", MemLimit, + "-t", Timeout}; + + if (InputIsFile) { + + StdinFile = Input; + sys::path::append(Output, + BinCheck ? 
".run_test" : sys::path::filename(Input)); + + } else { + + sys::path::append(StdinFile, ".cur_input"); + ShowmapArgs.append({"-i", Input}); + + } + + + if (TargetArgsHasAtAt) { + + ShowmapArgs.append({"-A", StdinFile}); + Redirects[/* stdin */ 0] = "/dev/null"; + + } else if (InputIsFile) { + + Redirects[/* stdin */ 0] = Input; + + } + + if (FridaMode) ShowmapArgs.push_back("-O"); + if (QemuMode) ShowmapArgs.push_back("-Q"); + if (UnicornMode) ShowmapArgs.push_back("-U"); + + ShowmapArgs.append({"-o", Output, "--", TargetProg}); + ShowmapArgs.append(TargetArgs.begin(), TargetArgs.end()); + + // ------------------------------------------------------------------------ // + // Run afl-showmap + // ------------------------------------------------------------------------ // + + const int RC = sys::ExecuteAndWait(ShowmapPath, ShowmapArgs, + /*env=*/None, Redirects); + if (RC && !CrashMode) { + + ErrMsg() << "Exit code " << RC << " != 0 received from afl-showmap\n"; + return createStringError(inconvertibleErrorCode(), "afl-showmap failed"); + + } + + // ------------------------------------------------------------------------ // + // Parse afl-showmap output + // ------------------------------------------------------------------------ // + + AFLCoverageVector Cov; + std::error_code EC; + sys::fs::file_status Status; + + if (InputIsFile) { + + // Read a single output coverage file + if ((EC = readCov(Output, Cov))) { + + sys::fs::remove(Output); + return errorCodeToError(EC); + + } + + CovMap.try_emplace(sys::path::filename(Input), Cov); + if (!KeepTraces) sys::fs::remove(Output); + + } else { + + // Read a directory of output coverage files + for (sys::fs::recursive_directory_iterator Dir(TraceDir, EC), DirEnd; + Dir != DirEnd && !EC; Dir.increment(EC)) { + + if (EC) return errorCodeToError(EC); + + const auto &Path = Dir->path(); + if ((EC = sys::fs::status(Path, Status))) return errorCodeToError(EC); + + switch (Status.type()) { + + case 
sys::fs::file_type::regular_file: + case sys::fs::file_type::symlink_file: + case sys::fs::file_type::type_unknown: + Cov.clear(); + if ((EC = readCov(Path, Cov))) { + + sys::fs::remove(Path); + return errorCodeToError(EC); + + } + + CovMap.try_emplace(sys::path::filename(Path), Cov); + default: + // Ignore + break; + + } + + } + + if (!KeepTraces) sys::fs::remove_directories(TraceDir); + + } + + return Error::success(); + +} + +static inline void StartTimer() { + + StartTime = std::chrono::system_clock::now(); + +} + +static inline void EndTimer() { + + EndTime = std::chrono::system_clock::now(); + Duration = + std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); + + SuccMsg() << " Completed in " << Duration.count() << " s\n"; + +} + +// -------------------------------------------------------------------------- // +// Main function +// -------------------------------------------------------------------------- // + +int main(int argc, char *argv[]) { + + WeightsMap Weights; + std::error_code EC; + + // ------------------------------------------------------------------------ // + // Parse command-line options and environment variables + // + // Also check the target arguments, as this determines how we run afl-showmap. 
+ // ------------------------------------------------------------------------ // + + cl::ParseCommandLineOptions(argc, argv, "Optimal corpus minimizer"); + + KeepTraces = !!std::getenv("AFL_KEEP_TRACES"); + SkipBinCheck = !!std::getenv("AFL_SKIP_BIN_CHECK"); + const auto AFLPath = std::getenv("AFL_PATH"); + + if (CrashMode) ::setenv("AFL_CMIN_CRASHES_ONLY", "1", /*overwrite=*/true); + + for (const auto &Arg : TargetArgs) + if (Arg == "@@") TargetArgsHasAtAt = true; + + // ------------------------------------------------------------------------ // + // Find afl-showmap + // ------------------------------------------------------------------------ // + + SmallVector<StringRef, 16> EnvPaths; + + if (const char *PathEnv = std::getenv("PATH")) + SplitString(PathEnv, EnvPaths, ":"); + if (AFLPath) EnvPaths.push_back(AFLPath); + + const auto ShowmapOrErr = sys::findProgramByName("afl-showmap", EnvPaths); + if (ShowmapOrErr.getError()) { + + ErrMsg() << "Failed to find afl-showmap. Check your PATH\n"; + return 1; + + } + + ShowmapPath = *ShowmapOrErr; + + // ------------------------------------------------------------------------ // + // Parse weights + // + // Weights are stored in CSV file mapping a seed file name to an integer + // greater than zero. + // ------------------------------------------------------------------------ // + + if (WeightsFile != "") { + + StatMsg() << "Reading weights from '" << WeightsFile << "'...\n"; + StartTimer(); + + const auto WeightsOrErr = MemoryBuffer::getFile(WeightsFile); + if ((EC = WeightsOrErr.getError())) { + + ErrMsg() << "Failed to read weights from '" << WeightsFile + << "': " << EC.message() << '\n'; + return 1; + + } + + GetWeights(*WeightsOrErr.get(), Weights); + + EndTimer(); + + } + + // ------------------------------------------------------------------------ // + // Traverse input directory + // + // Find the seed files inside this directory (and subdirectories). 
+ // ------------------------------------------------------------------------ // + + StatMsg() << "Locating seeds in '" << InputDir << "'...\n"; + StartTimer(); + + bool IsDirResult; + if ((EC = sys::fs::is_directory(InputDir, IsDirResult))) { + + ErrMsg() << "Invalid input directory '" << InputDir << "': " << EC.message() + << '\n'; + return 1; + + } + + sys::fs::file_status Status; + StringMap<std::string> SeedFiles; + + for (sys::fs::recursive_directory_iterator Dir(InputDir, EC), DirEnd; + Dir != DirEnd && !EC; Dir.increment(EC)) { + + if (EC) { + + ErrMsg() << "Failed to traverse input directory '" << InputDir + << "': " << EC.message() << '\n'; + return 1; + + } + + const auto &Path = Dir->path(); + if ((EC = sys::fs::status(Path, Status))) { + + ErrMsg() << "Failed to access '" << Path << "': " << EC.message() << '\n'; + return 1; + + } + + switch (Status.type()) { + + case sys::fs::file_type::regular_file: + case sys::fs::file_type::symlink_file: + case sys::fs::file_type::type_unknown: + SeedFiles.try_emplace(sys::path::filename(Path), + sys::path::parent_path(Path)); + default: + /* Ignore */ + break; + + } + + } + + EndTimer(); + + if (SeedFiles.empty()) { + + ErrMsg() << "Failed to find any seed files in '" << InputDir << "'\n"; + return 1; + + } + + // ------------------------------------------------------------------------ // + // Setup output directory + // ------------------------------------------------------------------------ // + + SmallString<32> TraceDir{OutputDir}; + sys::path::append(TraceDir, ".traces"); + + if ((EC = sys::fs::remove_directories(TraceDir))) { + + ErrMsg() << "Failed to remove existing trace directory in '" << OutputDir + << "': " << EC.message() << '\n'; + return 1; + + } + + if ((EC = sys::fs::create_directories(TraceDir))) { + + ErrMsg() << "Failed to create output directory '" << OutputDir + << "': " << EC.message() << '\n'; + return 1; + + } + + // ------------------------------------------------------------------------ 
// + // Test the target binary + // ------------------------------------------------------------------------ // + + AFLCoverageMap CovMap; + + if (!SkipBinCheck) { + + const auto It = SeedFiles.begin(); + SmallString<32> TestSeed{It->second}; + sys::path::append(TestSeed, It->first()); + + StatMsg() << "Testing the target binary with '" << TestSeed << "`...\n"; + StartTimer(); + + if (auto Err = runShowmap(CovMap, TestSeed, /*BinCheck=*/true)) { + + ErrMsg() << "No instrumentation output detected \n"; + return 1; + + } + + EndTimer(); + SuccMsg() << "OK, " << CovMap.begin()->second.size() + << " tuples recorded\n"; + + } + + // ------------------------------------------------------------------------ // + // Generate seed coverage + // + // Iterate over the corpus directory, which should contain seed files. Execute + // these seeds in the target program to generate coverage information, and + // then store this coverage information in the appropriate data structures. + // ------------------------------------------------------------------------ // + + StatMsg() << "Running afl-showmap on " << SeedFiles.size() << " seeds...\n"; + StartTimer(); + + MaxSATSeeds SeedVars; + MaxSATCoverageMap SeedCoverage; + EvalMaxSAT Solver(/*nbMinimizeThread=*/0); + + CovMap.clear(); + if (auto Err = runShowmap(CovMap, InputDir)) { + + ErrMsg() << "Failed to generate coverage: " << Err << '\n'; + return 1; + + } + + for (const auto &SeedCov : CovMap) { + + // Create a variable to represent the seed + const SeedID Var = Solver.newVar(); + SeedVars.emplace_back(Var, SeedCov.first()); + + // Record the set of seeds that cover a particular edge + for (auto &[Edge, Freq] : SeedCov.second) { + + if (EdgesOnly) { + + // Ignore edge frequency + SeedCoverage[Edge].insert(Var); + + } else { + + // Executing edge `E` `N` times means that it was executed `N - 1` times + for (unsigned I = 0; I < Freq; ++I) + SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(Var); + + } + + } + + } + + EndTimer(); + + 
// ------------------------------------------------------------------------ // + // Set the hard and soft constraints in the solver + // ------------------------------------------------------------------------ // + + StatMsg() << "Generating constraints...\n"; + StartTimer(); + + size_t SeedCount = 0; + + // Ensure that at least one seed is selected that covers a particular edge + // (hard constraint) + std::vector<SeedID> Clauses; + for (const auto &[_, Seeds] : SeedCoverage) { + + if (Seeds.empty()) continue; + + Clauses.clear(); + for (const auto &Seed : Seeds) + Clauses.push_back(Seed); + + Solver.addClause(Clauses); + + } + + // Select the minimum number of seeds that cover a particular set of edges + // (soft constraint) + for (const auto &[Var, Seed] : SeedVars) + Solver.addWeightedClause({-Var}, Weights[sys::path::filename(Seed)]); + + EndTimer(); + + // ------------------------------------------------------------------------ // + // Generate a solution + // ------------------------------------------------------------------------ // + + StatMsg() << "Solving...\n"; + StartTimer(); + + const bool Solved = Solver.solve(); + + EndTimer(); + + // ------------------------------------------------------------------------ // + // Save the solution + // + // This will copy the selected seeds to the given output directory. 
+ // ------------------------------------------------------------------------ // + + SmallVector<StringRef, 64> Solution; + SmallString<32> InputSeed, OutputSeed; + + if (Solved) { + + for (const auto &[Var, Seed] : SeedVars) + if (Solver.getValue(Var) > 0) Solution.push_back(Seed); + + } else { + + ErrMsg() << "Failed to find an optimal solution for '" << InputDir << "'\n"; + return 1; + + } + + StatMsg() << "Copying " << Solution.size() << " seeds to '" << OutputDir + << "'...\n"; + StartTimer(); + + SeedCount = 0; + + for (const auto &Seed : Solution) { + + InputSeed = SeedFiles[Seed]; + sys::path::append(InputSeed, Seed); + + OutputSeed = OutputDir; + sys::path::append(OutputSeed, Seed); + + if ((EC = sys::fs::copy_file(InputSeed, OutputSeed))) { + + ErrMsg() << "Failed to copy '" << Seed << "' to '" << OutputDir + << "': " << EC.message() << '\n'; + return 1; + + } + + } + + EndTimer(); + SuccMsg() << "Done!\n"; + + return 0; + +} + diff --git a/utils/qbdi_mode/README.md b/utils/qbdi_mode/README.md index cf5d3359..334199f2 100755 --- a/utils/qbdi_mode/README.md +++ b/utils/qbdi_mode/README.md @@ -2,7 +2,7 @@ NOTE: this code is outdated and first would need to be adapted to the current afl++ versions first. -Try afl_frida or fpicker [https://github.com/ttdennis/fpicker/](https://github.com/ttdennis/fpicker/) first, maybe they suite your need. +Try frida_mode/ or fpicker [https://github.com/ttdennis/fpicker/](https://github.com/ttdennis/fpicker/) first, maybe they suite your need. ## 1) Introduction |