diff options
-rw-r--r-- | instrumentation/afl-compiler-rt.o.c | 279 | ||||
-rw-r--r-- | src/afl-cc.c | 26 | ||||
-rw-r--r-- | utils/dynamic_covfilter/README.md | 55 | ||||
-rw-r--r-- | utils/dynamic_covfilter/make_symbol_list.py | 73 |
4 files changed, 393 insertions, 40 deletions
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index 39a762b6..8e55d6a0 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -22,6 +22,10 @@ #define __USE_GNU #endif #include <dlfcn.h> + +__attribute__((weak)) void __sanitizer_symbolize_pc(void *, const char *fmt, + char *out_buf, + size_t out_buf_size); #endif #ifdef __ANDROID__ @@ -124,8 +128,8 @@ struct afl_module_info_t { uintptr_t base_address; // PC Guard start/stop - u32 start; - u32 stop; + u32 *start; + u32 *stop; // PC Table begin/end const uintptr_t *pcs_beg; @@ -147,6 +151,18 @@ afl_module_info_t *__afl_module_info = NULL; u32 __afl_pcmap_size = 0; uintptr_t *__afl_pcmap_ptr = NULL; + +typedef struct { + + uintptr_t start; + u32 len; + +} FilterPCEntry; + +u32 __afl_filter_pcs_size = 0; +FilterPCEntry *__afl_filter_pcs = NULL; +u8 *__afl_filter_pcs_module = NULL; + #endif // __AFL_CODE_COVERAGE /* 1 if we are running in afl, and the forkserver was started, else 0 */ @@ -1587,15 +1603,116 @@ void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { } #ifdef __AFL_CODE_COVERAGE -void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, - const uintptr_t *pcs_end) { +void afl_read_pc_filter_file(const char *filter_file) { - if (__afl_debug) { + FILE *file; + char ch; + + file = fopen(filter_file, "r"); + if (file == NULL) { + + perror("Error opening file"); + return; + + } + + // Check how many PCs we expect to read + while ((ch = fgetc(file)) != EOF) { + + if (ch == '\n') { __afl_filter_pcs_size++; } + + } + + // Rewind to actually read the PCs + fseek(file, 0, SEEK_SET); + + __afl_filter_pcs = malloc(__afl_filter_pcs_size * sizeof(FilterPCEntry)); + if (!__afl_filter_pcs) { + + perror("Error allocating PC array"); + return; + + } + + for (size_t i = 0; i < __afl_filter_pcs_size; i++) { + + fscanf(file, "%lx", &(__afl_filter_pcs[i].start)); + ch = fgetc(file); // Read tab + fscanf(file, "%u", &(__afl_filter_pcs[i].len)); + ch = fgetc(file); // Read tab + + if (!__afl_filter_pcs_module) { + + // Read the module name and store it. + // TODO: We only support one module here right now although + // there is technically no reason to support multiple modules + // in one go. + size_t max_module_len = 255; + size_t i = 0; + __afl_filter_pcs_module = malloc(max_module_len); + while (i < max_module_len - 1 && + (__afl_filter_pcs_module[i] = fgetc(file)) != '\t') { + + ++i; + + } - fprintf(stderr, "DEBUG: __sanitizer_cov_pcs_init called\n"); + __afl_filter_pcs_module[i] = '\0'; + fprintf(stderr, "DEBUGXXX: Read module name %s\n", + __afl_filter_pcs_module); + + } + + while ((ch = fgetc(file)) != '\n' && ch != EOF) + ; + + } + + fclose(file); + +} + +u32 locate_in_pcs(uintptr_t needle, u32 *index) { + + size_t lower_bound = 0; + size_t upper_bound = __afl_filter_pcs_size - 1; + + while (lower_bound < __afl_filter_pcs_size && lower_bound <= upper_bound) { + + size_t current_index = lower_bound + (upper_bound - lower_bound) / 2; + + if (__afl_filter_pcs[current_index].start <= needle) { + + if (__afl_filter_pcs[current_index].start + + __afl_filter_pcs[current_index].len > + needle) { + + // Hit + *index = current_index; + return 1; + + } else { + + lower_bound = current_index + 1; + + } + + } else { + + if (!current_index) { break; } + upper_bound = current_index - 1; + + } } + return 0; + +} + +void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, + const uintptr_t *pcs_end) { + // If for whatever reason, we cannot get dlinfo here, then pc_guard_init also // couldn't get it and we'd end up attributing to the wrong module. Dl_info dlinfo; @@ -1608,6 +1725,16 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, } + if (__afl_debug) { + + fprintf( + stderr, + "DEBUG: (%u) __sanitizer_cov_pcs_init called for module %s with %ld " + "PCs\n", + getpid(), dlinfo.dli_fname, pcs_end - pcs_beg); + + } + afl_module_info_t *last_module_info = __afl_module_info; while (last_module_info && last_module_info->next) { @@ -1623,34 +1750,78 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, } + if (strcmp(dlinfo.dli_fname, last_module_info->name)) { + + // This can happen with modules being loaded after the forkserver + // where we decide to not track the module. In that case we must + // not track it here either. + fprintf( + stderr, + "WARNING: __sanitizer_cov_pcs_init module info mismatch: %s vs %s\n", + dlinfo.dli_fname, last_module_info->name); + return; + + } + last_module_info->pcs_beg = pcs_beg; last_module_info->pcs_end = pcs_end; + // This is a direct filter based on symbolizing inside the runtime. + // It should only be used with smaller binaries to avoid long startup + // times. Currently, this only supports a single token to scan for. + const char *pc_filter = getenv("AFL_PC_FILTER"); + + // This is a much faster PC filter based on pre-symbolized input data + // that is sorted for fast lookup through binary search. This method + // of filtering is suitable even for very large binaries. + const char *pc_filter_file = getenv("AFL_PC_FILTER_FILE"); + if (pc_filter_file && !__afl_filter_pcs) { + + afl_read_pc_filter_file(pc_filter_file); + + } + // Now update the pcmap. If this is the last module coming in, after all // pre-loaded code, then this will also map all of our delayed previous // modules. - - if (!__afl_pcmap_ptr) { return; } - + // for (afl_module_info_t *mod_info = __afl_module_info; mod_info; mod_info = mod_info->next) { if (mod_info->mapped) { continue; } + if (!mod_info->start) { + + fprintf(stderr, + "ERROR: __sanitizer_cov_pcs_init called with mod_info->start == " + "NULL (%s)\n", + mod_info->name); + abort(); + + } + PCTableEntry *start = (PCTableEntry *)(mod_info->pcs_beg); PCTableEntry *end = (PCTableEntry *)(mod_info->pcs_end); + if (!*mod_info->stop) { continue; } + u32 in_module_index = 0; while (start < end) { - if (mod_info->start + in_module_index >= __afl_map_size) { + if (*mod_info->start + in_module_index >= __afl_map_size) { - fprintf(stderr, "ERROR: __sanitizer_cov_pcs_init out of bounds?!\n"); + fprintf(stderr, + "ERROR: __sanitizer_cov_pcs_init out of bounds?! Start: %u " + "Stop: %u Map Size: %u (%s)\n", + *mod_info->start, *mod_info->stop, __afl_map_size, + mod_info->name); abort(); } + u32 orig_start_index = *mod_info->start; + uintptr_t PC = start->PC; // This is what `GetPreviousInstructionPc` in sanitizer runtime does @@ -1660,7 +1831,58 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, // Calculate relative offset in module PC = PC - mod_info->base_address; - __afl_pcmap_ptr[mod_info->start + in_module_index] = PC; + if (__afl_pcmap_ptr) { + + __afl_pcmap_ptr[orig_start_index + in_module_index] = PC; + + } + + if (pc_filter) { + + char PcDescr[1024]; + // This function is a part of the sanitizer run-time. + // To use it, link with AddressSanitizer or other sanitizer. + __sanitizer_symbolize_pc((void *)start->PC, "%p %F %L", PcDescr, + sizeof(PcDescr)); + + if (strstr(PcDescr, pc_filter)) { + + if (__afl_debug) + fprintf( + stderr, + "DEBUG: Selective instrumentation match: %s (PC %p Index %u)\n", + PcDescr, (void *)start->PC, + *(mod_info->start + in_module_index)); + // No change to guard needed + + } else { + + // Null out the guard to disable this edge + *(mod_info->start + in_module_index) = 0; + + } + + } + + if (__afl_filter_pcs && strstr(mod_info->name, __afl_filter_pcs_module)) { + + u32 result_index; + if (locate_in_pcs(PC, &result_index)) { + + if (__afl_debug) + fprintf(stderr, + "DEBUG: Selective instrumentation match: (PC %lx File " + "Index %u PC Index %u)\n", + PC, result_index, in_module_index); + + } else { + + // Null out the guard to disable this edge + *(mod_info->start + in_module_index) = 0; + + } + + } start++; in_module_index++; @@ -1671,8 +1893,10 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, if (__afl_debug) { - fprintf(stderr, "DEBUG: __sanitizer_cov_pcs_init initialized %u PCs\n", - in_module_index); + fprintf(stderr, + "DEBUG: __sanitizer_cov_pcs_init successfully mapped %s with %u " + "PCs\n", + mod_info->name, in_module_index); } @@ -1706,9 +1930,9 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { fprintf( stderr, "DEBUG: Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges) " - "after_fs=%u\n", + "after_fs=%u *start=%u\n", start, stop, (unsigned long)(stop - start), - __afl_already_initialized_forkserver); + __afl_already_initialized_forkserver, *start); } @@ -1740,8 +1964,8 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { mod_info->id = last_module_info ? last_module_info->id + 1 : 0; mod_info->name = strdup(dlinfo.dli_fname); mod_info->base_address = (uintptr_t)dlinfo.dli_fbase; - mod_info->start = 0; - mod_info->stop = 0; + mod_info->start = NULL; + mod_info->stop = NULL; mod_info->pcs_beg = NULL; mod_info->pcs_end = NULL; mod_info->mapped = 0; @@ -1757,8 +1981,12 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { } - fprintf(stderr, "[pcmap] Module: %s Base Address: %p\n", dlinfo.dli_fname, - dlinfo.dli_fbase); + if (__afl_debug) { + + fprintf(stderr, "[pcmap] Module: %s Base Address: %p\n", + dlinfo.dli_fname, dlinfo.dli_fbase); + + } } @@ -1861,12 +2089,17 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { #ifdef __AFL_CODE_COVERAGE if (mod_info) { - mod_info->start = *orig_start; - mod_info->stop = *(stop - 1); + if (!mod_info->start) { + + mod_info->start = orig_start; + mod_info->stop = stop - 1; + + } + if (__afl_debug) { fprintf(stderr, "DEBUG: [pcmap] Start Index: %u Stop Index: %u\n", - mod_info->start, mod_info->stop); + *(mod_info->start), *(mod_info->stop)); } diff --git a/src/afl-cc.c b/src/afl-cc.c index 6d8e1024..73487188 100644 --- a/src/afl-cc.c +++ b/src/afl-cc.c @@ -1920,35 +1920,27 @@ void add_native_pcguard(aflcc_state_t *aflcc) { /* If llvm-config doesn't figure out LLVM_MAJOR, just go on anyway and let compiler complain if doesn't work. */ - if (aflcc->instrument_opt_mode & INSTRUMENT_OPT_CODECOV) { - #if LLVM_MAJOR > 0 && LLVM_MAJOR < 6 - FATAL("pcguard instrumentation with pc-table requires LLVM 6.0.1+"); + FATAL("pcguard instrumentation with pc-table requires LLVM 6.0.1+"); #else #if LLVM_MAJOR == 0 - WARNF( - "pcguard instrumentation with pc-table requires LLVM 6.0.1+" - " otherwise the compiler will fail"); + WARNF( + "pcguard instrumentation with pc-table requires LLVM 6.0.1+" + " otherwise the compiler will fail"); #endif + if (aflcc->instrument_opt_mode & INSTRUMENT_OPT_CODECOV) { + insert_param(aflcc, "-fsanitize-coverage=trace-pc-guard,bb,no-prune,pc-table"); -#endif } else { -#if LLVM_MAJOR > 0 && LLVM_MAJOR < 4 - FATAL("pcguard instrumentation requires LLVM 4.0.1+"); -#else - #if LLVM_MAJOR == 0 - WARNF( - "pcguard instrumentation requires LLVM 4.0.1+" - " otherwise the compiler will fail"); - #endif - insert_param(aflcc, "-fsanitize-coverage=trace-pc-guard"); -#endif + insert_param(aflcc, "-fsanitize-coverage=trace-pc-guard,pc-table"); } +#endif + } void add_optimized_pcguard(aflcc_state_t *aflcc) { diff --git a/utils/dynamic_covfilter/README.md b/utils/dynamic_covfilter/README.md new file mode 100644 index 00000000..a64836f1 --- /dev/null +++ b/utils/dynamic_covfilter/README.md @@ -0,0 +1,55 @@ +# Dynamic Instrumentation Filter + +Sometimes it can be beneficial to limit the instrumentation feedback to +specific code locations. It is possible to do so at compile-time by simply +not instrumenting any undesired locations. However, there are situations +where doing this dynamically without requiring a new build can be beneficial. +Especially when dealing with larger builds, it is much more convenient to +select the target code locations at runtime instead of doing so at build time. + +There are two ways of doing this in AFL++: + +## Simple Selection with `AFL_PC_FILTER` + +This approach requires a build with `AFL_INSTRUMENTATION=llvmnative` or +`llvmcodecov` as well as an AddressSanitizer build with debug information. + +By setting the environment variable `AFL_PC_FILTER` to a string, the runtime +symbolizer is enabled in the AFL++ runtime. At startup, the runtime will call +the `__sanitizer_symbolize_pc` API to resolve every PC in every loaded module. +The runtime then matches the result using `strstr` and disables the PC guard +if the symbolized PC does not contain the specified string. + +This approach has the benefit of being very easy to use. The downside is that +it causes significant startup delays with large binaries and that it requires +an AddressSanitizer build. + +This method has no additional runtime overhead after startup. + +## Selection using pre-symbolized data file with `AFL_PC_FILTER_FILE` + +To avoid large startup time delays, a specific module can be pre-symbolized +using the `make_symbol_list.py` script. This script outputs a sorted list of +functions with their respective relative offsets and lengths in the target +binary: + +`python3 make_symbol_list.py libxul.so > libxul.symbols.txt` + +The resulting list can be filtered, e.g. using grep: + +`grep -i "webgl" libxul.symbols.txt > libxul.webgl.symbols.txt` + +Finally, you can run with `AFL_PC_FILTER_FILE=libxul.webgl.symbols.txt` to +restrict instrumentation feedback to the given locations. This approach only +has a minimal startup time delay due to the implementation only using binary +search on the given file per PC rather than reading debug information for every +PC. It also works well with Nyx, where symbolizing is usually disabled for the +target process to avoid delays with frequent crashes. + +Similar to the previous method, This approach requires a build with +`AFL_INSTRUMENTATION=llvmnative` or `llvmcodecov` as well debug information. +However, it does not require the ASan runtime as it doesn't do the symbolizing +in process. Due to the way it maps PCs to symbols, it is less accurate when it +comes to includes and inlines (it assumes all PCs within a function belong to +that function and originate from the same file). For most purposes, this should +be a reasonable simplification to quickly process even the largest binaries. diff --git a/utils/dynamic_covfilter/make_symbol_list.py b/utils/dynamic_covfilter/make_symbol_list.py new file mode 100644 index 00000000..d1dd6ab3 --- /dev/null +++ b/utils/dynamic_covfilter/make_symbol_list.py @@ -0,0 +1,73 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Written by Christian Holler <decoder at mozilla dot com> + +import json +import os +import sys +import subprocess + +if len(sys.argv) != 2: + print("Usage: %s binfile" % os.path.basename(sys.argv[0])) + sys.exit(1) + +binfile = sys.argv[1] + +addr2len = {} +addrs = [] + +output = subprocess.check_output(["objdump", "-t", binfile]).decode("utf-8") +for line in output.splitlines(): + line = line.replace("\t", " ") + components = [x for x in line.split(" ") if x] + if not components: + continue + try: + start_addr = int(components[0], 16) + except ValueError: + continue + + # Length has variable position in objdump output + length = None + for comp in components[1:]: + if len(comp) == 16: + try: + length = int(comp, 16) + break + except: + continue + + if length is None: + print("ERROR: Couldn't determine function section length: %s" % line) + + func = components[-1] + + addrs.append(start_addr) + addr2len[str(hex(start_addr))] = str(length) + +# The search implementation in the AFL runtime expects everything sorted. +addrs.sort() +addrs = [str(hex(addr)) for addr in addrs] + +# We symbolize in one go to speed things up with large binaries. +output = subprocess.check_output([ + "llvm-addr2line", + "--output-style=JSON", + "-f", "-C", "-a", "-e", + binfile], + input="\n".join(addrs).encode("utf-8")).decode("utf-8") + +output = output.strip().splitlines() +for line in output: + output = json.loads(line) + if "Symbol" in output and output["Address"] in addr2len: + final_output = [ + output["Address"], + addr2len[output["Address"]], + os.path.basename(output["ModuleName"]), + output["Symbol"][0]["FileName"], + output["Symbol"][0]["FunctionName"] + ] + print("\t".join(final_output)) |