about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--instrumentation/afl-compiler-rt.o.c279
-rw-r--r--src/afl-cc.c26
-rw-r--r--utils/dynamic_covfilter/README.md55
-rw-r--r--utils/dynamic_covfilter/make_symbol_list.py73
4 files changed, 393 insertions, 40 deletions
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
index 39a762b6..8e55d6a0 100644
--- a/instrumentation/afl-compiler-rt.o.c
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -22,6 +22,10 @@
     #define __USE_GNU
   #endif
   #include <dlfcn.h>
+
+__attribute__((weak)) void __sanitizer_symbolize_pc(void *, const char *fmt,
+                                                    char  *out_buf,
+                                                    size_t out_buf_size);
 #endif
 
 #ifdef __ANDROID__
@@ -124,8 +128,8 @@ struct afl_module_info_t {
   uintptr_t base_address;
 
   // PC Guard start/stop
-  u32 start;
-  u32 stop;
+  u32 *start;
+  u32 *stop;
 
   // PC Table begin/end
   const uintptr_t *pcs_beg;
@@ -147,6 +151,18 @@ afl_module_info_t *__afl_module_info = NULL;
 
 u32        __afl_pcmap_size = 0;
 uintptr_t *__afl_pcmap_ptr = NULL;
+
+typedef struct {
+
+  uintptr_t start;
+  u32       len;
+
+} FilterPCEntry;
+
+u32            __afl_filter_pcs_size = 0;
+FilterPCEntry *__afl_filter_pcs = NULL;
+u8            *__afl_filter_pcs_module = NULL;
+
 #endif  // __AFL_CODE_COVERAGE
 
 /* 1 if we are running in afl, and the forkserver was started, else 0 */
@@ -1587,15 +1603,116 @@ void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
 }
 
 #ifdef __AFL_CODE_COVERAGE
-void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
-                              const uintptr_t *pcs_end) {
+void afl_read_pc_filter_file(const char *filter_file) {
 
-  if (__afl_debug) {
+  FILE *file;
+  char  ch;
+
+  file = fopen(filter_file, "r");
+  if (file == NULL) {
+
+    perror("Error opening file");
+    return;
+
+  }
+
+  // Check how many PCs we expect to read
+  while ((ch = fgetc(file)) != EOF) {
+
+    if (ch == '\n') { __afl_filter_pcs_size++; }
+
+  }
+
+  // Rewind to actually read the PCs
+  fseek(file, 0, SEEK_SET);
+
+  __afl_filter_pcs = malloc(__afl_filter_pcs_size * sizeof(FilterPCEntry));
+  if (!__afl_filter_pcs) {
+
+    perror("Error allocating PC array");
+    return;
+
+  }
+
+  for (size_t i = 0; i < __afl_filter_pcs_size; i++) {
+
+    fscanf(file, "%lx", &(__afl_filter_pcs[i].start));
+    ch = fgetc(file);  // Read tab
+    fscanf(file, "%u", &(__afl_filter_pcs[i].len));
+    ch = fgetc(file);  // Read tab
+
+    if (!__afl_filter_pcs_module) {
+
+      // Read the module name and store it.
+      // TODO: We only support one module here right now although
+      // there is technically no reason not to support multiple modules
+      // in one go.
+      size_t max_module_len = 255;
+      size_t i = 0;
+      __afl_filter_pcs_module = malloc(max_module_len);
+      while (i < max_module_len - 1 &&
+             (__afl_filter_pcs_module[i] = fgetc(file)) != '\t') {
+
+        ++i;
+
+      }
 
-    fprintf(stderr, "DEBUG: __sanitizer_cov_pcs_init called\n");
+      __afl_filter_pcs_module[i] = '\0';
+      fprintf(stderr, "DEBUGXXX: Read module name %s\n",
+              __afl_filter_pcs_module);
+
+    }
+
+    while ((ch = fgetc(file)) != '\n' && ch != EOF)
+      ;
+
+  }
+
+  fclose(file);
+
+}
+
+u32 locate_in_pcs(uintptr_t needle, u32 *index) {
+
+  size_t lower_bound = 0;
+  size_t upper_bound = __afl_filter_pcs_size - 1;
+
+  while (lower_bound < __afl_filter_pcs_size && lower_bound <= upper_bound) {
+
+    size_t current_index = lower_bound + (upper_bound - lower_bound) / 2;
+
+    if (__afl_filter_pcs[current_index].start <= needle) {
+
+      if (__afl_filter_pcs[current_index].start +
+              __afl_filter_pcs[current_index].len >
+          needle) {
+
+        // Hit
+        *index = current_index;
+        return 1;
+
+      } else {
+
+        lower_bound = current_index + 1;
+
+      }
+
+    } else {
+
+      if (!current_index) { break; }
+      upper_bound = current_index - 1;
+
+    }
 
   }
 
+  return 0;
+
+}
+
+void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
+                              const uintptr_t *pcs_end) {
+
   // If for whatever reason, we cannot get dlinfo here, then pc_guard_init also
   // couldn't get it and we'd end up attributing to the wrong module.
   Dl_info dlinfo;
@@ -1608,6 +1725,16 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
 
   }
 
+  if (__afl_debug) {
+
+    fprintf(
+        stderr,
+        "DEBUG: (%u) __sanitizer_cov_pcs_init called for module %s with %ld "
+        "PCs\n",
+        getpid(), dlinfo.dli_fname, pcs_end - pcs_beg);
+
+  }
+
   afl_module_info_t *last_module_info = __afl_module_info;
   while (last_module_info && last_module_info->next) {
 
@@ -1623,34 +1750,78 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
 
   }
 
+  if (strcmp(dlinfo.dli_fname, last_module_info->name)) {
+
+    // This can happen with modules being loaded after the forkserver
+    // where we decide to not track the module. In that case we must
+    // not track it here either.
+    fprintf(
+        stderr,
+        "WARNING: __sanitizer_cov_pcs_init module info mismatch: %s vs %s\n",
+        dlinfo.dli_fname, last_module_info->name);
+    return;
+
+  }
+
   last_module_info->pcs_beg = pcs_beg;
   last_module_info->pcs_end = pcs_end;
 
+  // This is a direct filter based on symbolizing inside the runtime.
+  // It should only be used with smaller binaries to avoid long startup
+  // times. Currently, this only supports a single token to scan for.
+  const char *pc_filter = getenv("AFL_PC_FILTER");
+
+  // This is a much faster PC filter based on pre-symbolized input data
+  // that is sorted for fast lookup through binary search. This method
+  // of filtering is suitable even for very large binaries.
+  const char *pc_filter_file = getenv("AFL_PC_FILTER_FILE");
+  if (pc_filter_file && !__afl_filter_pcs) {
+
+    afl_read_pc_filter_file(pc_filter_file);
+
+  }
+
   // Now update the pcmap. If this is the last module coming in, after all
   // pre-loaded code, then this will also map all of our delayed previous
   // modules.
-
-  if (!__afl_pcmap_ptr) { return; }
-
+  //
   for (afl_module_info_t *mod_info = __afl_module_info; mod_info;
        mod_info = mod_info->next) {
 
     if (mod_info->mapped) { continue; }
 
+    if (!mod_info->start) {
+
+      fprintf(stderr,
+              "ERROR: __sanitizer_cov_pcs_init called with mod_info->start == "
+              "NULL (%s)\n",
+              mod_info->name);
+      abort();
+
+    }
+
     PCTableEntry *start = (PCTableEntry *)(mod_info->pcs_beg);
     PCTableEntry *end = (PCTableEntry *)(mod_info->pcs_end);
 
+    if (!*mod_info->stop) { continue; }
+
     u32 in_module_index = 0;
 
     while (start < end) {
 
-      if (mod_info->start + in_module_index >= __afl_map_size) {
+      if (*mod_info->start + in_module_index >= __afl_map_size) {
 
-        fprintf(stderr, "ERROR: __sanitizer_cov_pcs_init out of bounds?!\n");
+        fprintf(stderr,
+                "ERROR: __sanitizer_cov_pcs_init out of bounds?! Start: %u "
+                "Stop: %u Map Size: %u (%s)\n",
+                *mod_info->start, *mod_info->stop, __afl_map_size,
+                mod_info->name);
         abort();
 
       }
 
+      u32 orig_start_index = *mod_info->start;
+
       uintptr_t PC = start->PC;
 
       // This is what `GetPreviousInstructionPc` in sanitizer runtime does
@@ -1660,7 +1831,58 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
       // Calculate relative offset in module
       PC = PC - mod_info->base_address;
 
-      __afl_pcmap_ptr[mod_info->start + in_module_index] = PC;
+      if (__afl_pcmap_ptr) {
+
+        __afl_pcmap_ptr[orig_start_index + in_module_index] = PC;
+
+      }
+
+      if (pc_filter) {
+
+        char PcDescr[1024];
+        // This function is a part of the sanitizer run-time.
+        // To use it, link with AddressSanitizer or other sanitizer.
+        __sanitizer_symbolize_pc((void *)start->PC, "%p %F %L", PcDescr,
+                                 sizeof(PcDescr));
+
+        if (strstr(PcDescr, pc_filter)) {
+
+          if (__afl_debug)
+            fprintf(
+                stderr,
+                "DEBUG: Selective instrumentation match: %s (PC %p Index %u)\n",
+                PcDescr, (void *)start->PC,
+                *(mod_info->start + in_module_index));
+          // No change to guard needed
+
+        } else {
+
+          // Null out the guard to disable this edge
+          *(mod_info->start + in_module_index) = 0;
+
+        }
+
+      }
+
+      if (__afl_filter_pcs && strstr(mod_info->name, __afl_filter_pcs_module)) {
+
+        u32 result_index;
+        if (locate_in_pcs(PC, &result_index)) {
+
+          if (__afl_debug)
+            fprintf(stderr,
+                    "DEBUG: Selective instrumentation match: (PC %lx File "
+                    "Index %u PC Index %u)\n",
+                    PC, result_index, in_module_index);
+
+        } else {
+
+          // Null out the guard to disable this edge
+          *(mod_info->start + in_module_index) = 0;
+
+        }
+
+      }
 
       start++;
       in_module_index++;
@@ -1671,8 +1893,10 @@ void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
 
     if (__afl_debug) {
 
-      fprintf(stderr, "DEBUG: __sanitizer_cov_pcs_init initialized %u PCs\n",
-              in_module_index);
+      fprintf(stderr,
+              "DEBUG: __sanitizer_cov_pcs_init successfully mapped %s with %u "
+              "PCs\n",
+              mod_info->name, in_module_index);
 
     }
 
@@ -1706,9 +1930,9 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
     fprintf(
         stderr,
         "DEBUG: Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges) "
-        "after_fs=%u\n",
+        "after_fs=%u *start=%u\n",
         start, stop, (unsigned long)(stop - start),
-        __afl_already_initialized_forkserver);
+        __afl_already_initialized_forkserver, *start);
 
   }
 
@@ -1740,8 +1964,8 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
       mod_info->id = last_module_info ? last_module_info->id + 1 : 0;
       mod_info->name = strdup(dlinfo.dli_fname);
       mod_info->base_address = (uintptr_t)dlinfo.dli_fbase;
-      mod_info->start = 0;
-      mod_info->stop = 0;
+      mod_info->start = NULL;
+      mod_info->stop = NULL;
       mod_info->pcs_beg = NULL;
       mod_info->pcs_end = NULL;
       mod_info->mapped = 0;
@@ -1757,8 +1981,12 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
       }
 
-      fprintf(stderr, "[pcmap] Module: %s Base Address: %p\n", dlinfo.dli_fname,
-              dlinfo.dli_fbase);
+      if (__afl_debug) {
+
+        fprintf(stderr, "[pcmap] Module: %s Base Address: %p\n",
+                dlinfo.dli_fname, dlinfo.dli_fbase);
+
+      }
 
     }
 
@@ -1861,12 +2089,17 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 #ifdef __AFL_CODE_COVERAGE
   if (mod_info) {
 
-    mod_info->start = *orig_start;
-    mod_info->stop = *(stop - 1);
+    if (!mod_info->start) {
+
+      mod_info->start = orig_start;
+      mod_info->stop = stop - 1;
+
+    }
+
     if (__afl_debug) {
 
       fprintf(stderr, "DEBUG: [pcmap] Start Index: %u Stop Index: %u\n",
-              mod_info->start, mod_info->stop);
+              *(mod_info->start), *(mod_info->stop));
 
     }
 
diff --git a/src/afl-cc.c b/src/afl-cc.c
index 6d8e1024..73487188 100644
--- a/src/afl-cc.c
+++ b/src/afl-cc.c
@@ -1920,35 +1920,27 @@ void add_native_pcguard(aflcc_state_t *aflcc) {
   /* If llvm-config doesn't figure out LLVM_MAJOR, just
    go on anyway and let compiler complain if doesn't work. */
 
-  if (aflcc->instrument_opt_mode & INSTRUMENT_OPT_CODECOV) {
-
 #if LLVM_MAJOR > 0 && LLVM_MAJOR < 6
-    FATAL("pcguard instrumentation with pc-table requires LLVM 6.0.1+");
+  FATAL("pcguard instrumentation with pc-table requires LLVM 6.0.1+");
 #else
   #if LLVM_MAJOR == 0
-    WARNF(
-        "pcguard instrumentation with pc-table requires LLVM 6.0.1+"
-        " otherwise the compiler will fail");
+  WARNF(
+      "pcguard instrumentation with pc-table requires LLVM 6.0.1+"
+      " otherwise the compiler will fail");
   #endif
+  if (aflcc->instrument_opt_mode & INSTRUMENT_OPT_CODECOV) {
+
     insert_param(aflcc,
                  "-fsanitize-coverage=trace-pc-guard,bb,no-prune,pc-table");
-#endif
 
   } else {
 
-#if LLVM_MAJOR > 0 && LLVM_MAJOR < 4
-    FATAL("pcguard instrumentation requires LLVM 4.0.1+");
-#else
-  #if LLVM_MAJOR == 0
-    WARNF(
-        "pcguard instrumentation requires LLVM 4.0.1+"
-        " otherwise the compiler will fail");
-  #endif
-    insert_param(aflcc, "-fsanitize-coverage=trace-pc-guard");
-#endif
+    insert_param(aflcc, "-fsanitize-coverage=trace-pc-guard,pc-table");
 
   }
 
+#endif
+
 }
 
 void add_optimized_pcguard(aflcc_state_t *aflcc) {
diff --git a/utils/dynamic_covfilter/README.md b/utils/dynamic_covfilter/README.md
new file mode 100644
index 00000000..a64836f1
--- /dev/null
+++ b/utils/dynamic_covfilter/README.md
@@ -0,0 +1,55 @@
+# Dynamic Instrumentation Filter
+
+Sometimes it can be beneficial to limit the instrumentation feedback to
+specific code locations. It is possible to do so at compile-time by simply
+not instrumenting any undesired locations. However, there are situations
+where doing this dynamically without requiring a new build can be beneficial.
+Especially when dealing with larger builds, it is much more convenient to
+select the target code locations at runtime instead of doing so at build time.
+
+There are two ways of doing this in AFL++:
+
+## Simple Selection with `AFL_PC_FILTER`
+
+This approach requires a build with `AFL_INSTRUMENTATION=llvmnative` or
+`llvmcodecov` as well as an AddressSanitizer build with debug information.
+
+By setting the environment variable `AFL_PC_FILTER` to a string, the runtime
+symbolizer is enabled in the AFL++ runtime. At startup, the runtime will call
+the `__sanitizer_symbolize_pc` API to resolve every PC in every loaded module.
+The runtime then matches the result using `strstr` and disables the PC guard
+if the symbolized PC does not contain the specified string.
+
+This approach has the benefit of being very easy to use. The downside is that
+it causes significant startup delays with large binaries and that it requires
+an AddressSanitizer build.
+
+This method has no additional runtime overhead after startup.
+
+## Selection using pre-symbolized data file with `AFL_PC_FILTER_FILE`
+
+To avoid large startup time delays, a specific module can be pre-symbolized
+using the `make_symbol_list.py` script. This script outputs a sorted list of
+functions with their respective relative offsets and lengths in the target
+binary:
+
+`python3 make_symbol_list.py libxul.so > libxul.symbols.txt`
+
+The resulting list can be filtered, e.g. using grep:
+
+`grep -i "webgl" libxul.symbols.txt > libxul.webgl.symbols.txt`
+
+Finally, you can run with `AFL_PC_FILTER_FILE=libxul.webgl.symbols.txt` to
+restrict instrumentation feedback to the given locations. This approach only
+has a minimal startup time delay due to the implementation only using binary
+search on the given file per PC rather than reading debug information for every
+PC. It also works well with Nyx, where symbolizing is usually disabled for the
+target process to avoid delays with frequent crashes.
+
+Similar to the previous method, this approach requires a build with
+`AFL_INSTRUMENTATION=llvmnative` or `llvmcodecov` as well as debug information.
+However, it does not require the ASan runtime as it doesn't do the symbolizing
+in process. Due to the way it maps PCs to symbols, it is less accurate when it
+comes to includes and inlines (it assumes all PCs within a function belong to
+that function and originate from the same file). For most purposes, this should
+be a reasonable simplification to quickly process even the largest binaries.
diff --git a/utils/dynamic_covfilter/make_symbol_list.py b/utils/dynamic_covfilter/make_symbol_list.py
new file mode 100644
index 00000000..d1dd6ab3
--- /dev/null
+++ b/utils/dynamic_covfilter/make_symbol_list.py
@@ -0,0 +1,73 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Written by Christian Holler <decoder at mozilla dot com>
+
+import json
+import os
+import sys
+import subprocess
+
+if len(sys.argv) != 2:
+    print("Usage: %s binfile" % os.path.basename(sys.argv[0]))
+    sys.exit(1)
+
+binfile = sys.argv[1]
+
+addr2len = {}
+addrs = []
+
+output = subprocess.check_output(["objdump", "-t", binfile]).decode("utf-8")
+for line in output.splitlines():
+    line = line.replace("\t", " ")
+    components = [x for x in line.split(" ") if x]
+    if not components:
+        continue
+    try:
+        start_addr = int(components[0], 16)
+    except ValueError:
+        continue
+
+    # Length has variable position in objdump output
+    length = None
+    for comp in components[1:]:
+        if len(comp) == 16:
+            try:
+                length = int(comp, 16)
+                break
+            except:
+                continue
+
+    if length is None:
+        print("ERROR: Couldn't determine function section length: %s" % line)
+
+    func = components[-1]
+    
+    addrs.append(start_addr)
+    addr2len[str(hex(start_addr))] = str(length)
+
+# The search implementation in the AFL runtime expects everything sorted.
+addrs.sort()
+addrs = [str(hex(addr)) for addr in addrs]
+
+# We symbolize in one go to speed things up with large binaries.
+output = subprocess.check_output([
+    "llvm-addr2line",
+    "--output-style=JSON",
+    "-f", "-C", "-a", "-e",
+    binfile],
+    input="\n".join(addrs).encode("utf-8")).decode("utf-8")
+
+output = output.strip().splitlines()
+for line in output:
+    output = json.loads(line)
+    if "Symbol" in output and output["Address"] in addr2len:
+        final_output = [
+            output["Address"],
+            addr2len[output["Address"]],
+            os.path.basename(output["ModuleName"]),
+            output["Symbol"][0]["FileName"],
+            output["Symbol"][0]["FunctionName"]
+        ]
+        print("\t".join(final_output))