diff options
Diffstat (limited to 'instrumentation/afl-gcc-pass.so.cc')
-rw-r--r-- | instrumentation/afl-gcc-pass.so.cc | 1218 |
1 files changed, 780 insertions, 438 deletions
diff --git a/instrumentation/afl-gcc-pass.so.cc b/instrumentation/afl-gcc-pass.so.cc index c5614aca..6d6f4636 100644 --- a/instrumentation/afl-gcc-pass.so.cc +++ b/instrumentation/afl-gcc-pass.so.cc @@ -1,36 +1,23 @@ -// -// There are some TODOs in this file: -// - fix instrumentation via external call -// - fix inline instrumentation -// - implement instrument list feature -// - dont instrument blocks that are uninteresting -// - implement neverZero -// - -/* - american fuzzy lop++ - GCC instrumentation pass - --------------------------------------------- - - Written by Austin Seipp <aseipp@pobox.com> with bits from - Emese Revfy <re.emese@gmail.com> - - Fixed by Heiko Eißfeldt 2019-2020 for AFL++ - - GCC integration design is based on the LLVM design, which comes - from Laszlo Szekeres. Some of the boilerplate code below for - afl_pass to adapt to different GCC versions was taken from Emese - Revfy's Size Overflow plugin for GCC, licensed under the GPLv2/v3. - - (NOTE: this plugin code is under GPLv3, in order to comply with the - GCC runtime library exception, which states that you may distribute - "Target Code" from the compiler under a license of your choice, as - long as the "Compilation Process" is "Eligible", and contains no - GPL-incompatible software in GCC "during the process of - transforming high level code to target code". In this case, the - plugin will be used to generate "Target Code" during the - "Compilation Process", and thus it must be GPLv3 to be "eligible".) - - Copyright (C) 2015 Austin Seipp +/* GCC plugin for instrumentation of code for american fuzzy lop. + + Copyright 2014-2019 Free Software Foundation, Inc + Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2019-2020 AdaCore + + Written by Alexandre Oliva <oliva@adacore.com>, based on the AFL + LLVM pass by Laszlo Szekeres <lszekeres@google.com> and Michal + Zalewski <lcamtuf@google.com>, and copying a little boilerplate + from GCC's libcc1 plugin and GCC proper. Aside from the + boilerplate, namely includes and the pass data structure, and pass + initialization code and output messages borrowed and adapted from + the LLVM pass into plugin_init and plugin_finalize, the + implementation of the GCC pass proper is written from scratch, + aiming at similar behavior and performance to that of the LLVM + pass, and also at compatibility with the out-of-line + instrumentation and run times of AFL++, as well as of an earlier + GCC plugin implementation by Austin Seipp <aseipp@pobox.com>. The + implementation of Allow/Deny Lists is adapted from that in the LLVM + plugin. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,555 +34,910 @@ */ -#define BUILD_INLINE_INST +/* This file implements a GCC plugin that introduces an + instrumentation pass for AFL. What follows is the specification + used to rewrite it, extracted from the functional llvm_mode pass + and from an implementation of the gcc_plugin started by Austin + Seipp <aseipp@pobox.com>. + + Declare itself as GPL-compatible. + + Define a 'plugin_init' function. + + Check version against the global gcc_version. + + Register a PLUGIN_INFO object with .version and .help. + + Initialize the random number generator seed with GCC's + random seed. + + Set quiet mode depending on whether stderr is a terminal and + AFL_QUIET is set. + + Output some identification message if not in quiet mode. + + Parse AFL_INST_RATIO, if set, as a number between 0 and 100. Error + out if it's not in range; set up an instrumentation ratio global + otherwise. + + Introduce a single instrumentation pass after SSA. + + The new pass is to be a GIMPLE_PASS. Given the sort of + instrumentation it's supposed to do, its todo_flags_finish will + certainly need TODO_update_ssa, and TODO_cleanup_cfg. + TODO_verify_il is probably desirable, at least during debugging. + TODO_rebuild_cgraph_edges is required only in the out-of-line + instrumentation mode. + + The instrumentation pass amounts to iterating over all basic blocks + and optionally inserting one of the instrumentation sequences below + after its labels, to indicate execution entered the block. + + A block should be skipped if R(100) (from ../types.h) is >= the + global instrumentation ratio. + + A block may be skipped for other reasons, such as if all of its + predecessors have a single successor. + + For an instrumented block, a R(MAP_SIZE) say <N> should be + generated to be used as its location number. Let <C> be a compiler + constant built out of it. + + Count instrumented blocks and print a message at the end of the + compilation, if not in quiet mode. + + Instrumentation in "dumb" or "out-of-line" mode requires calling a + function, passing it the location number. The function to be + called is __afl_trace, implemented in afl-gcc-rt.o.c. Its + declaration <T> needs only be created once. + + Build the call statement <T> (<C>), then add it to the seq to be + inserted. + + Instrumentation in "fast" or "inline" mode performs the computation + of __afl_trace as part of the function. + + It needs to read and write __afl_prev_loc, a TLS u32 variable. Its + declaration <P> needs only be created once. + + It needs to read and dereference __afl_area_ptr, a pointer to (an + array of) char. Its declaration <M> needs only be created once. + + The instrumentation sequence should then be filled with the + following statements: + + Load from <P> to a temporary (<TP>) of the same type. + + Compute <TP> ^ <C> in sizetype, converting types as needed. + + Pointer-add <B> (to be introduced at a later point) and <I> into + another temporary <A>. + + Increment the <*A> MEM_REF. + + Store <C> >> 1 in <P>. + + Temporaries used above need only be created once per function. + + If any block was instrumented in a function, an initializer for <B> + needs to be introduced, loading it from <M> and inserting it in the + entry edge for the entry block. +*/ #include "../include/config.h" #include "../include/debug.h" -/* clear helper macros AFL types pull in, which intervene with gcc-plugin - * headers from GCC-8 */ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + #ifdef likely - #undef likely +# undef likely #endif #ifdef unlikely - #undef unlikely +# undef unlikely #endif -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - #include <list> #include <string> #include <fstream> +#include <algorithm> +#include <fnmatch.h> + #include <gcc-plugin.h> #include <plugin-version.h> -#include <diagnostic.h> -#include <tree.h> -#include <tree-ssa.h> +#include <toplev.h> #include <tree-pass.h> -#include <tree-ssa-alias.h> -#include <basic-block.h> -#include <gimple-expr.h> +#include <context.h> +#include <tree.h> +#include <gimplify.h> #include <gimple.h> #include <gimple-iterator.h> -#include <gimple-ssa.h> -#include <version.h> -#include <toplev.h> +#include <ssa.h> + #include <intl.h> -#include <context.h> -#include <stringpool.h> -#include <cgraph.h> -#include <cfgloop.h> -/* -------------------------------------------------------------------------- */ -/* -- AFL instrumentation pass ---------------------------------------------- */ +/* This plugin, being under the same license as GCC, satisfies the + "GPL-compatible Software" definition in the GCC RUNTIME LIBRARY + EXCEPTION, so it can be part of an "Eligible" "Compilation + Process". */ +int plugin_is_GPL_compatible = 1; -static int be_quiet = 0; -static unsigned int inst_ratio = 100; -static bool inst_ext = true; -static std::list<std::string> myInstrumentList; +namespace { -static unsigned int ext_call_instrument(function *fun) { +static const struct pass_data afl_pass_data = + { + .type = GIMPLE_PASS, + .name = "afl", + .optinfo_flags = OPTGROUP_NONE, + .tv_id = TV_NONE, + .properties_required = 0, + .properties_provided = 0, + .properties_destroyed = 0, + .todo_flags_start = 0, + .todo_flags_finish = (TODO_update_ssa + | TODO_cleanup_cfg + | TODO_verify_il), + }; + +struct afl_pass : gimple_opt_pass { + afl_pass (bool quiet, unsigned int ratio) + : gimple_opt_pass (afl_pass_data, g), + be_quiet (quiet), + debug (!!getenv ("AFL_DEBUG")), + inst_ratio (ratio), +#ifdef AFL_GCC_OUT_OF_LINE + out_of_line (!!(AFL_GCC_OUT_OF_LINE)), +#else + out_of_line (getenv ("AFL_GCC_OUT_OF_LINE")), +#endif + neverZero (!getenv ("AFL_GCC_SKIP_NEVERZERO")), + inst_blocks (0) + { + initInstrumentList (); + } - /* Instrument all the things! */ - basic_block bb; - unsigned finst_blocks = 0; - unsigned fcnt_blocks = 0; + /* Are we outputting to a non-terminal, or running with AFL_QUIET + set? */ + const bool be_quiet; + + /* Are we running with AFL_DEBUG set? */ + const bool debug; + + /* How likely (%) is a block to be instrumented? */ + const unsigned int inst_ratio; + + /* Should we use slow, out-of-line call-based instrumentation? */ + const bool out_of_line; + + /* Should we make sure the map edge-crossing counters never wrap + around to zero? */ + const bool neverZero; + + /* Count instrumented blocks. */ + int inst_blocks; + + virtual unsigned int + execute (function *fn) + { + if (!isInInstrumentList(fn)) + return 0; + + int blocks = 0; + + /* These are temporaries used by inline instrumentation only, that + are live throughout the function. */ + tree ploc = NULL, indx = NULL, map = NULL, map_ptr = NULL, + ntry = NULL, cntr = NULL, xaddc = NULL, xincr = NULL; + + basic_block bb; + FOR_EACH_BB_FN (bb, fn) + { + if (!instrument_block_p (bb)) + continue; + + /* Generate the block identifier. */ + unsigned bid = R (MAP_SIZE); + tree bidt = build_int_cst (sizetype, bid); + + gimple_seq seq = NULL; + + if (out_of_line) + { + static tree afl_trace = get_afl_trace_decl (); + + /* Call __afl_trace with bid, the new location; */ + gcall *call = gimple_build_call (afl_trace, 1, bidt); + gimple_seq_add_stmt (&seq, call); + } + else + { + static tree afl_prev_loc = get_afl_prev_loc_decl (); + static tree afl_area_ptr = get_afl_area_ptr_decl (); + + /* Load __afl_prev_loc to a temporary ploc. */ + if (blocks == 0) + ploc = create_tmp_var (TREE_TYPE (afl_prev_loc), ".afl_prev_loc"); + gimple *load_loc = gimple_build_assign (ploc, afl_prev_loc); + gimple_seq_add_stmt (&seq, load_loc); + + /* Compute the index into the map referenced by area_ptr + that we're to update: indx = (sizetype) ploc ^ bid. */ + if (blocks == 0) + indx = create_tmp_var (TREE_TYPE (bidt), ".afl_index"); + gimple *conv_ploc + = gimple_build_assign (indx, + fold_convert (TREE_TYPE (indx), + ploc)); + gimple_seq_add_stmt (&seq, conv_ploc); + gimple *xor_loc = gimple_build_assign (indx, BIT_XOR_EXPR, + indx, bidt); + gimple_seq_add_stmt (&seq, xor_loc); + + /* Compute the address of that map element. */ + if (blocks == 0) + { + map = afl_area_ptr; + map_ptr = create_tmp_var (TREE_TYPE (afl_area_ptr), + ".afl_map_ptr"); + ntry = create_tmp_var (TREE_TYPE (afl_area_ptr), + ".afl_map_entry"); + } + gimple *idx_map = gimple_build_assign (ntry, POINTER_PLUS_EXPR, + map_ptr, indx); + gimple_seq_add_stmt (&seq, idx_map); + + /* Increment the counter in idx_map. */ + tree memref = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (ntry)), + ntry, build_zero_cst (TREE_TYPE (ntry))); + if (blocks == 0) + cntr = create_tmp_var (TREE_TYPE (memref), ".afl_edge_count"); + + gimple *load_cntr = gimple_build_assign (cntr, memref); + gimple_seq_add_stmt (&seq, load_cntr); + + tree cntrb = cntr; + tree incrv = build_one_cst (TREE_TYPE (cntr)); + + if (neverZero) + { + /* NeverZero: if count wrapped around to zero, advance to + one. */ + if (blocks == 0) + { + xaddc = create_tmp_var (build_complex_type + (TREE_TYPE (memref)), + ".afl_edge_xaddc"); + xincr = create_tmp_var (TREE_TYPE (memref), + ".afl_edge_xincr"); + } + + auto_vec<tree> vargs (2); + vargs.quick_push (cntr); + vargs.quick_push (incrv); + gcall *add1_cntr + = gimple_build_call_internal_vec (IFN_ADD_OVERFLOW, vargs); + gimple_call_set_lhs (add1_cntr, xaddc); + gimple_seq_add_stmt (&seq, add1_cntr); + + cntrb = build1 (REALPART_EXPR, TREE_TYPE (cntr), xaddc); + incrv = build1 (IMAGPART_EXPR, TREE_TYPE (xincr), xaddc); + } + + gimple *incr_cntr = gimple_build_assign (cntr, PLUS_EXPR, + cntrb, incrv); + gimple_seq_add_stmt (&seq, incr_cntr); + + gimple *store_cntr = gimple_build_assign (unshare_expr (memref), + cntr); + gimple_seq_add_stmt (&seq, store_cntr); + + /* Store bid >> 1 in __afl_prev_loc. */ + gimple *shift_loc = gimple_build_assign (ploc, + build_int_cst + (TREE_TYPE (ploc), + bid >> 1)); + gimple_seq_add_stmt (&seq, shift_loc); + gimple *store_loc = gimple_build_assign (afl_prev_loc, ploc); + gimple_seq_add_stmt (&seq, store_loc); + } + + /* Insert the generated sequence. */ + gimple_stmt_iterator insp = gsi_after_labels (bb); + gsi_insert_seq_before (&insp, seq, GSI_SAME_STMT); + + /* Bump this function's instrumented block counter. */ + blocks++; + } - tree fntype = build_function_type_list(void_type_node, /* return */ - uint32_type_node, /* args */ - NULL_TREE); /* done */ - tree fndecl = build_fn_decl("__afl_trace", fntype); - TREE_STATIC(fndecl) = 1; /* Defined elsewhere */ - TREE_PUBLIC(fndecl) = 1; /* Public */ - DECL_EXTERNAL(fndecl) = 1; /* External linkage */ - DECL_ARTIFICIAL(fndecl) = 1; /* Injected by compiler */ + /* Aggregate the instrumented block count. */ + inst_blocks += blocks; - FOR_EACH_BB_FN(bb, fun) { + if (blocks) + { + if (out_of_line) + return TODO_rebuild_cgraph_edges; - gimple_seq fcall; - gimple_seq seq = NULL; - gimple_stmt_iterator bentry; - ++fcnt_blocks; + gimple_seq seq = NULL; - // only instrument if this basic block is the destination of a previous - // basic block that has multiple successors - // this gets rid of ~5-10% of instrumentations that are unnecessary - // result: a little more speed and less map pollution + /* Load afl_area_ptr into map_ptr. We want to do this only + once per function. */ + gimple *load_ptr = gimple_build_assign (map_ptr, map); + gimple_seq_add_stmt (&seq, load_ptr); - int more_than_one = -1; - edge ep; - edge_iterator eip; + /* Insert it in the edge to the entry block. We don't want to + insert it in the first block, since there might be a loop + or a goto back to it. Insert in the edge, which may create + another block. */ + edge e = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (fn)); + gsi_insert_seq_on_edge_immediate (e, seq); + } - FOR_EACH_EDGE(ep, eip, bb->preds) { + return 0; + } - int count = 0; - if (more_than_one == -1) more_than_one = 0; + /* Decide whether to instrument block BB. Skip it due to the random + distribution, or if it's the single successor of all its + predecessors. */ + inline bool + instrument_block_p (basic_block bb) + { + if (R (100) >= inst_ratio) + return false; + + edge e; edge_iterator ei; + FOR_EACH_EDGE (e, ei, bb->preds) + if (!single_succ_p (e->src)) + return true; + + return false; + } - basic_block Pred = ep->src; - edge es; - edge_iterator eis; - FOR_EACH_EDGE(es, eis, Pred->succs) { + /* Create and return a declaration for the __afl_trace rt function. */ + static inline tree + get_afl_trace_decl () + { + tree type = build_function_type_list (void_type_node, + uint16_type_node, + NULL_TREE); + tree decl = build_fn_decl ("__afl_trace", type); - basic_block Succ = es->dest; - if (Succ != NULL) count++; + TREE_PUBLIC (decl) = 1; + DECL_EXTERNAL (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; - } + return decl; + } - if (count > 1) more_than_one = 1; + /* Create and return a declaration for the __afl_prev_loc + thread-local variable. */ + static inline tree + get_afl_prev_loc_decl () + { + tree decl = build_decl (BUILTINS_LOCATION, VAR_DECL, + get_identifier ("__afl_prev_loc"), + uint32_type_node); + TREE_PUBLIC (decl) = 1; + DECL_EXTERNAL (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; + TREE_STATIC (decl) = 1; + set_decl_tls_model (decl, + (flag_pic + ? TLS_MODEL_INITIAL_EXEC + : TLS_MODEL_LOCAL_EXEC)); + return decl; + } + + /* Create and return a declaration for the __afl_prev_loc + thread-local variable. */ + static inline tree + get_afl_area_ptr_decl () + { + tree type = build_pointer_type (unsigned_char_type_node); + tree decl = build_decl (BUILTINS_LOCATION, VAR_DECL, + get_identifier ("__afl_area_ptr"), + type); + TREE_PUBLIC (decl) = 1; + DECL_EXTERNAL (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; + TREE_STATIC (decl) = 1; + + return decl; + } + /* This is registered as a plugin finalize callback, to print an + instrumentation summary unless in quiet mode. */ + static void + plugin_finalize (void *, void *p) + { + opt_pass *op = (opt_pass *)p; + afl_pass &self = (afl_pass &)*op; + + if (!self.be_quiet) { + if (!self.inst_blocks) + WARNF ("No instrumentation targets found."); + else + OKF ("Instrumented %u locations (%s mode, %s, ratio %u%%).", + self.inst_blocks, + getenv("AFL_HARDEN") ? G_("hardened") : G_("non-hardened"), + self.out_of_line ? G_("out of line") : G_("inline"), + self.inst_ratio); } + } - if (more_than_one != 1) continue; +#define report_fatal_error(msg) BADF(msg) - /* Bail on this block if we trip the specified ratio */ - if (R(100) >= inst_ratio) continue; + std::list<std::string> allowListFiles; + std::list<std::string> allowListFunctions; + std::list<std::string> denyListFiles; + std::list<std::string> denyListFunctions; - /* Make up cur_loc */ - unsigned int rand_loc = R(MAP_SIZE); - tree cur_loc = build_int_cst(uint32_type_node, rand_loc); + /* Note: this ignore check is also called in isInInstrumentList() */ + bool isIgnoreFunction(function *F) { - /* Update bitmap via external call */ - /* to quote: - * /+ Trace a basic block with some ID +/ - * void __afl_trace(u32 x); - */ + // Starting from "LLVMFuzzer" these are functions used in libfuzzer based + // fuzzing campaign installations, e.g. oss-fuzz - fcall = gimple_build_call( - fndecl, 1, - cur_loc); /* generate the function _call_ to above built reference, with - *1* parameter -> the random const for the location */ - gimple_seq_add_stmt(&seq, fcall); /* and insert into a sequence */ + static const char *ignoreList[] = { - /* Done - grab the entry to the block and insert sequence */ - bentry = gsi_after_labels(bb); - gsi_insert_seq_before(&bentry, seq, GSI_SAME_STMT); + "asan.", + "llvm.", + "sancov.", + "__ubsan_", + "ign.", + "__afl_", + "_fini", + "__libc_csu", + "__asan", + "__msan", + "__cmplog", + "__sancov", + "msan.", + "LLVMFuzzer", + "__decide_deferred", + "maybe_duplicate_stderr", + "discard_output", + "close_stdout", + "dup_and_close_stderr", + "maybe_close_fd_mask", + "ExecuteFilesOnyByOne" - ++finst_blocks; + }; - } + const char *name = IDENTIFIER_POINTER (DECL_NAME (F->decl)); + int len = IDENTIFIER_LENGTH (DECL_NAME (F->decl)); + + for (auto const &ignoreListFunc : ignoreList) { - /* Say something nice. */ - if (!be_quiet) { + if (strncmp (name, ignoreListFunc, len) == 0) { return true; } + + } - if (!finst_blocks) - WARNF(G_("No instrumentation targets found in " cBRI "%s" cRST), - function_name(fun)); - else if (finst_blocks < fcnt_blocks) - OKF(G_("Instrumented %2u /%2u locations in " cBRI "%s" cRST), - finst_blocks, fcnt_blocks, function_name(fun)); - else - OKF(G_("Instrumented %2u locations in " cBRI "%s" cRST), finst_blocks, - function_name(fun)); + return false; } - return 0; + void initInstrumentList() { + + char *allowlist = getenv("AFL_GCC_ALLOWLIST"); + if (!allowlist) allowlist = getenv("AFL_GCC_INSTRUMENT_FILE"); + if (!allowlist) allowlist = getenv("AFL_GCC_WHITELIST"); + if (!allowlist) allowlist = getenv("AFL_LLVM_ALLOWLIST"); + if (!allowlist) allowlist = getenv("AFL_LLVM_INSTRUMENT_FILE"); + if (!allowlist) allowlist = getenv("AFL_LLVM_WHITELIST"); + char *denylist = getenv("AFL_GCC_DENYLIST"); + if (!denylist) denylist = getenv("AFL_GCC_BLOCKLIST"); + if (!denylist) denylist = getenv("AFL_LLVM_DENYLIST"); + if (!denylist) denylist = getenv("AFL_LLVM_BLOCKLIST"); + + if (allowlist && denylist) + FATAL( + "You can only specify either AFL_GCC_ALLOWLIST or AFL_GCC_DENYLIST " + "but not both!"); + + if (allowlist) { + + std::string line; + std::ifstream fileStream; + fileStream.open(allowlist); + if (!fileStream) report_fatal_error("Unable to open AFL_GCC_ALLOWLIST"); + getline(fileStream, line); -} + while (fileStream) { + + int is_file = -1; + std::size_t npos; + std::string original_line = line; + + line.erase(std::remove_if(line.begin(), line.end(), ::isspace), + line.end()); + + // remove # and following + if ((npos = line.find("#")) != std::string::npos) + line = line.substr(0, npos); + + if (line.compare(0, 4, "fun:") == 0) { + + is_file = 0; + line = line.substr(4); + + } else if (line.compare(0, 9, "function:") == 0) { + + is_file = 0; + line = line.substr(9); + + } else if (line.compare(0, 4, "src:") == 0) { + + is_file = 1; + line = line.substr(4); + + } else if (line.compare(0, 7, "source:") == 0) { + + is_file = 1; + line = line.substr(7); + + } + + if (line.find(":") != std::string::npos) { + + FATAL("invalid line in AFL_GCC_ALLOWLIST: %s", original_line.c_str()); + + } -static unsigned int inline_instrument(function *fun) { - - /* Instrument all the things! */ - basic_block bb; - unsigned finst_blocks = 0; - unsigned fcnt_blocks = 0; - tree one = build_int_cst(unsigned_char_type_node, 1); - // tree zero = build_int_cst(unsigned_char_type_node, 0); - - /* Set up global type declarations */ - tree map_type = build_pointer_type(unsigned_char_type_node); - tree map_ptr_g = - build_decl(UNKNOWN_LOCATION, VAR_DECL, - get_identifier_with_length("__afl_area_ptr", 14), map_type); - TREE_USED(map_ptr_g) = 1; - TREE_STATIC(map_ptr_g) = 1; /* Defined elsewhere */ - DECL_EXTERNAL(map_ptr_g) = 1; /* External linkage */ - DECL_PRESERVE_P(map_ptr_g) = 1; - DECL_ARTIFICIAL(map_ptr_g) = 1; /* Injected by compiler */ - rest_of_decl_compilation(map_ptr_g, 1, 0); - - tree prev_loc_g = build_decl(UNKNOWN_LOCATION, VAR_DECL, - get_identifier_with_length("__afl_prev_loc", 14), - uint32_type_node); - TREE_USED(prev_loc_g) = 1; - TREE_STATIC(prev_loc_g) = 1; /* Defined elsewhere */ - DECL_EXTERNAL(prev_loc_g) = 1; /* External linkage */ - DECL_PRESERVE_P(prev_loc_g) = 1; - DECL_ARTIFICIAL(prev_loc_g) = 1; /* Injected by compiler */ - set_decl_tls_model(prev_loc_g, TLS_MODEL_REAL); /* TLS attribute */ - rest_of_decl_compilation(prev_loc_g, 1, 0); - - FOR_EACH_BB_FN(bb, fun) { - - gimple_seq seq = NULL; - gimple_stmt_iterator bentry; - ++fcnt_blocks; - - // only instrument if this basic block is the destination of a previous - // basic block that has multiple successors - // this gets rid of ~5-10% of instrumentations that are unnecessary - // result: a little more speed and less map pollution - - int more_than_one = -1; - edge ep; - edge_iterator eip; - FOR_EACH_EDGE(ep, eip, bb->preds) { - - int count = 0; - if (more_than_one == -1) more_than_one = 0; - - basic_block Pred = ep->src; - edge es; - edge_iterator eis; - FOR_EACH_EDGE(es, eis, Pred->succs) { - - basic_block Succ = es->dest; - if (Succ != NULL) count++; + if (line.length() > 0) { + + // if the entry contains / or . it must be a file + if (is_file == -1) + if (line.find("/") != std::string::npos || + line.find(".") != std::string::npos) + is_file = 1; + // otherwise it is a function + + if (is_file == 1) + allowListFiles.push_back(line); + else + allowListFunctions.push_back(line); + getline(fileStream, line); + + } } - if (count > 1) more_than_one = 1; + if (debug) + SAYF(cMGN "[D] " cRST + "loaded allowlist with %zu file and %zu function entries\n", + allowListFiles.size(), allowListFunctions.size()); } - if (more_than_one != 1) continue; - - /* Bail on this block if we trip the specified ratio */ - if (R(100) >= inst_ratio) continue; - - /* Make up cur_loc */ - - unsigned int rand_loc = R(MAP_SIZE); - tree cur_loc = build_int_cst(uint32_type_node, rand_loc); - - /* Load prev_loc, xor with cur_loc */ - // gimple_assign <var_decl, prev_loc.0_1, prev_loc, NULL, NULL> - tree prev_loc = create_tmp_var_raw(uint32_type_node, "prev_loc"); - gassign *g = gimple_build_assign(prev_loc, VAR_DECL, prev_loc_g); - gimple_seq_add_stmt(&seq, g); // load prev_loc - update_stmt(g); - - // gimple_assign <bit_xor_expr, _2, prev_loc.0_1, 47231, NULL> - tree area_off = create_tmp_var_raw(uint32_type_node, "area_off"); - g = gimple_build_assign(area_off, BIT_XOR_EXPR, prev_loc, cur_loc); - gimple_seq_add_stmt(&seq, g); // area_off = prev_loc ^ cur_loc - update_stmt(g); - - /* Update bitmap */ - - // gimple_assign <addr_expr, p_6, &map[_2], NULL, NULL> - tree map_ptr = create_tmp_var(map_type, "map_ptr"); - tree map_ptr2 = create_tmp_var(map_type, "map_ptr2"); - - g = gimple_build_assign(map_ptr, map_ptr_g); - gimple_seq_add_stmt(&seq, g); // map_ptr = __afl_area_ptr - update_stmt(g); - -#if 1 - #if 0 - tree addr = build2(ADDR_EXPR, map_type, map_ptr, area_off); - g = gimple_build_assign(map_ptr2, MODIFY_EXPR, addr); - gimple_seq_add_stmt(&seq, g); // map_ptr2 = map_ptr + area_off - update_stmt(g); - #else - g = gimple_build_assign(map_ptr2, PLUS_EXPR, map_ptr, area_off); - gimple_seq_add_stmt(&seq, g); // map_ptr2 = map_ptr + area_off - update_stmt(g); - #endif - - // gimple_assign <mem_ref, _3, *p_6, NULL, NULL> - tree tmp1 = create_tmp_var_raw(unsigned_char_type_node, "tmp1"); - g = gimple_build_assign(tmp1, MEM_REF, map_ptr2); - gimple_seq_add_stmt(&seq, g); // tmp1 = *map_ptr2 - update_stmt(g); -#else - tree atIndex = build2(PLUS_EXPR, uint32_type_node, map_ptr, area_off); - tree array_address = build1(ADDR_EXPR, map_type, atIndex); - tree array_access = build1(INDIRECT_REF, map_type, array_address); - tree tmp1 = create_tmp_var(unsigned_char_type_node, "tmp1"); - g = gimple_build_assign(tmp1, array_access); - gimple_seq_add_stmt(&seq, g); // tmp1 = *(map_ptr + area_off) - update_stmt(g); -#endif - // gimple_assign <plus_expr, _4, _3, 1, NULL> - tree tmp2 = create_tmp_var_raw(unsigned_char_type_node, "tmp2"); - g = gimple_build_assign(tmp2, PLUS_EXPR, tmp1, one); - gimple_seq_add_stmt(&seq, g); // tmp2 = tmp1 + 1 - update_stmt(g); + if (denylist) { - // TODO: neverZero: here we have to check if tmp3 == 0 - // and add 1 if so + std::string line; + std::ifstream fileStream; + fileStream.open(denylist); + if (!fileStream) report_fatal_error("Unable to open AFL_GCC_DENYLIST"); + getline(fileStream, line); - // gimple_assign <ssa_name, *p_6, _4, NULL, NULL> - // tree map_ptr3 = create_tmp_var_raw(map_type, "map_ptr3"); - g = gimple_build_assign(map_ptr2, INDIRECT_REF, tmp2); - gimple_seq_add_stmt(&seq, g); // *map_ptr2 = tmp2 - update_stmt(g); + while (fileStream) { - /* Set prev_loc to cur_loc >> 1 */ + int is_file = -1; + std::size_t npos; + std::string original_line = line; - // gimple_assign <integer_cst, prev_loc, 23615, NULL, NULL> - tree shifted_loc = build_int_cst(TREE_TYPE(prev_loc_g), rand_loc >> 1); - tree prev_loc2 = create_tmp_var_raw(uint32_type_node, "prev_loc2"); - g = gimple_build_assign(prev_loc2, shifted_loc); - gimple_seq_add_stmt(&seq, g); // __afl_prev_loc = cur_loc >> 1 - update_stmt(g); - g = gimple_build_assign(prev_loc_g, prev_loc2); - gimple_seq_add_stmt(&seq, g); // __afl_prev_loc = cur_loc >> 1 - update_stmt(g); + line.erase(std::remove_if(line.begin(), line.end(), ::isspace), + line.end()); - /* Done - grab the entry to the block and insert sequence */ + // remove # and following + if ((npos = line.find("#")) != std::string::npos) + line = line.substr(0, npos); - bentry = gsi_after_labels(bb); - gsi_insert_seq_before(&bentry, seq, GSI_NEW_STMT); + if (line.compare(0, 4, "fun:") == 0) { - ++finst_blocks; + is_file = 0; + line = line.substr(4); - } + } else if (line.compare(0, 9, "function:") == 0) { - /* Say something nice. */ - if (!be_quiet) { + is_file = 0; + line = line.substr(9); - if (!finst_blocks) - WARNF(G_("No instrumentation targets found in " cBRI "%s" cRST), - function_name(fun)); - else if (finst_blocks < fcnt_blocks) - OKF(G_("Instrumented %2u /%2u locations in " cBRI "%s" cRST), - finst_blocks, fcnt_blocks, function_name(fun)); - else - OKF(G_("Instrumented %2u locations in " cBRI "%s" cRST), finst_blocks, - function_name(fun)); + } else if (line.compare(0, 4, "src:") == 0) { - } + is_file = 1; + line = line.substr(4); - return 0; + } else if (line.compare(0, 7, "source:") == 0) { -} + is_file = 1; + line = line.substr(7); -/* -------------------------------------------------------------------------- */ -/* -- Boilerplate and initialization ---------------------------------------- */ + } -static const struct pass_data afl_pass_data = { + if (line.find(":") != std::string::npos) { - .type = GIMPLE_PASS, - .name = "afl-inst", - .optinfo_flags = OPTGROUP_NONE, + FATAL("invalid line in AFL_GCC_DENYLIST: %s", original_line.c_str()); - .tv_id = TV_NONE, - .properties_required = 0, - .properties_provided = 0, - .properties_destroyed = 0, - .todo_flags_start = 0, - // NOTE(aseipp): it's very, very important to include - // at least 'TODO_update_ssa' here so that GCC will - // properly update the resulting SSA form, e.g., to - // include new PHI nodes for newly added symbols or - // names. Do not remove this. Do not taunt Happy Fun - // Ball. - .todo_flags_finish = TODO_update_ssa | TODO_verify_il | TODO_cleanup_cfg, + } -}; + if (line.length() > 0) { -namespace { + // if the entry contains / or . it must be a file + if (is_file == -1) + if (line.find("/") != std::string::npos || + line.find(".") != std::string::npos) + is_file = 1; + // otherwise it is a function -class afl_pass : public gimple_opt_pass { + if (is_file == 1) + denyListFiles.push_back(line); + else + denyListFunctions.push_back(line); + getline(fileStream, line); - private: - bool do_ext_call; + } - public: - afl_pass(bool ext_call, gcc::context *g) - : gimple_opt_pass(afl_pass_data, g), do_ext_call(ext_call) { + } + + if (debug) + SAYF(cMGN "[D] " cRST + "loaded denylist with %zu file and %zu function entries\n", + denyListFiles.size(), denyListFunctions.size()); + + } } - unsigned int execute(function *fun) override { + std::string getSourceName(function *F) { + + return DECL_SOURCE_FILE(F->decl); + + } - if (!myInstrumentList.empty()) { + bool isInInstrumentList(function *F) { - bool instrumentBlock = false; - std::string instFilename; - unsigned int instLine = 0; + bool return_default = true; - /* EXPR_FILENAME - This macro returns the name of the file in which the entity was declared, - as a char*. For an entity declared implicitly by the compiler (like - __builtin_ memcpy), this will be the string "<internal>". - */ - const char *fname = DECL_SOURCE_FILE(fun->decl); + // is this a function with code? If it is external we don't instrument it + // anyway and it can't be in the instrument file list. Or if it is it is + // ignored. + if (isIgnoreFunction(F)) return false; - if (0 != strncmp("<internal>", fname, 10) && - 0 != strncmp("<built-in>", fname, 10)) { + if (!denyListFiles.empty() || !denyListFunctions.empty()) { - instFilename = fname; - instLine = DECL_SOURCE_LINE(fun->decl); + if (!denyListFunctions.empty()) { - /* Continue only if we know where we actually are */ - if (!instFilename.empty()) { + std::string instFunction = IDENTIFIER_POINTER (DECL_NAME (F->decl)); - for (std::list<std::string>::iterator it = myInstrumentList.begin(); - it != myInstrumentList.end(); ++it) { + for (std::list<std::string>::iterator it = denyListFunctions.begin(); + it != denyListFunctions.end(); ++it) { - /* We don't check for filename equality here because - * filenames might actually be full paths. Instead we - * check that the actual filename ends in the filename - * specified in the list. */ - if (instFilename.length() >= it->length()) { + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. We also allow UNIX-style pattern + * matching */ - if (instFilename.compare(instFilename.length() - it->length(), - it->length(), *it) == 0) { + if (instFunction.length() >= it->length()) { - instrumentBlock = true; - break; + if (fnmatch(("*" + *it).c_str(), instFunction.c_str(), 0) == 0) { - } + if (debug) + SAYF(cMGN "[D] " cRST + "Function %s is in the deny function list, " + "not instrumenting ... \n", + instFunction.c_str()); + return false; - } + } - } + } - } + } } - /* Either we couldn't figure out our location or the location is - * not in the instrument list, so we skip instrumentation. */ - if (!instrumentBlock) { + if (!denyListFiles.empty()) { - if (!be_quiet) { + std::string source_file = getSourceName(F); - if (!instFilename.empty()) - SAYF(cYEL "[!] " cBRI - "Not in instrument list, skipping %s line %u...\n", - instFilename.c_str(), instLine); - else - SAYF(cYEL "[!] " cBRI "No filename information found, skipping it"); + if (!source_file.empty()) { - } + for (std::list<std::string>::iterator it = denyListFiles.begin(); + it != denyListFiles.end(); ++it) { - return 0; + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. We also allow UNIX-style pattern + * matching */ - } + if (source_file.length() >= it->length()) { - } + if (fnmatch(("*" + *it).c_str(), source_file.c_str(), 0) == 0) { - return do_ext_call ? ext_call_instrument(fun) : inline_instrument(fun); + return false; - } + } -}; /* class afl_pass */ + } -} // namespace + } -static struct opt_pass *make_afl_pass(bool ext_call, gcc::context *ctxt) { + } else { - return new afl_pass(ext_call, ctxt); + // we could not find out the location. in this case we say it is not + // in the instrument file list + if (!be_quiet) + WARNF( + "No debug information found for function %s, will be " + "instrumented (recompile with -g -O[1-3]).", + IDENTIFIER_POINTER (DECL_NAME (F->decl))); -} + } -/* -------------------------------------------------------------------------- */ -/* -- Initialization -------------------------------------------------------- */ + } -int plugin_is_GPL_compatible = 1; + } -static struct plugin_info afl_plugin_info = { + // if we do not have a instrument file list return true + if (!allowListFiles.empty() || !allowListFunctions.empty()) { - .version = "20200519", - .help = "AFL++ gcc plugin\n", + return_default = false; -}; + if (!allowListFunctions.empty()) { -int plugin_init(struct plugin_name_args * plugin_info, - struct plugin_gcc_version *version) { + std::string instFunction = IDENTIFIER_POINTER(DECL_NAME(F->decl)); - struct register_pass_info afl_pass_info; - struct timeval tv; - struct timezone tz; - u32 rand_seed; + for (std::list<std::string>::iterator it = allowListFunctions.begin(); + it != allowListFunctions.end(); ++it) { - /* Setup random() so we get Actually Random(TM) outputs from R() */ - gettimeofday(&tv, &tz); - rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); - SR(rand_seed); + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. We also allow UNIX-style pattern + * matching */ - /* Pass information */ - afl_pass_info.pass = make_afl_pass(inst_ext, g); - afl_pass_info.reference_pass_name = "ssa"; - afl_pass_info.ref_pass_instance_number = 1; - afl_pass_info.pos_op = PASS_POS_INSERT_AFTER; + if (instFunction.length() >= it->length()) { - if (!plugin_default_version_check(version, &gcc_version)) { + if (fnmatch(("*" + *it).c_str(), instFunction.c_str(), 0) == 0) { - FATAL(G_("Incompatible gcc/plugin versions! Expected GCC %d.%d"), - GCCPLUGIN_VERSION_MAJOR, GCCPLUGIN_VERSION_MINOR); + if (debug) + SAYF(cMGN "[D] " cRST + "Function %s is in the allow function list, " + "instrumenting ... \n", + instFunction.c_str()); + return true; - } + } - /* Show a banner */ - if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) { + } - SAYF(G_(cCYA "afl-gcc-pass" VERSION cRST - " initially by <aseipp@pobox.com>, maintainer: hexcoder-\n")); + } - } else + } - be_quiet = 1; + if (!allowListFiles.empty()) { - /* Decide instrumentation ratio */ - char *inst_ratio_str = getenv("AFL_INST_RATIO"); + std::string source_file = getSourceName(F); - if (inst_ratio_str) { + if (!source_file.empty()) { - if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || - inst_ratio > 100) - FATAL(G_("Bad value of AFL_INST_RATIO (must be between 1 and 100)")); - else { + for (std::list<std::string>::iterator it = allowListFiles.begin(); + it != allowListFiles.end(); ++it) { - if (!be_quiet) - ACTF(G_("%s instrumentation at ratio of %u%% in %s mode."), - inst_ext ? G_("Call-based") : G_("Inline"), inst_ratio, - getenv("AFL_HARDEN") ? G_("hardened") : G_("non-hardened")); + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. We also allow UNIX-style pattern + * matching */ - } + if (source_file.length() >= it->length()) { - } + if (fnmatch(("*" + *it).c_str(), source_file.c_str(), 0) == 0) { - char *instInstrumentListFilename = getenv("AFL_GCC_INSTRUMENT_FILE"); - if (!instInstrumentListFilename) - instInstrumentListFilename = getenv("AFL_GCC_WHITELIST"); - if (instInstrumentListFilename) { + if (debug) + SAYF(cMGN "[D] " cRST + "Function %s is in the allowlist (%s), " + "instrumenting ... \n", + IDENTIFIER_POINTER (DECL_NAME (F->decl)), + source_file.c_str()); + return true; - std::string line; - std::ifstream fileStream; - fileStream.open(instInstrumentListFilename); - if (!fileStream) PFATAL("Unable to open AFL_GCC_INSTRUMENT_FILE"); - getline(fileStream, line); - while (fileStream) { + } - myInstrumentList.push_back(line); - getline(fileStream, line); + } - } + } - } else if (!be_quiet && (getenv("AFL_LLVM_WHITELIST") || + } else { - getenv("AFL_LLVM_INSTRUMENT_FILE"))) { + // we could not find out the location. In this case we say it is not + // in the instrument file list + if (!be_quiet) + WARNF( + "No debug information found for function %s, will not be " + "instrumented (recompile with -g -O[1-3]).", + IDENTIFIER_POINTER (DECL_NAME (F->decl))); + return false; - SAYF(cYEL "[-] " cRST - "AFL_LLVM_INSTRUMENT_FILE environment variable detected - did " - "you mean AFL_GCC_INSTRUMENT_FILE?\n"); + } + + } + + } + + return return_default; } - /* Go go gadget */ - register_callback(plugin_info->base_name, PLUGIN_INFO, NULL, - &afl_plugin_info); - register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, NULL, - &afl_pass_info); - return 0; + +}; + +static struct plugin_info afl_plugin = + { + .version = "20200907", + .help = G_("AFL gcc plugin\n\ +\n\ +Set AFL_QUIET in the environment to silence it.\n\ +\n\ +Set AFL_INST_RATIO in the environment to a number from 0 to 100\n\ +to control how likely a block will be chosen for instrumentation.\n\ +\n\ +Specify -frandom-seed for reproducible instrumentation.\n\ +"), + }; } +/* This is the function GCC calls when loading a plugin. Initialize + and register further callbacks. */ +int +plugin_init (struct plugin_name_args *info, + struct plugin_gcc_version *version) +{ + if (!plugin_default_version_check (version, &gcc_version)) + FATAL (G_("GCC and plugin have incompatible versions, expected GCC %d.%d"), + GCCPLUGIN_VERSION_MAJOR, GCCPLUGIN_VERSION_MINOR); + + /* Show a banner. */ + bool quiet = false; + if (isatty (2) && !getenv ("AFL_QUIET")) + SAYF (cCYA "afl-gcc-pass " cBRI VERSION cRST " by <oliva@adacore.com>\n"); + else + quiet = true; + + /* Decide instrumentation ratio. */ + int inst_ratio = 100; + if (char *inst_ratio_str = getenv ("AFL_INST_RATIO")) + if (sscanf (inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || + inst_ratio > 100) + FATAL (G_("Bad value of AFL_INST_RATIO (must be between 1 and 100)")); + + /* Initialize the random number generator with GCC's random seed, in + case it was specified in the command line's -frandom-seed for + reproducible instrumentation. */ + srandom (get_random_seed (false)); + + const char *name = info->base_name; + register_callback (name, PLUGIN_INFO, NULL, &afl_plugin); + + afl_pass *aflp = new afl_pass (quiet, inst_ratio); + struct register_pass_info pass_info = + { + .pass = aflp, + .reference_pass_name = "ssa", + .ref_pass_instance_number = 1, + .pos_op = PASS_POS_INSERT_AFTER, + }; + register_callback (name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); + register_callback (name, PLUGIN_FINISH, afl_pass::plugin_finalize, + pass_info.pass); + + if (!quiet) + ACTF(G_("%s instrumentation at ratio of %u%% in %s mode."), + aflp->out_of_line ? G_("Call-based") : G_("Inline"), inst_ratio, + getenv("AFL_HARDEN") ? G_("hardened") : G_("non-hardened")); + + return 0; +} |