about summary refs log tree commit diff
path: root/instrumentation/afl-gcc-pass.so.cc
diff options
context:
space:
mode:
Diffstat (limited to 'instrumentation/afl-gcc-pass.so.cc')
-rw-r--r--instrumentation/afl-gcc-pass.so.cc601
1 files changed, 601 insertions, 0 deletions
diff --git a/instrumentation/afl-gcc-pass.so.cc b/instrumentation/afl-gcc-pass.so.cc
new file mode 100644
index 00000000..c5614aca
--- /dev/null
+++ b/instrumentation/afl-gcc-pass.so.cc
@@ -0,0 +1,601 @@
+//
+// There are some TODOs in this file:
+//   - fix instrumentation via external call
+//   - fix inline instrumentation
+//   - implement instrument list feature
+//   - dont instrument blocks that are uninteresting
+//   - implement neverZero
+//
+
+/*
+   american fuzzy lop++ - GCC instrumentation pass
+   ---------------------------------------------
+
+   Written by Austin Seipp <aseipp@pobox.com> with bits from
+              Emese Revfy <re.emese@gmail.com>
+
+   Fixed by Heiko Eißfeldt 2019-2020 for AFL++
+
+   GCC integration design is based on the LLVM design, which comes
+   from Laszlo Szekeres. Some of the boilerplate code below for
+   afl_pass to adapt to different GCC versions was taken from Emese
+   Revfy's Size Overflow plugin for GCC, licensed under the GPLv2/v3.
+
+   (NOTE: this plugin code is under GPLv3, in order to comply with the
+   GCC runtime library exception, which states that you may distribute
+   "Target Code" from the compiler under a license of your choice, as
+   long as the "Compilation Process" is "Eligible", and contains no
+   GPL-incompatible software in GCC "during the process of
+   transforming high level code to target code". In this case, the
+   plugin will be used to generate "Target Code" during the
+   "Compilation Process", and thus it must be GPLv3 to be "eligible".)
+
+   Copyright (C) 2015 Austin Seipp
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+ */
+
+#define BUILD_INLINE_INST
+
+#include "../include/config.h"
+#include "../include/debug.h"
+
+/* clear helper macros AFL types pull in, which intervene with gcc-plugin
+ * headers from GCC-8 */
+#ifdef likely
+  #undef likely
+#endif
+#ifdef unlikely
+  #undef unlikely
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+
+#include <gcc-plugin.h>
+#include <plugin-version.h>
+#include <diagnostic.h>
+#include <tree.h>
+#include <tree-ssa.h>
+#include <tree-pass.h>
+#include <tree-ssa-alias.h>
+#include <basic-block.h>
+#include <gimple-expr.h>
+#include <gimple.h>
+#include <gimple-iterator.h>
+#include <gimple-ssa.h>
+#include <version.h>
+#include <toplev.h>
+#include <intl.h>
+#include <context.h>
+#include <stringpool.h>
+#include <cgraph.h>
+#include <cfgloop.h>
+
+/* -------------------------------------------------------------------------- */
+/* -- AFL instrumentation pass ---------------------------------------------- */
+
+static int                    be_quiet = 0;
+static unsigned int           inst_ratio = 100;
+static bool                   inst_ext = true;
+static std::list<std::string> myInstrumentList;
+
+static unsigned int ext_call_instrument(function *fun) {
+
+  /* Instrument all the things! */
+  basic_block bb;
+  unsigned    finst_blocks = 0;
+  unsigned    fcnt_blocks = 0;
+
+  tree fntype = build_function_type_list(void_type_node,          /* return */
+                                         uint32_type_node,          /* args */
+                                         NULL_TREE);                /* done */
+  tree fndecl = build_fn_decl("__afl_trace", fntype);
+  TREE_STATIC(fndecl) = 1;                             /* Defined elsewhere */
+  TREE_PUBLIC(fndecl) = 1;                                        /* Public */
+  DECL_EXTERNAL(fndecl) = 1;                            /* External linkage */
+  DECL_ARTIFICIAL(fndecl) = 1;                      /* Injected by compiler */
+
+  FOR_EACH_BB_FN(bb, fun) {
+
+    gimple_seq           fcall;
+    gimple_seq           seq = NULL;
+    gimple_stmt_iterator bentry;
+    ++fcnt_blocks;
+
+    // only instrument if this basic block is the destination of a previous
+    // basic block that has multiple successors
+    // this gets rid of ~5-10% of instrumentations that are unnecessary
+    // result: a little more speed and less map pollution
+
+    int           more_than_one = -1;
+    edge          ep;
+    edge_iterator eip;
+
+    FOR_EACH_EDGE(ep, eip, bb->preds) {
+
+      int count = 0;
+      if (more_than_one == -1) more_than_one = 0;
+
+      basic_block   Pred = ep->src;
+      edge          es;
+      edge_iterator eis;
+      FOR_EACH_EDGE(es, eis, Pred->succs) {
+
+        basic_block Succ = es->dest;
+        if (Succ != NULL) count++;
+
+      }
+
+      if (count > 1) more_than_one = 1;
+
+    }
+
+    if (more_than_one != 1) continue;
+
+    /* Bail on this block if we trip the specified ratio */
+    if (R(100) >= inst_ratio) continue;
+
+    /* Make up cur_loc */
+    unsigned int rand_loc = R(MAP_SIZE);
+    tree         cur_loc = build_int_cst(uint32_type_node, rand_loc);
+
+    /* Update bitmap via external call */
+    /* to quote:
+     * /+ Trace a basic block with some ID +/
+     * void __afl_trace(u32 x);
+     */
+
+    fcall = gimple_build_call(
+        fndecl, 1,
+        cur_loc); /* generate the function _call_ to above built reference, with
+                   *1* parameter -> the random const for the location */
+    gimple_seq_add_stmt(&seq, fcall);         /* and insert into a sequence */
+
+    /* Done - grab the entry to the block and insert sequence */
+    bentry = gsi_after_labels(bb);
+    gsi_insert_seq_before(&bentry, seq, GSI_SAME_STMT);
+
+    ++finst_blocks;
+
+  }
+
+  /* Say something nice. */
+  if (!be_quiet) {
+
+    if (!finst_blocks)
+      WARNF(G_("No instrumentation targets found in " cBRI "%s" cRST),
+            function_name(fun));
+    else if (finst_blocks < fcnt_blocks)
+      OKF(G_("Instrumented %2u /%2u locations in " cBRI "%s" cRST),
+          finst_blocks, fcnt_blocks, function_name(fun));
+    else
+      OKF(G_("Instrumented %2u locations in " cBRI "%s" cRST), finst_blocks,
+          function_name(fun));
+
+  }
+
+  return 0;
+
+}
+
+static unsigned int inline_instrument(function *fun) {
+
+  /* Instrument all the things! */
+  basic_block bb;
+  unsigned    finst_blocks = 0;
+  unsigned    fcnt_blocks = 0;
+  tree        one = build_int_cst(unsigned_char_type_node, 1);
+  // tree zero = build_int_cst(unsigned_char_type_node, 0);
+
+  /* Set up global type declarations */
+  tree map_type = build_pointer_type(unsigned_char_type_node);
+  tree map_ptr_g =
+      build_decl(UNKNOWN_LOCATION, VAR_DECL,
+                 get_identifier_with_length("__afl_area_ptr", 14), map_type);
+  TREE_USED(map_ptr_g) = 1;
+  TREE_STATIC(map_ptr_g) = 1;                          /* Defined elsewhere */
+  DECL_EXTERNAL(map_ptr_g) = 1;                         /* External linkage */
+  DECL_PRESERVE_P(map_ptr_g) = 1;
+  DECL_ARTIFICIAL(map_ptr_g) = 1;                   /* Injected by compiler */
+  rest_of_decl_compilation(map_ptr_g, 1, 0);
+
+  tree prev_loc_g = build_decl(UNKNOWN_LOCATION, VAR_DECL,
+                               get_identifier_with_length("__afl_prev_loc", 14),
+                               uint32_type_node);
+  TREE_USED(prev_loc_g) = 1;
+  TREE_STATIC(prev_loc_g) = 1;                         /* Defined elsewhere */
+  DECL_EXTERNAL(prev_loc_g) = 1;                        /* External linkage */
+  DECL_PRESERVE_P(prev_loc_g) = 1;
+  DECL_ARTIFICIAL(prev_loc_g) = 1;                  /* Injected by compiler */
+  set_decl_tls_model(prev_loc_g, TLS_MODEL_REAL);          /* TLS attribute */
+  rest_of_decl_compilation(prev_loc_g, 1, 0);
+
+  FOR_EACH_BB_FN(bb, fun) {
+
+    gimple_seq           seq = NULL;
+    gimple_stmt_iterator bentry;
+    ++fcnt_blocks;
+
+    // only instrument if this basic block is the destination of a previous
+    // basic block that has multiple successors
+    // this gets rid of ~5-10% of instrumentations that are unnecessary
+    // result: a little more speed and less map pollution
+
+    int           more_than_one = -1;
+    edge          ep;
+    edge_iterator eip;
+    FOR_EACH_EDGE(ep, eip, bb->preds) {
+
+      int count = 0;
+      if (more_than_one == -1) more_than_one = 0;
+
+      basic_block   Pred = ep->src;
+      edge          es;
+      edge_iterator eis;
+      FOR_EACH_EDGE(es, eis, Pred->succs) {
+
+        basic_block Succ = es->dest;
+        if (Succ != NULL) count++;
+
+      }
+
+      if (count > 1) more_than_one = 1;
+
+    }
+
+    if (more_than_one != 1) continue;
+
+    /* Bail on this block if we trip the specified ratio */
+    if (R(100) >= inst_ratio) continue;
+
+    /* Make up cur_loc */
+
+    unsigned int rand_loc = R(MAP_SIZE);
+    tree         cur_loc = build_int_cst(uint32_type_node, rand_loc);
+
+    /* Load prev_loc, xor with cur_loc */
+    // gimple_assign <var_decl, prev_loc.0_1, prev_loc, NULL, NULL>
+    tree     prev_loc = create_tmp_var_raw(uint32_type_node, "prev_loc");
+    gassign *g = gimple_build_assign(prev_loc, VAR_DECL, prev_loc_g);
+    gimple_seq_add_stmt(&seq, g);  // load prev_loc
+    update_stmt(g);
+
+    // gimple_assign <bit_xor_expr, _2, prev_loc.0_1, 47231, NULL>
+    tree area_off = create_tmp_var_raw(uint32_type_node, "area_off");
+    g = gimple_build_assign(area_off, BIT_XOR_EXPR, prev_loc, cur_loc);
+    gimple_seq_add_stmt(&seq, g);  // area_off = prev_loc ^ cur_loc
+    update_stmt(g);
+
+    /* Update bitmap */
+
+    // gimple_assign <addr_expr, p_6, &map[_2], NULL, NULL>
+    tree map_ptr = create_tmp_var(map_type, "map_ptr");
+    tree map_ptr2 = create_tmp_var(map_type, "map_ptr2");
+
+    g = gimple_build_assign(map_ptr, map_ptr_g);
+    gimple_seq_add_stmt(&seq, g);  // map_ptr = __afl_area_ptr
+    update_stmt(g);
+
+#if 1
+  #if 0
+		tree addr = build2(ADDR_EXPR, map_type, map_ptr, area_off);
+		g = gimple_build_assign(map_ptr2, MODIFY_EXPR, addr);
+		gimple_seq_add_stmt(&seq, g); // map_ptr2 = map_ptr + area_off
+		update_stmt(g);
+  #else
+    g = gimple_build_assign(map_ptr2, PLUS_EXPR, map_ptr, area_off);
+    gimple_seq_add_stmt(&seq, g);  // map_ptr2 = map_ptr + area_off
+    update_stmt(g);
+  #endif
+
+    // gimple_assign <mem_ref, _3, *p_6, NULL, NULL>
+    tree tmp1 = create_tmp_var_raw(unsigned_char_type_node, "tmp1");
+    g = gimple_build_assign(tmp1, MEM_REF, map_ptr2);
+    gimple_seq_add_stmt(&seq, g);  // tmp1 = *map_ptr2
+    update_stmt(g);
+#else
+    tree atIndex = build2(PLUS_EXPR, uint32_type_node, map_ptr, area_off);
+    tree array_address = build1(ADDR_EXPR, map_type, atIndex);
+    tree array_access = build1(INDIRECT_REF, map_type, array_address);
+    tree tmp1 = create_tmp_var(unsigned_char_type_node, "tmp1");
+    g = gimple_build_assign(tmp1, array_access);
+    gimple_seq_add_stmt(&seq, g);  // tmp1 = *(map_ptr + area_off)
+    update_stmt(g);
+#endif
+    // gimple_assign <plus_expr, _4, _3, 1, NULL>
+    tree tmp2 = create_tmp_var_raw(unsigned_char_type_node, "tmp2");
+    g = gimple_build_assign(tmp2, PLUS_EXPR, tmp1, one);
+    gimple_seq_add_stmt(&seq, g);  // tmp2 = tmp1 + 1
+    update_stmt(g);
+
+    // TODO: neverZero: here we have to check if tmp3 == 0
+    //                  and add 1 if so
+
+    // gimple_assign <ssa_name, *p_6, _4, NULL, NULL>
+    //		tree map_ptr3 = create_tmp_var_raw(map_type, "map_ptr3");
+    g = gimple_build_assign(map_ptr2, INDIRECT_REF, tmp2);
+    gimple_seq_add_stmt(&seq, g);  // *map_ptr2 = tmp2
+    update_stmt(g);
+
+    /* Set prev_loc to cur_loc >> 1 */
+
+    // gimple_assign <integer_cst, prev_loc, 23615, NULL, NULL>
+    tree shifted_loc = build_int_cst(TREE_TYPE(prev_loc_g), rand_loc >> 1);
+    tree prev_loc2 = create_tmp_var_raw(uint32_type_node, "prev_loc2");
+    g = gimple_build_assign(prev_loc2, shifted_loc);
+    gimple_seq_add_stmt(&seq, g);  // __afl_prev_loc = cur_loc >> 1
+    update_stmt(g);
+    g = gimple_build_assign(prev_loc_g, prev_loc2);
+    gimple_seq_add_stmt(&seq, g);  // __afl_prev_loc = cur_loc >> 1
+    update_stmt(g);
+
+    /* Done - grab the entry to the block and insert sequence */
+
+    bentry = gsi_after_labels(bb);
+    gsi_insert_seq_before(&bentry, seq, GSI_NEW_STMT);
+
+    ++finst_blocks;
+
+  }
+
+  /* Say something nice. */
+  if (!be_quiet) {
+
+    if (!finst_blocks)
+      WARNF(G_("No instrumentation targets found in " cBRI "%s" cRST),
+            function_name(fun));
+    else if (finst_blocks < fcnt_blocks)
+      OKF(G_("Instrumented %2u /%2u locations in " cBRI "%s" cRST),
+          finst_blocks, fcnt_blocks, function_name(fun));
+    else
+      OKF(G_("Instrumented   %2u   locations in " cBRI "%s" cRST), finst_blocks,
+          function_name(fun));
+
+  }
+
+  return 0;
+
+}
+
+/* -------------------------------------------------------------------------- */
+/* -- Boilerplate and initialization ---------------------------------------- */
+
+static const struct pass_data afl_pass_data = {
+
+    .type = GIMPLE_PASS,
+    .name = "afl-inst",
+    .optinfo_flags = OPTGROUP_NONE,
+
+    .tv_id = TV_NONE,
+    .properties_required = 0,
+    .properties_provided = 0,
+    .properties_destroyed = 0,
+    .todo_flags_start = 0,
+    // NOTE(aseipp): it's very, very important to include
+    // at least 'TODO_update_ssa' here so that GCC will
+    // properly update the resulting SSA form, e.g., to
+    // include new PHI nodes for newly added symbols or
+    // names. Do not remove this. Do not taunt Happy Fun
+    // Ball.
+    .todo_flags_finish = TODO_update_ssa | TODO_verify_il | TODO_cleanup_cfg,
+
+};
+
+namespace {
+
+class afl_pass : public gimple_opt_pass {
+
+ private:
+  bool do_ext_call;
+
+ public:
+  afl_pass(bool ext_call, gcc::context *g)
+      : gimple_opt_pass(afl_pass_data, g), do_ext_call(ext_call) {
+
+  }
+
+  unsigned int execute(function *fun) override {
+
+    if (!myInstrumentList.empty()) {
+
+      bool         instrumentBlock = false;
+      std::string  instFilename;
+      unsigned int instLine = 0;
+
+      /* EXPR_FILENAME
+      This macro returns the name of the file in which the entity was declared,
+      as a char*. For an entity declared implicitly by the compiler (like
+      __builtin_ memcpy), this will be the string "<internal>".
+      */
+      const char *fname = DECL_SOURCE_FILE(fun->decl);
+
+      if (0 != strncmp("<internal>", fname, 10) &&
+          0 != strncmp("<built-in>", fname, 10)) {
+
+        instFilename = fname;
+        instLine = DECL_SOURCE_LINE(fun->decl);
+
+        /* Continue only if we know where we actually are */
+        if (!instFilename.empty()) {
+
+          for (std::list<std::string>::iterator it = myInstrumentList.begin();
+               it != myInstrumentList.end(); ++it) {
+
+            /* We don't check for filename equality here because
+             * filenames might actually be full paths. Instead we
+             * check that the actual filename ends in the filename
+             * specified in the list. */
+            if (instFilename.length() >= it->length()) {
+
+              if (instFilename.compare(instFilename.length() - it->length(),
+                                       it->length(), *it) == 0) {
+
+                instrumentBlock = true;
+                break;
+
+              }
+
+            }
+
+          }
+
+        }
+
+      }
+
+      /* Either we couldn't figure out our location or the location is
+       * not in the instrument list, so we skip instrumentation. */
+      if (!instrumentBlock) {
+
+        if (!be_quiet) {
+
+          if (!instFilename.empty())
+            SAYF(cYEL "[!] " cBRI
+                      "Not in instrument list, skipping %s line %u...\n",
+                 instFilename.c_str(), instLine);
+          else
+            SAYF(cYEL "[!] " cBRI "No filename information found, skipping it");
+
+        }
+
+        return 0;
+
+      }
+
+    }
+
+    return do_ext_call ? ext_call_instrument(fun) : inline_instrument(fun);
+
+  }
+
+};                                                        /* class afl_pass */
+
+}  // namespace
+
+static struct opt_pass *make_afl_pass(bool ext_call, gcc::context *ctxt) {
+
+  return new afl_pass(ext_call, ctxt);
+
+}
+
+/* -------------------------------------------------------------------------- */
+/* -- Initialization -------------------------------------------------------- */
+
+int plugin_is_GPL_compatible = 1;
+
+static struct plugin_info afl_plugin_info = {
+
+    .version = "20200519",
+    .help = "AFL++ gcc plugin\n",
+
+};
+
+int plugin_init(struct plugin_name_args *  plugin_info,
+                struct plugin_gcc_version *version) {
+
+  struct register_pass_info afl_pass_info;
+  struct timeval            tv;
+  struct timezone           tz;
+  u32                       rand_seed;
+
+  /* Setup random() so we get Actually Random(TM) outputs from R() */
+  gettimeofday(&tv, &tz);
+  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
+  SR(rand_seed);
+
+  /* Pass information */
+  afl_pass_info.pass = make_afl_pass(inst_ext, g);
+  afl_pass_info.reference_pass_name = "ssa";
+  afl_pass_info.ref_pass_instance_number = 1;
+  afl_pass_info.pos_op = PASS_POS_INSERT_AFTER;
+
+  if (!plugin_default_version_check(version, &gcc_version)) {
+
+    FATAL(G_("Incompatible gcc/plugin versions! Expected GCC %d.%d"),
+          GCCPLUGIN_VERSION_MAJOR, GCCPLUGIN_VERSION_MINOR);
+
+  }
+
+  /* Show a banner */
+  if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {
+
+    SAYF(G_(cCYA "afl-gcc-pass" VERSION cRST
+                 " initially by <aseipp@pobox.com>, maintainer: hexcoder-\n"));
+
+  } else
+
+    be_quiet = 1;
+
+  /* Decide instrumentation ratio */
+  char *inst_ratio_str = getenv("AFL_INST_RATIO");
+
+  if (inst_ratio_str) {
+
+    if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio ||
+        inst_ratio > 100)
+      FATAL(G_("Bad value of AFL_INST_RATIO (must be between 1 and 100)"));
+    else {
+
+      if (!be_quiet)
+        ACTF(G_("%s instrumentation at ratio of %u%% in %s mode."),
+             inst_ext ? G_("Call-based") : G_("Inline"), inst_ratio,
+             getenv("AFL_HARDEN") ? G_("hardened") : G_("non-hardened"));
+
+    }
+
+  }
+
+  char *instInstrumentListFilename = getenv("AFL_GCC_INSTRUMENT_FILE");
+  if (!instInstrumentListFilename)
+    instInstrumentListFilename = getenv("AFL_GCC_WHITELIST");
+  if (instInstrumentListFilename) {
+
+    std::string   line;
+    std::ifstream fileStream;
+    fileStream.open(instInstrumentListFilename);
+    if (!fileStream) PFATAL("Unable to open AFL_GCC_INSTRUMENT_FILE");
+    getline(fileStream, line);
+    while (fileStream) {
+
+      myInstrumentList.push_back(line);
+      getline(fileStream, line);
+
+    }
+
+  } else if (!be_quiet && (getenv("AFL_LLVM_WHITELIST") ||
+
+                           getenv("AFL_LLVM_INSTRUMENT_FILE"))) {
+
+    SAYF(cYEL "[-] " cRST
+              "AFL_LLVM_INSTRUMENT_FILE environment variable detected - did "
+              "you mean AFL_GCC_INSTRUMENT_FILE?\n");
+
+  }
+
+  /* Go go gadget */
+  register_callback(plugin_info->base_name, PLUGIN_INFO, NULL,
+                    &afl_plugin_info);
+  register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
+                    &afl_pass_info);
+  return 0;
+
+}
+