aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorvan Hauser <vh@thc.org>2019-06-25 12:00:12 +0200
committervan Hauser <vh@thc.org>2019-06-25 12:00:12 +0200
commit0104e99caabd83e7d53f7b1248425991f4c0c431 (patch)
tree1d74694a70074c43f3182d2cfcd5a1b36f31cf3f
parente16593c9b1be2686279efe182465de5422d2ca55 (diff)
downloadafl++-0104e99caabd83e7d53f7b1248425991f4c0c431.tar.gz
llvm_mode whitelist (partial instrumentation) support added
-rw-r--r--docs/ChangeLog3
-rw-r--r--docs/README4
-rw-r--r--docs/env_variables.txt46
-rw-r--r--llvm_mode/README.whitelist75
-rw-r--r--llvm_mode/afl-llvm-pass.so.cc70
5 files changed, 185 insertions, 13 deletions
diff --git a/docs/ChangeLog b/docs/ChangeLog
index ea6e59bc..73c69196 100644
--- a/docs/ChangeLog
+++ b/docs/ChangeLog
@@ -17,6 +17,9 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
Version ++2.52d (tbd):
-----------------------------
+ - added whitelist support for llvm_mode via AFL_LLVM_WHITELIST so that only
+ the code that is actually interesting gets instrumented. Gives more speed
+ and less map pollution (originally by choller@mozilla)
- added Python Module mutator support, python2.7-dev is autodetected.
see docs/python_mutators.txt (originally by choller@mozilla)
- added AFL_CAL_FAST for slow applications and AFL_DEBUG_CHILD_OUTPUT for debugging
diff --git a/docs/README b/docs/README
index 1d0dfb34..41a9459f 100644
--- a/docs/README
+++ b/docs/README
@@ -21,8 +21,8 @@ american fuzzy lop plus plus
https://github.com/andreafioraldi/afl and got the community patches applied
to it.
- C. Hoellers Python mutator module support was added too
- (https://github.com/choeller/afl)
+ C. Hoeller's afl-fuzz Python mutator module and llvm_mode whitelist support
+ were added too (https://github.com/choeller/afl)
So all in all this is the best-of AFL that is currently out there :-)
diff --git a/docs/env_variables.txt b/docs/env_variables.txt
index 07a74dc0..f5db3b4f 100644
--- a/docs/env_variables.txt
+++ b/docs/env_variables.txt
@@ -71,14 +71,8 @@ tools make fairly broad use of environmental variables:
- Setting AFL_CAL_FAST will speed up the initial calibration, if the
application is very slow
- - Setting AFL_DEBUG_CHILD_OUTPUT will not suppress the child output.
- Not pretty but good for debugging purposes.
-
- - For AFL_PYTHON_MODULE and AFL_PYTHON_ONLY - they require to be compiled
- with -DUSE_PYTHON. Please see docs/python_mutators.txt
-
-2) Settings for afl-clang-fast
-------------------------------
+2) Settings for afl-clang-fast / afl-clang-fast++
+-------------------------------------------------
The native LLVM instrumentation helper accepts a subset of the settings
discussed in section #1, with the exception of:
@@ -88,6 +82,30 @@ discussed in section #1, with the exception of:
- TMPDIR and AFL_KEEP_ASSEMBLY, since no temporary assembly files are
created.
+Then there are a few specific features that are only available in llvm_mode:
+
+ LAF-INTEL
+ =========
+ This great feature splits comparisons into a series of single-byte
+ comparisons, allowing afl-fuzz to find paths that would otherwise be nearly
+ impossible to reach.
+
+ - Setting LAF_SPLIT_SWITCHES will split switch()es
+
+ - Setting LAF_TRANSFORM_COMPARES will split string compare functions
+
+ - Setting LAF_SPLIT_COMPARES will split > 8 bit CMP instructions
+
+ See llvm_mode/README.laf-intel for more information.
+
+ WHITELIST
+ =========
+ This feature allows selective instrumentation of the source code.
+
+ - Setting AFL_LLVM_WHITELIST to the path of a whitelist file will only
+ instrument those source files whose names match an entry in that file.
+
+ See llvm_mode/README.whitelist for more information.
+
Note that AFL_INST_RATIO will behave a bit differently than for afl-gcc,
because functions are *not* instrumented unconditionally - so low values
will have a more striking effect. For this tool, 0 is not a valid choice.
@@ -141,8 +159,8 @@ checks or alter some of the more exotic semantics of the tool:
- AFL_TMPDIR is used to write the .cur_input file to if exists, and in
the normal output directory otherwise. You would use this to point to
- a ramdisk/tmpfs. This increases the speed by a very minimal value but
- also reduces the stress on SSDs.
+ a ramdisk/tmpfs. This increases the speed by a small value but also
+ reduces the stress on SSDs.
- When developing custom instrumentation on top of afl-fuzz, you can use
AFL_SKIP_BIN_CHECK to inhibit the checks for non-instrumented binaries
@@ -159,6 +177,11 @@ checks or alter some of the more exotic semantics of the tool:
mutated files - say, to fix up checksums. See experimental/post_library/
for more.
+ - AFL_PYTHON_MODULE and AFL_PYTHON_ONLY require afl++ to be compiled with
+ -DUSE_PYTHON. Please see docs/python_mutators.txt.
+ This feature allows you to configure custom mutators, which can be very
+ helpful when fuzzing e.g. XML or other highly flexible structured input.
+
- AFL_FAST_CAL keeps the calibration stage about 2.5x faster (albeit less
precise), which can help when starting a session against a slow target.
@@ -183,6 +206,9 @@ checks or alter some of the more exotic semantics of the tool:
processing the first queue entry; and AFL_BENCH_UNTIL_CRASH causes it to
exit soon after the first crash is found.
+ - Setting AFL_DEBUG_CHILD_OUTPUT will not suppress the child output.
+ Not pretty but good for debugging purposes.
+
4) Settings for afl-qemu-trace
------------------------------
diff --git a/llvm_mode/README.whitelist b/llvm_mode/README.whitelist
new file mode 100644
index 00000000..ae044749
--- /dev/null
+++ b/llvm_mode/README.whitelist
@@ -0,0 +1,75 @@
+========================================
+Using afl++ with partial instrumentation
+========================================
+
+ This file describes how you can selectively instrument only the source files
+ that are interesting to you using the LLVM instrumentation provided by
+ afl++
+
+ Originally developed by Christian Holler (:decoder) <choller@mozilla.com>.
+
+
+1) Description and purpose
+--------------------------
+
+When building and testing complex programs where only a part of the program is
+the fuzzing target, it often helps to only instrument the necessary parts of
+the program, leaving the rest uninstrumented. This helps to focus the fuzzer
+on the important parts of the program, avoiding undesired noise and
+disturbance by uninteresting code being exercised.
+
+For this purpose, I have added "partial instrumentation" support to the LLVM
+mode of AFLFuzz that allows you to specify on a source file level which files
+should be compiled with or without instrumentation.
+
+
+2) Building the LLVM module
+---------------------------
+
+The new code is part of the existing afl++ LLVM module in the llvm_mode/
+subdirectory. No additional build steps are required :)
+
+
+3) How to use the partial instrumentation mode
+----------------------------------------------
+
+In order to build with partial instrumentation, you need to compile with
+afl-clang-fast (for C) or afl-clang-fast++ (for C++). The only required
+change is that you need to set the environment variable AFL_LLVM_WHITELIST
+when calling the compiler.
+
+The environment variable must point to a file containing all the filenames
+that should be instrumented. For matching, the filename that is being compiled
+must end in the filename contained in this whitelist (to avoid breaking the
+matching when absolute paths are used during compilation).
+
+For example if your source tree looks like this:
+
+project/
+project/feature_a/a1.cpp
+project/feature_a/a2.cpp
+project/feature_b/b1.cpp
+project/feature_b/b2.cpp
+
+And you only want to test feature_a, then create a whitelist file containing:
+
+feature_a/a1.cpp
+feature_a/a2.cpp
+
+However if the whitelist file contains this, it works as well:
+
+a1.cpp
+a2.cpp
+
+but it might lead to files being instrumented unintentionally if the same
+filename exists somewhere else in the project.
+
+Set AFL_LLVM_WHITELIST to the path of the created whitelist file when you
+compile your program. For each file that didn't match the whitelist, the
+compiler will issue a warning at the end stating that no blocks were
+instrumented. If you didn't intend to instrument that file, then you can
+safely ignore that warning.
+
+For old LLVM versions this feature might require compiling with debug
+information (-g). However, at least from LLVM version 6.0 onwards this is no
+longer required (and -g might hurt performance and crash detection, so it is
+better not to use it).
diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc
index 15b3764a..d46db7c0 100644
--- a/llvm_mode/afl-llvm-pass.so.cc
+++ b/llvm_mode/afl-llvm-pass.so.cc
@@ -31,6 +31,11 @@
#include <stdlib.h>
#include <unistd.h>
+#include <list>
+#include <string>
+#include <fstream>
+
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -48,7 +53,21 @@ namespace {
public:
static char ID;
- AFLCoverage() : ModulePass(ID) { }
+ /* Loads the optional instrumentation whitelist.
+  *
+  * If the environment variable AFL_LLVM_WHITELIST is set, the file it names
+  * is read line by line and every non-empty line is appended to myWhitelist.
+  * Failing to open that file is a fatal error. If the variable is not set,
+  * myWhitelist stays empty. */
+ AFLCoverage() : ModulePass(ID) {
+   const char* instWhiteListFilename = getenv("AFL_LLVM_WHITELIST");
+   if (instWhiteListFilename) {
+     std::ifstream fileStream(instWhiteListFilename);
+     if (!fileStream)
+       report_fatal_error("Unable to open AFL_LLVM_WHITELIST");
+
+     std::string line;
+     while (std::getline(fileStream, line)) {
+       /* Tolerate whitelist files saved with CRLF line endings. */
+       if (!line.empty() && line.back() == '\r') line.pop_back();
+
+       /* An empty entry is a suffix of every filename, so keeping it
+        * would whitelist every file. Skip blank lines. */
+       if (line.empty()) continue;
+
+       myWhitelist.push_back(line);
+     }
+   }
+ }
bool runOnModule(Module &M) override;
@@ -56,6 +75,10 @@ namespace {
// return "American Fuzzy Lop Instrumentation";
// }
+ protected:
+
+ std::list<std::string> myWhitelist;
+
};
}
@@ -115,6 +138,51 @@ bool AFLCoverage::runOnModule(Module &M) {
BasicBlock::iterator IP = BB.getFirstInsertionPt();
IRBuilder<> IRB(&(*IP));
+
+ if (!myWhitelist.empty()) {
+ bool instrumentBlock = false;
+
+ /* Get the current location using debug information.
+ * Blocks whose location cannot be determined are not
+ * instrumented while a whitelist is active. */
+ DebugLoc Loc = IP->getDebugLoc();
+ if ( Loc ) {
+ DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
+
+ unsigned int instLine = cDILoc->getLine();
+ StringRef instFilename = cDILoc->getFilename();
+
+ if (instFilename.str().empty()) {
+ /* If the original location is empty, try using the inlined location */
+ DILocation *oDILoc = cDILoc->getInlinedAt();
+ if (oDILoc) {
+ instFilename = oDILoc->getFilename();
+ instLine = oDILoc->getLine();
+ }
+ }
+
+ /* Continue only if we know where we actually are */
+ if (!instFilename.str().empty()) {
+ for (std::list<std::string>::iterator it = myWhitelist.begin(); it != myWhitelist.end(); ++it) {
+ /* We don't check for filename equality here because
+ * filenames might actually be full paths. Instead we
+ * check that the actual filename ends in the filename
+ * specified in the list. */
+ if (instFilename.str().length() >= it->length()) {
+ if (instFilename.str().compare(instFilename.str().length() - it->length(), it->length(), *it) == 0) {
+ instrumentBlock = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* Either we couldn't figure out our location or the location is
+ * not whitelisted, so we skip instrumentation. */
+ if (!instrumentBlock) continue;
+ }
+
if (AFL_R(100) >= inst_ratio) continue;