aboutsummaryrefslogtreecommitdiff
path: root/utils/optimin/src
diff options
context:
space:
mode:
authorvan Hauser <vh@thc.org>2021-08-20 23:54:59 +0200
committerGitHub <noreply@github.com>2021-08-20 23:54:59 +0200
commit2e15661f184c77ac1fbb6f868c894e946cbb7f17 (patch)
tree665b9368d2c1908cf71dbc4a76517f88c5317d9a /utils/optimin/src
parent32a0d6ac31554a47dca591f8978982758fb87677 (diff)
parentca9c87dd45d8b9a746a212cbc6ce85b78b637d8c (diff)
downloadafl++-2e15661f184c77ac1fbb6f868c894e946cbb7f17.tar.gz
Merge pull request #1074 from AFLplusplus/dev
push to stable
Diffstat (limited to 'utils/optimin/src')
-rw-r--r--utils/optimin/src/CMakeLists.txt12
-rw-r--r--utils/optimin/src/OptiMin.cpp702
2 files changed, 714 insertions, 0 deletions
diff --git a/utils/optimin/src/CMakeLists.txt b/utils/optimin/src/CMakeLists.txt
new file mode 100644
index 00000000..f31ceeaf
--- /dev/null
+++ b/utils/optimin/src/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_executable(optimin OptiMin.cpp)
+
+foreach(LIB MaLib EvalMaxSAT glucose)
+ target_include_directories(optimin PRIVATE
+ "${CMAKE_SOURCE_DIR}/EvalMaxSAT/lib/${LIB}/src")
+ target_link_libraries(optimin ${LIB})
+endforeach(LIB)
+
+llvm_map_components_to_libnames(LLVM_LIBS support)
+target_link_libraries(optimin ${LLVM_LIBS})
+
+install(TARGETS optimin RUNTIME DESTINATION bin)
diff --git a/utils/optimin/src/OptiMin.cpp b/utils/optimin/src/OptiMin.cpp
new file mode 100644
index 00000000..ce1fb850
--- /dev/null
+++ b/utils/optimin/src/OptiMin.cpp
@@ -0,0 +1,702 @@
+/*
+ * OptiMin, an optimal fuzzing corpus minimizer.
+ *
+ * Author: Adrian Herrera
+ */
+
+#include <cstdint>
+#include <cstdlib>
+#include <vector>
+
+#include <llvm/ADT/DenseSet.h>
+#include <llvm/ADT/DenseMap.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/StringExtras.h>
+#include <llvm/ADT/StringMap.h>
+#include <llvm/Support/Chrono.h>
+#include <llvm/Support/CommandLine.h>
+#include <llvm/Support/Error.h>
+#include <llvm/Support/FileSystem.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Support/Path.h>
+#include <llvm/Support/Program.h>
+#include <llvm/Support/WithColor.h>
+
+#include "EvalMaxSAT.h"
+
+using namespace llvm;
+
+namespace {
+
+// -------------------------------------------------------------------------- //
+// Classes
+// -------------------------------------------------------------------------- //
+
+/// Ensure seed weights default to 1
+class Weight {
+
+ public:
+ Weight() : Weight(1){};
+ Weight(uint32_t V) : Value(V){};
+
+ operator unsigned() const {
+
+ return Value;
+
+ }
+
+ private:
+ const unsigned Value;
+
+};
+
+// -------------------------------------------------------------------------- //
+// Typedefs
+// -------------------------------------------------------------------------- //
+
+/// AFL tuple (edge) ID
+using AFLTupleID = uint32_t;
+
+/// Pair of tuple ID and hit count
+using AFLTuple = std::pair<AFLTupleID, /* Frequency */ unsigned>;
+
+/// Coverage for a given seed file
+using AFLCoverageVector = std::vector<AFLTuple>;
+
+/// Map seed file paths to its coverage vector
+using AFLCoverageMap = StringMap<AFLCoverageVector>;
+
+/// Map seed file paths to a weight
+using WeightsMap = StringMap<Weight>;
+
+/// A seed identifier in the MaxSAT solver
+using SeedID = int;
+
+/// Associates seed identifiers to seed files
+using MaxSATSeeds =
+ SmallVector<std::pair<SeedID, /* Seed file */ std::string>, 0>;
+
+/// Set of literal identifiers
+using MaxSATSeedSet = DenseSet<SeedID>;
+
+/// Maps tuple IDs to the literal identifiers that "cover" that tuple
+using MaxSATCoverageMap = DenseMap<AFLTupleID, MaxSATSeedSet>;
+
+// -------------------------------------------------------------------------- //
+// Global variables
+// -------------------------------------------------------------------------- //
+
+// This is based on the human class count in `count_class_human[256]` in
+// `afl-showmap.c`
+static constexpr uint32_t MAX_EDGE_FREQ = 8;
+
+// The maximum number of failures allowed when parsing a weights file
+static constexpr unsigned MAX_WEIGHT_FAILURES = 5;
+
+static sys::TimePoint<> StartTime, EndTime;
+static std::chrono::seconds Duration;
+
+static std::string ShowmapPath;
+static bool TargetArgsHasAtAt = false;
+static bool KeepTraces = false;
+static bool SkipBinCheck = false;
+
+static const auto ErrMsg = [] {
+
+ return WithColor(errs(), raw_ostream::RED, /*Bold=*/true) << "[-] ";
+
+};
+
+static const auto WarnMsg = [] {
+
+ return WithColor(errs(), raw_ostream::MAGENTA, /*Bold=*/true) << "[-] ";
+
+};
+
+static const auto SuccMsg = [] {
+
+ return WithColor(outs(), raw_ostream::GREEN, /*Bold=*/true) << "[+] ";
+
+};
+
+static const auto StatMsg = [] {
+
+ return WithColor(outs(), raw_ostream::BLUE, /*Bold=*/true) << "[*] ";
+
+};
+
+static cl::opt<std::string> InputDir("i", cl::desc("Input directory"),
+ cl::value_desc("dir"), cl::Required);
+static cl::opt<std::string> OutputDir("o", cl::desc("Output directory"),
+ cl::value_desc("dir"), cl::Required);
+
+static cl::opt<bool> EdgesOnly("f", cl::desc("Include edge hit counts"),
+ cl::init(true));
+static cl::opt<std::string> WeightsFile("w", cl::desc("Weights file"),
+ cl::value_desc("csv"));
+
+static cl::opt<std::string> TargetProg(cl::Positional,
+ cl::desc("<target program>"),
+ cl::Required);
+static cl::list<std::string> TargetArgs(cl::ConsumeAfter,
+ cl::desc("[target args...]"));
+
+static cl::opt<std::string> MemLimit(
+ "m", cl::desc("Memory limit for child process (default=none)"),
+ cl::value_desc("megs"), cl::init("none"));
+static cl::opt<std::string> Timeout(
+ "t", cl::desc("Run time limit for child process (default=5000)"),
+ cl::value_desc("msec"), cl::init("5000"));
+
+static cl::opt<bool> CrashMode(
+ "C", cl::desc("Keep crashing inputs, reject everything else"));
+static cl::opt<bool> FridaMode(
+ "O", cl::desc("Use binary-only instrumentation (FRIDA mode)"));
+static cl::opt<bool> QemuMode(
+ "Q", cl::desc("Use binary-only instrumentation (QEMU mode)"));
+static cl::opt<bool> UnicornMode(
+ "U", cl::desc("Use unicorn-based instrumentation (unicorn mode)"));
+
+} // anonymous namespace
+
+// -------------------------------------------------------------------------- //
+// Helper functions
+// -------------------------------------------------------------------------- //
+
+static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) {
+
+ SmallVector<StringRef, 0> Lines;
+ MB.getBuffer().trim().split(Lines, '\n');
+
+ unsigned FailureCount = 0;
+ unsigned Weight = 0;
+
+ for (const auto &Line : Lines) {
+
+ const auto &[Seed, WeightStr] = Line.split(',');
+
+ if (to_integer(WeightStr, Weight, 10)) {
+
+ Weights.try_emplace(Seed, Weight);
+
+ } else {
+
+ if (FailureCount >= MAX_WEIGHT_FAILURES) {
+ ErrMsg() << "Too many failures. Aborting\n";
+ std::exit(1);
+ }
+
+ WarnMsg() << "Failed to read weight for '" << Seed << "'. Skipping...\n";
+ FailureCount++;
+
+ }
+
+ }
+
+}
+
+static std::error_code readCov(const StringRef Trace, AFLCoverageVector &Cov) {
+
+ const auto CovOrErr = MemoryBuffer::getFile(Trace);
+ if (const auto EC = CovOrErr.getError()) return EC;
+
+ SmallVector<StringRef, 0> Lines;
+ CovOrErr.get()->getBuffer().trim().split(Lines, '\n');
+
+ AFLTupleID Edge = 0;
+ unsigned Freq = 0;
+
+ for (const auto &Line : Lines) {
+
+ const auto &[EdgeStr, FreqStr] = Line.split(':');
+
+ to_integer(EdgeStr, Edge, 10);
+ to_integer(FreqStr, Freq, 10);
+ Cov.push_back({Edge, Freq});
+
+ }
+
+ return std::error_code();
+
+}
+
+static Error runShowmap(AFLCoverageMap &CovMap, const StringRef Input,
+ bool BinCheck = false) {
+
+ const bool InputIsFile = !sys::fs::is_directory(Input);
+ Optional<StringRef> Redirects[] = {None, None, None};
+
+ SmallString<32> TraceDir{OutputDir};
+ sys::path::append(TraceDir, ".traces");
+
+ SmallString<32> Output{TraceDir};
+ SmallString<32> StdinFile{TraceDir};
+
+ // ------------------------------------------------------------------------ //
+ // Prepare afl-showmap arguments
+ //
+ // If the given input is a file, then feed this directly into stdin.
+ // Otherwise, if it is a directory, specify this on the afl-showmap command
+ // line.
+ // ------------------------------------------------------------------------ //
+
+ SmallVector<StringRef, 12> ShowmapArgs{ShowmapPath, "-q",
+ "-m", MemLimit,
+ "-t", Timeout};
+
+ if (InputIsFile) {
+
+ StdinFile = Input;
+ sys::path::append(Output,
+ BinCheck ? ".run_test" : sys::path::filename(Input));
+
+ } else {
+
+ sys::path::append(StdinFile, ".cur_input");
+ ShowmapArgs.append({"-i", Input});
+
+ }
+
+
+ if (TargetArgsHasAtAt) {
+
+ ShowmapArgs.append({"-A", StdinFile});
+ Redirects[/* stdin */ 0] = "/dev/null";
+
+ } else if (InputIsFile) {
+
+ Redirects[/* stdin */ 0] = Input;
+
+ }
+
+ if (FridaMode) ShowmapArgs.push_back("-O");
+ if (QemuMode) ShowmapArgs.push_back("-Q");
+ if (UnicornMode) ShowmapArgs.push_back("-U");
+
+ ShowmapArgs.append({"-o", Output, "--", TargetProg});
+ ShowmapArgs.append(TargetArgs.begin(), TargetArgs.end());
+
+ // ------------------------------------------------------------------------ //
+ // Run afl-showmap
+ // ------------------------------------------------------------------------ //
+
+ const int RC = sys::ExecuteAndWait(ShowmapPath, ShowmapArgs,
+ /*env=*/None, Redirects);
+ if (RC && !CrashMode) {
+
+ ErrMsg() << "Exit code " << RC << " != 0 received from afl-showmap\n";
+ return createStringError(inconvertibleErrorCode(), "afl-showmap failed");
+
+ }
+
+ // ------------------------------------------------------------------------ //
+ // Parse afl-showmap output
+ // ------------------------------------------------------------------------ //
+
+ AFLCoverageVector Cov;
+ std::error_code EC;
+ sys::fs::file_status Status;
+
+ if (InputIsFile) {
+
+ // Read a single output coverage file
+ if ((EC = readCov(Output, Cov))) {
+
+ sys::fs::remove(Output);
+ return errorCodeToError(EC);
+
+ }
+
+ CovMap.try_emplace(sys::path::filename(Input), Cov);
+ if (!KeepTraces) sys::fs::remove(Output);
+
+ } else {
+
+ // Read a directory of output coverage files
+ for (sys::fs::recursive_directory_iterator Dir(TraceDir, EC), DirEnd;
+ Dir != DirEnd && !EC; Dir.increment(EC)) {
+
+ if (EC) return errorCodeToError(EC);
+
+ const auto &Path = Dir->path();
+ if ((EC = sys::fs::status(Path, Status))) return errorCodeToError(EC);
+
+ switch (Status.type()) {
+
+ case sys::fs::file_type::regular_file:
+ case sys::fs::file_type::symlink_file:
+ case sys::fs::file_type::type_unknown:
+ Cov.clear();
+ if ((EC = readCov(Path, Cov))) {
+
+ sys::fs::remove(Path);
+ return errorCodeToError(EC);
+
+ }
+
+ CovMap.try_emplace(sys::path::filename(Path), Cov);
+ default:
+ // Ignore
+ break;
+
+ }
+
+ }
+
+ if (!KeepTraces) sys::fs::remove_directories(TraceDir);
+
+ }
+
+ return Error::success();
+
+}
+
+static inline void StartTimer() {
+
+ StartTime = std::chrono::system_clock::now();
+
+}
+
+static inline void EndTimer() {
+
+ EndTime = std::chrono::system_clock::now();
+ Duration =
+ std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime);
+
+ SuccMsg() << " Completed in " << Duration.count() << " s\n";
+
+}
+
+// -------------------------------------------------------------------------- //
+// Main function
+// -------------------------------------------------------------------------- //
+
+int main(int argc, char *argv[]) {
+
+ WeightsMap Weights;
+ std::error_code EC;
+
+ // ------------------------------------------------------------------------ //
+ // Parse command-line options and environment variables
+ //
+ // Also check the target arguments, as this determines how we run afl-showmap.
+ // ------------------------------------------------------------------------ //
+
+ cl::ParseCommandLineOptions(argc, argv, "Optimal corpus minimizer");
+
+ KeepTraces = !!std::getenv("AFL_KEEP_TRACES");
+ SkipBinCheck = !!std::getenv("AFL_SKIP_BIN_CHECK");
+ const auto AFLPath = std::getenv("AFL_PATH");
+
+ if (CrashMode) ::setenv("AFL_CMIN_CRASHES_ONLY", "1", /*overwrite=*/true);
+
+ for (const auto &Arg : TargetArgs)
+ if (Arg == "@@") TargetArgsHasAtAt = true;
+
+ // ------------------------------------------------------------------------ //
+ // Find afl-showmap
+ // ------------------------------------------------------------------------ //
+
+ SmallVector<StringRef, 16> EnvPaths;
+
+ if (const char *PathEnv = std::getenv("PATH"))
+ SplitString(PathEnv, EnvPaths, ":");
+ if (AFLPath) EnvPaths.push_back(AFLPath);
+
+ const auto ShowmapOrErr = sys::findProgramByName("afl-showmap", EnvPaths);
+ if (ShowmapOrErr.getError()) {
+
+ ErrMsg() << "Failed to find afl-showmap. Check your PATH\n";
+ return 1;
+
+ }
+
+ ShowmapPath = *ShowmapOrErr;
+
+ // ------------------------------------------------------------------------ //
+ // Parse weights
+ //
+ // Weights are stored in CSV file mapping a seed file name to an integer
+ // greater than zero.
+ // ------------------------------------------------------------------------ //
+
+ if (WeightsFile != "") {
+
+ StatMsg() << "Reading weights from '" << WeightsFile << "'...\n";
+ StartTimer();
+
+ const auto WeightsOrErr = MemoryBuffer::getFile(WeightsFile);
+ if ((EC = WeightsOrErr.getError())) {
+
+ ErrMsg() << "Failed to read weights from '" << WeightsFile
+ << "': " << EC.message() << '\n';
+ return 1;
+
+ }
+
+ GetWeights(*WeightsOrErr.get(), Weights);
+
+ EndTimer();
+
+ }
+
+ // ------------------------------------------------------------------------ //
+ // Traverse input directory
+ //
+ // Find the seed files inside this directory (and subdirectories).
+ // ------------------------------------------------------------------------ //
+
+ StatMsg() << "Locating seeds in '" << InputDir << "'...\n";
+ StartTimer();
+
+ bool IsDirResult;
+ if ((EC = sys::fs::is_directory(InputDir, IsDirResult))) {
+
+ ErrMsg() << "Invalid input directory '" << InputDir << "': " << EC.message()
+ << '\n';
+ return 1;
+
+ }
+
+ sys::fs::file_status Status;
+ StringMap<std::string> SeedFiles;
+
+ for (sys::fs::recursive_directory_iterator Dir(InputDir, EC), DirEnd;
+ Dir != DirEnd && !EC; Dir.increment(EC)) {
+
+ if (EC) {
+
+ ErrMsg() << "Failed to traverse input directory '" << InputDir
+ << "': " << EC.message() << '\n';
+ return 1;
+
+ }
+
+ const auto &Path = Dir->path();
+ if ((EC = sys::fs::status(Path, Status))) {
+
+ ErrMsg() << "Failed to access '" << Path << "': " << EC.message() << '\n';
+ return 1;
+
+ }
+
+ switch (Status.type()) {
+
+ case sys::fs::file_type::regular_file:
+ case sys::fs::file_type::symlink_file:
+ case sys::fs::file_type::type_unknown:
+ SeedFiles.try_emplace(sys::path::filename(Path),
+ sys::path::parent_path(Path));
+ default:
+ /* Ignore */
+ break;
+
+ }
+
+ }
+
+ EndTimer();
+
+ if (SeedFiles.empty()) {
+
+ ErrMsg() << "Failed to find any seed files in '" << InputDir << "'\n";
+ return 1;
+
+ }
+
+ // ------------------------------------------------------------------------ //
+ // Setup output directory
+ // ------------------------------------------------------------------------ //
+
+ SmallString<32> TraceDir{OutputDir};
+ sys::path::append(TraceDir, ".traces");
+
+ if ((EC = sys::fs::remove_directories(TraceDir))) {
+
+ ErrMsg() << "Failed to remove existing trace directory in '" << OutputDir
+ << "': " << EC.message() << '\n';
+ return 1;
+
+ }
+
+ if ((EC = sys::fs::create_directories(TraceDir))) {
+
+ ErrMsg() << "Failed to create output directory '" << OutputDir
+ << "': " << EC.message() << '\n';
+ return 1;
+
+ }
+
+ // ------------------------------------------------------------------------ //
+ // Test the target binary
+ // ------------------------------------------------------------------------ //
+
+ AFLCoverageMap CovMap;
+
+ if (!SkipBinCheck) {
+
+ const auto It = SeedFiles.begin();
+ SmallString<32> TestSeed{It->second};
+ sys::path::append(TestSeed, It->first());
+
+ StatMsg() << "Testing the target binary with '" << TestSeed << "`...\n";
+ StartTimer();
+
+ if (auto Err = runShowmap(CovMap, TestSeed, /*BinCheck=*/true)) {
+
+ ErrMsg() << "No instrumentation output detected \n";
+ return 1;
+
+ }
+
+ EndTimer();
+ SuccMsg() << "OK, " << CovMap.begin()->second.size()
+ << " tuples recorded\n";
+
+ }
+
+ // ------------------------------------------------------------------------ //
+ // Generate seed coverage
+ //
+ // Iterate over the corpus directory, which should contain seed files. Execute
+ // these seeds in the target program to generate coverage information, and
+ // then store this coverage information in the appropriate data structures.
+ // ------------------------------------------------------------------------ //
+
+ StatMsg() << "Running afl-showmap on " << SeedFiles.size() << " seeds...\n";
+ StartTimer();
+
+ MaxSATSeeds SeedVars;
+ MaxSATCoverageMap SeedCoverage;
+ EvalMaxSAT Solver(/*nbMinimizeThread=*/0);
+
+ CovMap.clear();
+ if (auto Err = runShowmap(CovMap, InputDir)) {
+
+ ErrMsg() << "Failed to generate coverage: " << Err << '\n';
+ return 1;
+
+ }
+
+ for (const auto &SeedCov : CovMap) {
+
+ // Create a variable to represent the seed
+ const SeedID Var = Solver.newVar();
+ SeedVars.emplace_back(Var, SeedCov.first());
+
+ // Record the set of seeds that cover a particular edge
+ for (auto &[Edge, Freq] : SeedCov.second) {
+
+ if (EdgesOnly) {
+
+ // Ignore edge frequency
+ SeedCoverage[Edge].insert(Var);
+
+ } else {
+
+ // Executing edge `E` `N` times means that it was executed `N - 1` times
+ for (unsigned I = 0; I < Freq; ++I)
+ SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(Var);
+
+ }
+
+ }
+
+ }
+
+ EndTimer();
+
+ // ------------------------------------------------------------------------ //
+ // Set the hard and soft constraints in the solver
+ // ------------------------------------------------------------------------ //
+
+ StatMsg() << "Generating constraints...\n";
+ StartTimer();
+
+ size_t SeedCount = 0;
+
+ // Ensure that at least one seed is selected that covers a particular edge
+ // (hard constraint)
+ std::vector<SeedID> Clauses;
+ for (const auto &[_, Seeds] : SeedCoverage) {
+
+ if (Seeds.empty()) continue;
+
+ Clauses.clear();
+ for (const auto &Seed : Seeds)
+ Clauses.push_back(Seed);
+
+ Solver.addClause(Clauses);
+
+ }
+
+ // Select the minimum number of seeds that cover a particular set of edges
+ // (soft constraint)
+ for (const auto &[Var, Seed] : SeedVars)
+ Solver.addWeightedClause({-Var}, Weights[sys::path::filename(Seed)]);
+
+ EndTimer();
+
+ // ------------------------------------------------------------------------ //
+ // Generate a solution
+ // ------------------------------------------------------------------------ //
+
+ StatMsg() << "Solving...\n";
+ StartTimer();
+
+ const bool Solved = Solver.solve();
+
+ EndTimer();
+
+ // ------------------------------------------------------------------------ //
+ // Save the solution
+ //
+ // This will copy the selected seeds to the given output directory.
+ // ------------------------------------------------------------------------ //
+
+ SmallVector<StringRef, 64> Solution;
+ SmallString<32> InputSeed, OutputSeed;
+
+ if (Solved) {
+
+ for (const auto &[Var, Seed] : SeedVars)
+ if (Solver.getValue(Var) > 0) Solution.push_back(Seed);
+
+ } else {
+
+ ErrMsg() << "Failed to find an optimal solution for '" << InputDir << "'\n";
+ return 1;
+
+ }
+
+ StatMsg() << "Copying " << Solution.size() << " seeds to '" << OutputDir
+ << "'...\n";
+ StartTimer();
+
+ SeedCount = 0;
+
+ for (const auto &Seed : Solution) {
+
+ InputSeed = SeedFiles[Seed];
+ sys::path::append(InputSeed, Seed);
+
+ OutputSeed = OutputDir;
+ sys::path::append(OutputSeed, Seed);
+
+ if ((EC = sys::fs::copy_file(InputSeed, OutputSeed))) {
+
+ ErrMsg() << "Failed to copy '" << Seed << "' to '" << OutputDir
+ << "': " << EC.message() << '\n';
+ return 1;
+
+ }
+
+ }
+
+ EndTimer();
+ SuccMsg() << "Done!\n";
+
+ return 0;
+
+}
+