diff options
Diffstat (limited to 'utils/optimin')
-rw-r--r-- | utils/optimin/.gitignore | 11 | ||||
-rw-r--r-- | utils/optimin/CMakeLists.txt | 22 | ||||
-rw-r--r-- | utils/optimin/EVALMAXSAT_VERSION | 1 | ||||
m--------- | utils/optimin/EvalMaxSAT | 0 | ||||
-rw-r--r-- | utils/optimin/README.md | 94 | ||||
-rwxr-xr-x | utils/optimin/build_optimin.sh | 131 | ||||
-rw-r--r-- | utils/optimin/src/CMakeLists.txt | 12 | ||||
-rw-r--r-- | utils/optimin/src/OptiMin.cpp | 702 |
8 files changed, 973 insertions, 0 deletions
diff --git a/utils/optimin/.gitignore b/utils/optimin/.gitignore new file mode 100644 index 00000000..46f42f8f --- /dev/null +++ b/utils/optimin/.gitignore @@ -0,0 +1,11 @@ +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps diff --git a/utils/optimin/CMakeLists.txt b/utils/optimin/CMakeLists.txt new file mode 100644 index 00000000..b45dd004 --- /dev/null +++ b/utils/optimin/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.10) + +project(optimin + LANGUAGES CXX + DESCRIPTION "MaxSAT-based fuzzing corpus minimizer" +) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") + +# Add LLVM +find_package(LLVM REQUIRED CONFIG) +message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") + +include_directories(${LLVM_INCLUDE_DIRS}) +add_definitions(${LLVM_DEFINITIONS} -DNDEBUG) + +add_subdirectory(EvalMaxSAT) +add_subdirectory(src) diff --git a/utils/optimin/EVALMAXSAT_VERSION b/utils/optimin/EVALMAXSAT_VERSION new file mode 100644 index 00000000..d836ff1c --- /dev/null +++ b/utils/optimin/EVALMAXSAT_VERSION @@ -0,0 +1 @@ +440bf90edf88f6ab940934129e3c5b3b93764295 diff --git a/utils/optimin/EvalMaxSAT b/utils/optimin/EvalMaxSAT new file mode 160000 +Subproject 440bf90edf88f6ab940934129e3c5b3b9376429 diff --git a/utils/optimin/README.md b/utils/optimin/README.md new file mode 100644 index 00000000..c6f2af06 --- /dev/null +++ b/utils/optimin/README.md @@ -0,0 +1,94 @@ +# OptiMin + +OptiMin is a corpus minimizer that uses a +[MaxSAT](https://en.wikipedia.org/wiki/Maximum_satisfiability_problem) solver +to identify a subset of functionally distinct files that exercise different code +paths in a target program. + +Unlike most corpus minimizers, such as `afl-cmin`, OptiMin does not rely on +heuristic and/or greedy algorithms to identify these functionally distinct +files. This means that minimized corpora are generally much smaller than those +produced by other tools. + +## Building + +To build the `optimin` just execute the `build_optimin.sh` script. + +## Running + +Running `optimin` is the same as running `afl-cmin`: + +``` +./optimin -h +OVERVIEW: Optimal corpus minimizer +USAGE: optimin [options] <target program> [target args...] + +OPTIONS: + +Color Options: + + --color - Use colors in output (default=autodetect) + +General options: + + -C - Keep crashing inputs, reject everything else + -O - Use binary-only instrumentation (FRIDA mode) + -Q - Use binary-only instrumentation (QEMU mode) + -U - Use unicorn-based instrumentation (unicorn mode) + -f - Include edge hit counts + -i dir - Input directory + -m megs - Memory limit for child process (default=none) + -o dir - Output directory + -p - Display progress bar + -t msec - Run time limit for child process (default=5000) + -w csv - Weights file + +Generic Options: + + --help - Display available options (--help-hidden for more) + --help-list - Display list of available options (--help-list-hidden for more) + --version - Display the version of this program +``` + +Example: `optimin -i files -o seeds -- ./target @@` + +### Weighted Minimizations + +OptiMin allows for weighted minimizations. For examples, seeds can be weighted +by file size (or execution time), thus preferencing the selection of smaller (or +faster) seeds. + +To perform a weighted minimization, supply a CSV file with the `-w` option. This +CSV file is formatted as follows: + +``` +SEED_1,WEIGHT_1 +SEED_2,WEIGHT_2 +... +SEED_N,WEIGHT_N +``` + +Where `SEED_N` is the file name (**not** path) of a seed in the input directory, +and `WEIGHT_N` is an integer weight. + +## Further Details and Citation + +For more details, please see the paper [Seed Selection for Successful +Fuzzing](https://dl.acm.org/doi/10.1145/3460319.3464795). If you use OptiMin in +your research, please cite this paper. + +Bibtex: + +```bibtex +@inproceedings{Herrera:2021:FuzzSeedSelection, + author = {Adrian Herrera and Hendra Gunadi and Shane Magrath and Michael Norrish and Mathias Payer and Antony L. Hosking}, + title = {Seed Selection for Successful Fuzzing}, + booktitle = {30th ACM SIGSOFT International Symposium on Software Testing and Analysis}, + series = {ISSTA}, + year = {2021}, + pages = {230--243}, + numpages = {14}, + location = {Virtual, Denmark}, + publisher = {Association for Computing Machinery}, +} +``` diff --git a/utils/optimin/build_optimin.sh b/utils/optimin/build_optimin.sh new file mode 100755 index 00000000..9480f966 --- /dev/null +++ b/utils/optimin/build_optimin.sh @@ -0,0 +1,131 @@ +#!/bin/sh +# +# american fuzzy lop++ - optimin build script +# ------------------------------------------------ +# +# Originally written by Nathan Voss <njvoss99@gmail.com> +# +# Adapted from code by Andrew Griffiths <agriffiths@google.com> and +# Michal Zalewski +# +# Adapted for AFLplusplus by Dominik Maier <mail@dmnk.co> +# +# Copyright 2017 Battelle Memorial Institute. All rights reserved. +# Copyright 2019-2020 AFLplusplus Project. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# This script builds the OptiMin corpus minimizer. + +EVALMAXSAT_VERSION="$(cat ./EVALMAXSAT_VERSION)" +EVALMAXSAT_REPO="https://github.com/FlorentAvellaneda/EvalMaxSAT" + +echo "=================================================" +echo "OptiMin build script" +echo "=================================================" +echo + +echo "[*] Performing basic sanity checks..." + +PLT=`uname -s` + +if [ ! -f "../../config.h" ]; then + + echo "[-] Error: key files not found - wrong working directory?" + exit 1 + +fi + +LLVM_CONFIG="${LLVM_CONFIG:-llvm-config}" +CMAKECMD=cmake +MAKECMD=make +TARCMD=tar + +if [ "$PLT" = "Darwin" ]; then + CORES=`sysctl -n hw.ncpu` + TARCMD=tar +fi + +if [ "$PLT" = "FreeBSD" ]; then + MAKECMD=gmake + CORES=`sysctl -n hw.ncpu` + TARCMD=gtar +fi + +if [ "$PLT" = "NetBSD" ] || [ "$PLT" = "OpenBSD" ]; then + MAKECMD=gmake + CORES=`sysctl -n hw.ncpu` + TARCMD=gtar +fi + +PREREQ_NOTFOUND= +for i in git $CMAKECMD $MAKECMD $TARCMD; do + + T=`command -v "$i" 2>/dev/null` + + if [ "$T" = "" ]; then + + echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i' or similar." + PREREQ_NOTFOUND=1 + + fi + +done + +if echo "$CC" | grep -qF /afl-; then + + echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool." + PREREQ_NOTFOUND=1 + +fi + +if [ "$PREREQ_NOTFOUND" = "1" ]; then + exit 1 +fi + +echo "[+] All checks passed!" + +echo "[*] Making sure EvalMaxSAT is checked out" + +git status 1>/dev/null 2>/dev/null +if [ $? -eq 0 ]; then + echo "[*] initializing EvalMaxSAT submodule" + git submodule init || exit 1 + git submodule update ./EvalMaxSAT 2>/dev/null # ignore errors +else + echo "[*] cloning EvalMaxSAT" + test -d EvalMaxSAT || { + CNT=1 + while [ '!' -d EvalMaxSAT -a "$CNT" -lt 4 ]; do + echo "Trying to clone EvalMaxSAT (attempt $CNT/3)" + git clone "$GRAMMAR_REPO" + CNT=`expr "$CNT" + 1` + done + } +fi + +test -d EvalMaxSAT || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } +echo "[+] Got EvalMaxSAT." + +cd "EvalMaxSAT" || exit 1 +echo "[*] Checking out $EVALMAXSAT_VERSION" +sh -c 'git stash && git stash drop' 1>/dev/null 2>/dev/null +git checkout "$EVALMAXSAT_VERSION" || exit 1 +cd .. + +echo +echo +echo "[+] EvalMaxSAT successfully prepared!" +echo "[+] Building OptiMin now." +mkdir -p build +cd build || exit 1 +cmake .. -DLLVM_DIR=`$LLVM_CONFIG --cmakedir` || exit 1 +make -j$CORES || exit 1 +cd .. +echo +cp -fv build/src/optimin . || exit 1 +echo "[+] OptiMin successfully built!" diff --git a/utils/optimin/src/CMakeLists.txt b/utils/optimin/src/CMakeLists.txt new file mode 100644 index 00000000..f31ceeaf --- /dev/null +++ b/utils/optimin/src/CMakeLists.txt @@ -0,0 +1,12 @@ +add_executable(optimin OptiMin.cpp) + +foreach(LIB MaLib EvalMaxSAT glucose) + target_include_directories(optimin PRIVATE + "${CMAKE_SOURCE_DIR}/EvalMaxSAT/lib/${LIB}/src") + target_link_libraries(optimin ${LIB}) +endforeach(LIB) + +llvm_map_components_to_libnames(LLVM_LIBS support) +target_link_libraries(optimin ${LLVM_LIBS}) + +install(TARGETS optimin RUNTIME DESTINATION bin) diff --git a/utils/optimin/src/OptiMin.cpp b/utils/optimin/src/OptiMin.cpp new file mode 100644 index 00000000..ce1fb850 --- /dev/null +++ b/utils/optimin/src/OptiMin.cpp @@ -0,0 +1,702 @@ +/* + * OptiMin, an optimal fuzzing corpus minimizer. + * + * Author: Adrian Herrera + */ + +#include <cstdint> +#include <cstdlib> +#include <vector> + +#include <llvm/ADT/DenseSet.h> +#include <llvm/ADT/DenseMap.h> +#include <llvm/ADT/SmallVector.h> +#include <llvm/ADT/StringExtras.h> +#include <llvm/ADT/StringMap.h> +#include <llvm/Support/Chrono.h> +#include <llvm/Support/CommandLine.h> +#include <llvm/Support/Error.h> +#include <llvm/Support/FileSystem.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/Support/Path.h> +#include <llvm/Support/Program.h> +#include <llvm/Support/WithColor.h> + +#include "EvalMaxSAT.h" + +using namespace llvm; + +namespace { + +// -------------------------------------------------------------------------- // +// Classes +// -------------------------------------------------------------------------- // + +/// Ensure seed weights default to 1 +class Weight { + + public: + Weight() : Weight(1){}; + Weight(uint32_t V) : Value(V){}; + + operator unsigned() const { + + return Value; + + } + + private: + const unsigned Value; + +}; + +// -------------------------------------------------------------------------- // +// Typedefs +// -------------------------------------------------------------------------- // + +/// AFL tuple (edge) ID +using AFLTupleID = uint32_t; + +/// Pair of tuple ID and hit count +using AFLTuple = std::pair<AFLTupleID, /* Frequency */ unsigned>; + +/// Coverage for a given seed file +using AFLCoverageVector = std::vector<AFLTuple>; + +/// Map seed file paths to its coverage vector +using AFLCoverageMap = StringMap<AFLCoverageVector>; + +/// Map seed file paths to a weight +using WeightsMap = StringMap<Weight>; + +/// A seed identifier in the MaxSAT solver +using SeedID = int; + +/// Associates seed identifiers to seed files +using MaxSATSeeds = + SmallVector<std::pair<SeedID, /* Seed file */ std::string>, 0>; + +/// Set of literal identifiers +using MaxSATSeedSet = DenseSet<SeedID>; + +/// Maps tuple IDs to the literal identifiers that "cover" that tuple +using MaxSATCoverageMap = DenseMap<AFLTupleID, MaxSATSeedSet>; + +// -------------------------------------------------------------------------- // +// Global variables +// -------------------------------------------------------------------------- // + +// This is based on the human class count in `count_class_human[256]` in +// `afl-showmap.c` +static constexpr uint32_t MAX_EDGE_FREQ = 8; + +// The maximum number of failures allowed when parsing a weights file +static constexpr unsigned MAX_WEIGHT_FAILURES = 5; + +static sys::TimePoint<> StartTime, EndTime; +static std::chrono::seconds Duration; + +static std::string ShowmapPath; +static bool TargetArgsHasAtAt = false; +static bool KeepTraces = false; +static bool SkipBinCheck = false; + +static const auto ErrMsg = [] { + + return WithColor(errs(), raw_ostream::RED, /*Bold=*/true) << "[-] "; + +}; + +static const auto WarnMsg = [] { + + return WithColor(errs(), raw_ostream::MAGENTA, /*Bold=*/true) << "[-] "; + +}; + +static const auto SuccMsg = [] { + + return WithColor(outs(), raw_ostream::GREEN, /*Bold=*/true) << "[+] "; + +}; + +static const auto StatMsg = [] { + + return WithColor(outs(), raw_ostream::BLUE, /*Bold=*/true) << "[*] "; + +}; + +static cl::opt<std::string> InputDir("i", cl::desc("Input directory"), + cl::value_desc("dir"), cl::Required); +static cl::opt<std::string> OutputDir("o", cl::desc("Output directory"), + cl::value_desc("dir"), cl::Required); + +static cl::opt<bool> EdgesOnly("f", cl::desc("Include edge hit counts"), + cl::init(true)); +static cl::opt<std::string> WeightsFile("w", cl::desc("Weights file"), + cl::value_desc("csv")); + +static cl::opt<std::string> TargetProg(cl::Positional, + cl::desc("<target program>"), + cl::Required); +static cl::list<std::string> TargetArgs(cl::ConsumeAfter, + cl::desc("[target args...]")); + +static cl::opt<std::string> MemLimit( + "m", cl::desc("Memory limit for child process (default=none)"), + cl::value_desc("megs"), cl::init("none")); +static cl::opt<std::string> Timeout( + "t", cl::desc("Run time limit for child process (default=5000)"), + cl::value_desc("msec"), cl::init("5000")); + +static cl::opt<bool> CrashMode( + "C", cl::desc("Keep crashing inputs, reject everything else")); +static cl::opt<bool> FridaMode( + "O", cl::desc("Use binary-only instrumentation (FRIDA mode)")); +static cl::opt<bool> QemuMode( + "Q", cl::desc("Use binary-only instrumentation (QEMU mode)")); +static cl::opt<bool> UnicornMode( + "U", cl::desc("Use unicorn-based instrumentation (unicorn mode)")); + +} // anonymous namespace + +// -------------------------------------------------------------------------- // +// Helper functions +// -------------------------------------------------------------------------- // + +static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) { + + SmallVector<StringRef, 0> Lines; + MB.getBuffer().trim().split(Lines, '\n'); + + unsigned FailureCount = 0; + unsigned Weight = 0; + + for (const auto &Line : Lines) { + + const auto &[Seed, WeightStr] = Line.split(','); + + if (to_integer(WeightStr, Weight, 10)) { + + Weights.try_emplace(Seed, Weight); + + } else { + + if (FailureCount >= MAX_WEIGHT_FAILURES) { + ErrMsg() << "Too many failures. Aborting\n"; + std::exit(1); + } + + WarnMsg() << "Failed to read weight for '" << Seed << "'. Skipping...\n"; + FailureCount++; + + } + + } + +} + +static std::error_code readCov(const StringRef Trace, AFLCoverageVector &Cov) { + + const auto CovOrErr = MemoryBuffer::getFile(Trace); + if (const auto EC = CovOrErr.getError()) return EC; + + SmallVector<StringRef, 0> Lines; + CovOrErr.get()->getBuffer().trim().split(Lines, '\n'); + + AFLTupleID Edge = 0; + unsigned Freq = 0; + + for (const auto &Line : Lines) { + + const auto &[EdgeStr, FreqStr] = Line.split(':'); + + to_integer(EdgeStr, Edge, 10); + to_integer(FreqStr, Freq, 10); + Cov.push_back({Edge, Freq}); + + } + + return std::error_code(); + +} + +static Error runShowmap(AFLCoverageMap &CovMap, const StringRef Input, + bool BinCheck = false) { + + const bool InputIsFile = !sys::fs::is_directory(Input); + Optional<StringRef> Redirects[] = {None, None, None}; + + SmallString<32> TraceDir{OutputDir}; + sys::path::append(TraceDir, ".traces"); + + SmallString<32> Output{TraceDir}; + SmallString<32> StdinFile{TraceDir}; + + // ------------------------------------------------------------------------ // + // Prepare afl-showmap arguments + // + // If the given input is a file, then feed this directly into stdin. + // Otherwise, if it is a directory, specify this on the afl-showmap command + // line. + // ------------------------------------------------------------------------ // + + SmallVector<StringRef, 12> ShowmapArgs{ShowmapPath, "-q", + "-m", MemLimit, + "-t", Timeout}; + + if (InputIsFile) { + + StdinFile = Input; + sys::path::append(Output, + BinCheck ? ".run_test" : sys::path::filename(Input)); + + } else { + + sys::path::append(StdinFile, ".cur_input"); + ShowmapArgs.append({"-i", Input}); + + } + + + if (TargetArgsHasAtAt) { + + ShowmapArgs.append({"-A", StdinFile}); + Redirects[/* stdin */ 0] = "/dev/null"; + + } else if (InputIsFile) { + + Redirects[/* stdin */ 0] = Input; + + } + + if (FridaMode) ShowmapArgs.push_back("-O"); + if (QemuMode) ShowmapArgs.push_back("-Q"); + if (UnicornMode) ShowmapArgs.push_back("-U"); + + ShowmapArgs.append({"-o", Output, "--", TargetProg}); + ShowmapArgs.append(TargetArgs.begin(), TargetArgs.end()); + + // ------------------------------------------------------------------------ // + // Run afl-showmap + // ------------------------------------------------------------------------ // + + const int RC = sys::ExecuteAndWait(ShowmapPath, ShowmapArgs, + /*env=*/None, Redirects); + if (RC && !CrashMode) { + + ErrMsg() << "Exit code " << RC << " != 0 received from afl-showmap\n"; + return createStringError(inconvertibleErrorCode(), "afl-showmap failed"); + + } + + // ------------------------------------------------------------------------ // + // Parse afl-showmap output + // ------------------------------------------------------------------------ // + + AFLCoverageVector Cov; + std::error_code EC; + sys::fs::file_status Status; + + if (InputIsFile) { + + // Read a single output coverage file + if ((EC = readCov(Output, Cov))) { + + sys::fs::remove(Output); + return errorCodeToError(EC); + + } + + CovMap.try_emplace(sys::path::filename(Input), Cov); + if (!KeepTraces) sys::fs::remove(Output); + + } else { + + // Read a directory of output coverage files + for (sys::fs::recursive_directory_iterator Dir(TraceDir, EC), DirEnd; + Dir != DirEnd && !EC; Dir.increment(EC)) { + + if (EC) return errorCodeToError(EC); + + const auto &Path = Dir->path(); + if ((EC = sys::fs::status(Path, Status))) return errorCodeToError(EC); + + switch (Status.type()) { + + case sys::fs::file_type::regular_file: + case sys::fs::file_type::symlink_file: + case sys::fs::file_type::type_unknown: + Cov.clear(); + if ((EC = readCov(Path, Cov))) { + + sys::fs::remove(Path); + return errorCodeToError(EC); + + } + + CovMap.try_emplace(sys::path::filename(Path), Cov); + default: + // Ignore + break; + + } + + } + + if (!KeepTraces) sys::fs::remove_directories(TraceDir); + + } + + return Error::success(); + +} + +static inline void StartTimer() { + + StartTime = std::chrono::system_clock::now(); + +} + +static inline void EndTimer() { + + EndTime = std::chrono::system_clock::now(); + Duration = + std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); + + SuccMsg() << " Completed in " << Duration.count() << " s\n"; + +} + +// -------------------------------------------------------------------------- // +// Main function +// -------------------------------------------------------------------------- // + +int main(int argc, char *argv[]) { + + WeightsMap Weights; + std::error_code EC; + + // ------------------------------------------------------------------------ // + // Parse command-line options and environment variables + // + // Also check the target arguments, as this determines how we run afl-showmap. + // ------------------------------------------------------------------------ // + + cl::ParseCommandLineOptions(argc, argv, "Optimal corpus minimizer"); + + KeepTraces = !!std::getenv("AFL_KEEP_TRACES"); + SkipBinCheck = !!std::getenv("AFL_SKIP_BIN_CHECK"); + const auto AFLPath = std::getenv("AFL_PATH"); + + if (CrashMode) ::setenv("AFL_CMIN_CRASHES_ONLY", "1", /*overwrite=*/true); + + for (const auto &Arg : TargetArgs) + if (Arg == "@@") TargetArgsHasAtAt = true; + + // ------------------------------------------------------------------------ // + // Find afl-showmap + // ------------------------------------------------------------------------ // + + SmallVector<StringRef, 16> EnvPaths; + + if (const char *PathEnv = std::getenv("PATH")) + SplitString(PathEnv, EnvPaths, ":"); + if (AFLPath) EnvPaths.push_back(AFLPath); + + const auto ShowmapOrErr = sys::findProgramByName("afl-showmap", EnvPaths); + if (ShowmapOrErr.getError()) { + + ErrMsg() << "Failed to find afl-showmap. Check your PATH\n"; + return 1; + + } + + ShowmapPath = *ShowmapOrErr; + + // ------------------------------------------------------------------------ // + // Parse weights + // + // Weights are stored in CSV file mapping a seed file name to an integer + // greater than zero. + // ------------------------------------------------------------------------ // + + if (WeightsFile != "") { + + StatMsg() << "Reading weights from '" << WeightsFile << "'...\n"; + StartTimer(); + + const auto WeightsOrErr = MemoryBuffer::getFile(WeightsFile); + if ((EC = WeightsOrErr.getError())) { + + ErrMsg() << "Failed to read weights from '" << WeightsFile + << "': " << EC.message() << '\n'; + return 1; + + } + + GetWeights(*WeightsOrErr.get(), Weights); + + EndTimer(); + + } + + // ------------------------------------------------------------------------ // + // Traverse input directory + // + // Find the seed files inside this directory (and subdirectories). + // ------------------------------------------------------------------------ // + + StatMsg() << "Locating seeds in '" << InputDir << "'...\n"; + StartTimer(); + + bool IsDirResult; + if ((EC = sys::fs::is_directory(InputDir, IsDirResult))) { + + ErrMsg() << "Invalid input directory '" << InputDir << "': " << EC.message() + << '\n'; + return 1; + + } + + sys::fs::file_status Status; + StringMap<std::string> SeedFiles; + + for (sys::fs::recursive_directory_iterator Dir(InputDir, EC), DirEnd; + Dir != DirEnd && !EC; Dir.increment(EC)) { + + if (EC) { + + ErrMsg() << "Failed to traverse input directory '" << InputDir + << "': " << EC.message() << '\n'; + return 1; + + } + + const auto &Path = Dir->path(); + if ((EC = sys::fs::status(Path, Status))) { + + ErrMsg() << "Failed to access '" << Path << "': " << EC.message() << '\n'; + return 1; + + } + + switch (Status.type()) { + + case sys::fs::file_type::regular_file: + case sys::fs::file_type::symlink_file: + case sys::fs::file_type::type_unknown: + SeedFiles.try_emplace(sys::path::filename(Path), + sys::path::parent_path(Path)); + default: + /* Ignore */ + break; + + } + + } + + EndTimer(); + + if (SeedFiles.empty()) { + + ErrMsg() << "Failed to find any seed files in '" << InputDir << "'\n"; + return 1; + + } + + // ------------------------------------------------------------------------ // + // Setup output directory + // ------------------------------------------------------------------------ // + + SmallString<32> TraceDir{OutputDir}; + sys::path::append(TraceDir, ".traces"); + + if ((EC = sys::fs::remove_directories(TraceDir))) { + + ErrMsg() << "Failed to remove existing trace directory in '" << OutputDir + << "': " << EC.message() << '\n'; + return 1; + + } + + if ((EC = sys::fs::create_directories(TraceDir))) { + + ErrMsg() << "Failed to create output directory '" << OutputDir + << "': " << EC.message() << '\n'; + return 1; + + } + + // ------------------------------------------------------------------------ // + // Test the target binary + // ------------------------------------------------------------------------ // + + AFLCoverageMap CovMap; + + if (!SkipBinCheck) { + + const auto It = SeedFiles.begin(); + SmallString<32> TestSeed{It->second}; + sys::path::append(TestSeed, It->first()); + + StatMsg() << "Testing the target binary with '" << TestSeed << "`...\n"; + StartTimer(); + + if (auto Err = runShowmap(CovMap, TestSeed, /*BinCheck=*/true)) { + + ErrMsg() << "No instrumentation output detected \n"; + return 1; + + } + + EndTimer(); + SuccMsg() << "OK, " << CovMap.begin()->second.size() + << " tuples recorded\n"; + + } + + // ------------------------------------------------------------------------ // + // Generate seed coverage + // + // Iterate over the corpus directory, which should contain seed files. Execute + // these seeds in the target program to generate coverage information, and + // then store this coverage information in the appropriate data structures. + // ------------------------------------------------------------------------ // + + StatMsg() << "Running afl-showmap on " << SeedFiles.size() << " seeds...\n"; + StartTimer(); + + MaxSATSeeds SeedVars; + MaxSATCoverageMap SeedCoverage; + EvalMaxSAT Solver(/*nbMinimizeThread=*/0); + + CovMap.clear(); + if (auto Err = runShowmap(CovMap, InputDir)) { + + ErrMsg() << "Failed to generate coverage: " << Err << '\n'; + return 1; + + } + + for (const auto &SeedCov : CovMap) { + + // Create a variable to represent the seed + const SeedID Var = Solver.newVar(); + SeedVars.emplace_back(Var, SeedCov.first()); + + // Record the set of seeds that cover a particular edge + for (auto &[Edge, Freq] : SeedCov.second) { + + if (EdgesOnly) { + + // Ignore edge frequency + SeedCoverage[Edge].insert(Var); + + } else { + + // Executing edge `E` `N` times means that it was executed `N - 1` times + for (unsigned I = 0; I < Freq; ++I) + SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(Var); + + } + + } + + } + + EndTimer(); + + // ------------------------------------------------------------------------ // + // Set the hard and soft constraints in the solver + // ------------------------------------------------------------------------ // + + StatMsg() << "Generating constraints...\n"; + StartTimer(); + + size_t SeedCount = 0; + + // Ensure that at least one seed is selected that covers a particular edge + // (hard constraint) + std::vector<SeedID> Clauses; + for (const auto &[_, Seeds] : SeedCoverage) { + + if (Seeds.empty()) continue; + + Clauses.clear(); + for (const auto &Seed : Seeds) + Clauses.push_back(Seed); + + Solver.addClause(Clauses); + + } + + // Select the minimum number of seeds that cover a particular set of edges + // (soft constraint) + for (const auto &[Var, Seed] : SeedVars) + Solver.addWeightedClause({-Var}, Weights[sys::path::filename(Seed)]); + + EndTimer(); + + // ------------------------------------------------------------------------ // + // Generate a solution + // ------------------------------------------------------------------------ // + + StatMsg() << "Solving...\n"; + StartTimer(); + + const bool Solved = Solver.solve(); + + EndTimer(); + + // ------------------------------------------------------------------------ // + // Save the solution + // + // This will copy the selected seeds to the given output directory. + // ------------------------------------------------------------------------ // + + SmallVector<StringRef, 64> Solution; + SmallString<32> InputSeed, OutputSeed; + + if (Solved) { + + for (const auto &[Var, Seed] : SeedVars) + if (Solver.getValue(Var) > 0) Solution.push_back(Seed); + + } else { + + ErrMsg() << "Failed to find an optimal solution for '" << InputDir << "'\n"; + return 1; + + } + + StatMsg() << "Copying " << Solution.size() << " seeds to '" << OutputDir + << "'...\n"; + StartTimer(); + + SeedCount = 0; + + for (const auto &Seed : Solution) { + + InputSeed = SeedFiles[Seed]; + sys::path::append(InputSeed, Seed); + + OutputSeed = OutputDir; + sys::path::append(OutputSeed, Seed); + + if ((EC = sys::fs::copy_file(InputSeed, OutputSeed))) { + + ErrMsg() << "Failed to copy '" << Seed << "' to '" << OutputDir + << "': " << EC.message() << '\n'; + return 1; + + } + + } + + EndTimer(); + SuccMsg() << "Done!\n"; + + return 0; + +} + |