aboutsummaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authorvan Hauser <vh@thc.org>2021-07-21 09:48:04 +0200
committerGitHub <noreply@github.com>2021-07-21 09:48:04 +0200
commitfa2b164429f8488bf8a360c20933c7db238ed17c (patch)
tree2d401f65bae30aba3bbff305e80aeea51992f39a /utils
parent3d7a2fc869a03da4c49a0a7e05d97f01a2846337 (diff)
parent62f1bfed99b82bc073c138a00ff9a30bb596d09d (diff)
downloadafl++-fa2b164429f8488bf8a360c20933c7db238ed17c.tar.gz
Merge pull request #1035 from adrianherrera/optimin-util
add optimin corpus minimizer
Diffstat (limited to 'utils')
-rw-r--r--utils/README.md2
-rw-r--r--utils/optimin/.gitignore11
-rw-r--r--utils/optimin/CMakeLists.txt22
-rw-r--r--utils/optimin/EVALMAXSAT_VERSION1
m---------utils/optimin/EvalMaxSAT0
-rw-r--r--utils/optimin/README.md74
-rwxr-xr-xutils/optimin/build_optimin.sh130
-rw-r--r--utils/optimin/src/CMakeLists.txt12
-rw-r--r--utils/optimin/src/OptiMin.cpp455
-rw-r--r--utils/optimin/src/ProgressBar.h58
10 files changed, 765 insertions, 0 deletions
diff --git a/utils/README.md b/utils/README.md
index b157424f..92619fd0 100644
--- a/utils/README.md
+++ b/utils/README.md
@@ -40,6 +40,8 @@ Here's a quick overview of the stuff you can find in this directory:
- libpng_no_checksum - a sample patch for removing CRC checks in libpng.
+ - optimin - An optimal corpus minimizer.
+
- persistent_mode - an example of how to use the LLVM persistent process
mode to speed up certain fuzzing jobs.
diff --git a/utils/optimin/.gitignore b/utils/optimin/.gitignore
new file mode 100644
index 00000000..46f42f8f
--- /dev/null
+++ b/utils/optimin/.gitignore
@@ -0,0 +1,11 @@
+CMakeLists.txt.user
+CMakeCache.txt
+CMakeFiles
+CMakeScripts
+Testing
+Makefile
+cmake_install.cmake
+install_manifest.txt
+compile_commands.json
+CTestTestfile.cmake
+_deps
diff --git a/utils/optimin/CMakeLists.txt b/utils/optimin/CMakeLists.txt
new file mode 100644
index 00000000..b45dd004
--- /dev/null
+++ b/utils/optimin/CMakeLists.txt
@@ -0,0 +1,22 @@
+cmake_minimum_required(VERSION 3.10)
+
+project(optimin
+ LANGUAGES CXX
+ DESCRIPTION "MaxSAT-based fuzzing corpus minimizer"
+)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
+
+# Add LLVM
+find_package(LLVM REQUIRED CONFIG)
+message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
+
+include_directories(${LLVM_INCLUDE_DIRS})
+add_definitions(${LLVM_DEFINITIONS} -DNDEBUG)
+
+add_subdirectory(EvalMaxSAT)
+add_subdirectory(src)
diff --git a/utils/optimin/EVALMAXSAT_VERSION b/utils/optimin/EVALMAXSAT_VERSION
new file mode 100644
index 00000000..d836ff1c
--- /dev/null
+++ b/utils/optimin/EVALMAXSAT_VERSION
@@ -0,0 +1 @@
+440bf90edf88f6ab940934129e3c5b3b93764295
diff --git a/utils/optimin/EvalMaxSAT b/utils/optimin/EvalMaxSAT
new file mode 160000
+Subproject 440bf90edf88f6ab940934129e3c5b3b9376429
diff --git a/utils/optimin/README.md b/utils/optimin/README.md
new file mode 100644
index 00000000..5001b59d
--- /dev/null
+++ b/utils/optimin/README.md
@@ -0,0 +1,74 @@
+# OptiMin
+
+OptiMin is a corpus minimizer that uses a
+[MaxSAT](https://en.wikipedia.org/wiki/Maximum_satisfiability_problem) solver
+to identify a subset of functionally distinct files that exercise different code
+paths in a target program.
+
+Unlike most corpus minimizers, such as `afl-cmin`, OptiMin does not rely on
+heuristic and/or greedy algorithms to identify these functionally distinct
+files. This means that minimized corpora are generally much smaller than those
+produced by other tools.
+
+## Usage
+
+To build the `optimin` executable (when cloned from github):
+
+```bash
+# Ensure EvalMaxSAT is available
+git submodule init
+git submodule update
+
+mkdir build
+cd build
+
+# You may have to specify -DLLVM_DIR=`llvm-config --cmakedir` if you have a
+# non-standard LLVM install (e.g., install via apt)
+cmake ..
+make -j
+make install
+```
+
+Otherwise, run the `build_optimin.sh` script. Running `optimin` is the same as
+running `afl-cmin`.
+
+### Weighted Minimizations
+
+OptiMin allows for weighted minimizations. For examples, seeds can be weighted
+by file size (or execution time), thus preferencing the selection of smaller (or
+faster) seeds.
+
+To perform a weighted minimization, supply a CSV file with the `-w` option. This
+CSV file is formatted as follows:
+
+```
+SEED_1,WEIGHT_1
+SEED_2,WEIGHT_2
+...
+SEED_N,WEIGHT_N
+```
+
+Where `SEED_N` is the file name (**not** path) of a seed in the input directory,
+and `WEIGHT_N` is an integer weight.
+
+## Further Details and Citation
+
+For more details, please see the paper [Seed Selection for Successful
+Fuzzing](https://dl.acm.org/doi/10.1145/3460319.3464795). If you use OptiMin in
+your research, please cite this paper.
+
+Bibtex:
+
+```bibtex
+@inproceedings{Herrera:2021:FuzzSeedSelection,
+ author = {Adrian Herrera and Hendra Gunadi and Shane Magrath and Michael Norrish and Mathias Payer and Antony L. Hosking},
+ title = {Seed Selection for Successful Fuzzing},
+ booktitle = {30th ACM SIGSOFT International Symposium on Software Testing and Analysis},
+ series = {ISSTA},
+ year = {2021},
+ pages = {230--243},
+ numpages = {14},
+ location = {Virtual, Denmark},
+ publisher = {Association for Computing Machinery},
+}
+```
diff --git a/utils/optimin/build_optimin.sh b/utils/optimin/build_optimin.sh
new file mode 100755
index 00000000..7397aa45
--- /dev/null
+++ b/utils/optimin/build_optimin.sh
@@ -0,0 +1,130 @@
+#!/bin/sh
+#
+# american fuzzy lop++ - optimin build script
+# ------------------------------------------------
+#
+# Originally written by Nathan Voss <njvoss99@gmail.com>
+#
+# Adapted from code by Andrew Griffiths <agriffiths@google.com> and
+# Michal Zalewski
+#
+# Adapted for AFLplusplus by Dominik Maier <mail@dmnk.co>
+#
+# Copyright 2017 Battelle Memorial Institute. All rights reserved.
+# Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# This script builds the OptiMin corpus minimizer.
+
+EVALMAXSAT_VERSION="$(cat ./EVALMAXSAT_VERSION)"
+EVALMAXSAT_REPO="https://github.com/FlorentAvellaneda/EvalMaxSAT"
+
+echo "================================================="
+echo "OptiMin build script"
+echo "================================================="
+echo
+
+echo "[*] Performing basic sanity checks..."
+
+PLT=`uname -s`
+
+if [ ! -f "../../config.h" ]; then
+
+ echo "[-] Error: key files not found - wrong working directory?"
+ exit 1
+
+fi
+
+LLVM_CONFIG="${LLVM_CONFIG:-llvm-config}"
+CMAKECMD=cmake
+MAKECMD=make
+TARCMD=tar
+
+if [ "$PLT" = "Darwin" ]; then
+ CORES=`sysctl -n hw.ncpu`
+ TARCMD=tar
+fi
+
+if [ "$PLT" = "FreeBSD" ]; then
+ MAKECMD=gmake
+ CORES=`sysctl -n hw.ncpu`
+ TARCMD=gtar
+fi
+
+if [ "$PLT" = "NetBSD" ] || [ "$PLT" = "OpenBSD" ]; then
+ MAKECMD=gmake
+ CORES=`sysctl -n hw.ncpu`
+ TARCMD=gtar
+fi
+
+PREREQ_NOTFOUND=
+for i in git $CMAKECMD $MAKECMD $TARCMD; do
+
+ T=`command -v "$i" 2>/dev/null`
+
+ if [ "$T" = "" ]; then
+
+ echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i' or similar."
+ PREREQ_NOTFOUND=1
+
+ fi
+
+done
+
+if echo "$CC" | grep -qF /afl-; then
+
+ echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool."
+ PREREQ_NOTFOUND=1
+
+fi
+
+if [ "$PREREQ_NOTFOUND" = "1" ]; then
+ exit 1
+fi
+
+echo "[+] All checks passed!"
+
+echo "[*] Making sure EvalMaxSAT is checked out"
+
+git status 1>/dev/null 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "[*] initializing EvalMaxSAT submodule"
+ git submodule init || exit 1
+ git submodule update ./EvalMaxSAT 2>/dev/null # ignore errors
+else
+ echo "[*] cloning EvalMaxSAT"
+ test -d EvalMaxSAT || {
+ CNT=1
+ while [ '!' -d EvalMaxSAT -a "$CNT" -lt 4 ]; do
+ echo "Trying to clone EvalMaxSAT (attempt $CNT/3)"
+ git clone "$GRAMMAR_REPO"
+ CNT=`expr "$CNT" + 1`
+ done
+ }
+fi
+
+test -d EvalMaxSAT || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; }
+echo "[+] Got EvalMaxSAT."
+
+cd "EvalMaxSAT" || exit 1
+echo "[*] Checking out $EVALMAXSAT_VERSION"
+sh -c 'git stash && git stash drop' 1>/dev/null 2>/dev/null
+git checkout "$EVALMAXSAT_VERSION" || exit 1
+cd ..
+
+echo
+echo
+echo "[+] EvalMaxSAT successfully prepared!"
+echo "[+] Building OptiMin now."
+mkdir -p build
+cd build
+cmake .. -DLLVM_DIR=`$LLVM_CONFIG --cmakedir`
+make -j$CORES
+cd ..
+echo
+echo "[+] OptiMin successfully built!"
diff --git a/utils/optimin/src/CMakeLists.txt b/utils/optimin/src/CMakeLists.txt
new file mode 100644
index 00000000..f31ceeaf
--- /dev/null
+++ b/utils/optimin/src/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_executable(optimin OptiMin.cpp)
+
+foreach(LIB MaLib EvalMaxSAT glucose)
+ target_include_directories(optimin PRIVATE
+ "${CMAKE_SOURCE_DIR}/EvalMaxSAT/lib/${LIB}/src")
+ target_link_libraries(optimin ${LIB})
+endforeach(LIB)
+
+llvm_map_components_to_libnames(LLVM_LIBS support)
+target_link_libraries(optimin ${LLVM_LIBS})
+
+install(TARGETS optimin RUNTIME DESTINATION bin)
diff --git a/utils/optimin/src/OptiMin.cpp b/utils/optimin/src/OptiMin.cpp
new file mode 100644
index 00000000..e02fcbe5
--- /dev/null
+++ b/utils/optimin/src/OptiMin.cpp
@@ -0,0 +1,455 @@
+/*
+ * OptiMin, an optimal fuzzing corpus minimizer.
+ *
+ * Author: Adrian Herrera
+ */
+
+#include <cstdint>
+#include <vector>
+
+#include <llvm/ADT/DenseSet.h>
+#include <llvm/ADT/DenseMap.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/StringExtras.h>
+#include <llvm/ADT/StringMap.h>
+#include <llvm/Support/Chrono.h>
+#include <llvm/Support/CommandLine.h>
+#include <llvm/Support/FileSystem.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Support/Path.h>
+#include <llvm/Support/Program.h>
+#include <llvm/Support/WithColor.h>
+
+#include "EvalMaxSAT.h"
+#include "ProgressBar.h"
+
+using namespace llvm;
+
+namespace {
+
+// -------------------------------------------------------------------------- //
+// Classes
+// -------------------------------------------------------------------------- //
+
+/// Ensure seed weights default to 1
+class Weight {
+ public:
+ Weight() : Weight(1){};
+ Weight(uint32_t V) : Value(V){};
+
+ operator unsigned() const {
+ return Value;
+ }
+
+ private:
+ const unsigned Value;
+};
+
+// -------------------------------------------------------------------------- //
+// Typedefs
+// -------------------------------------------------------------------------- //
+
+/// AFL tuple (edge) ID
+using AFLTupleID = uint32_t;
+
+/// Pair of tuple ID and hit count
+using AFLTuple = std::pair<AFLTupleID, /* Frequency */ unsigned>;
+
+/// Coverage for a given seed file
+using AFLCoverageVector = std::vector<AFLTuple>;
+
+/// Maps seed file paths to a weight
+using WeightsMap = StringMap<Weight>;
+
+/// A seed identifier in the MaxSAT solver
+using SeedID = int;
+
+/// Associates seed identifiers to seed files
+using MaxSATSeeds =
+ SmallVector<std::pair<SeedID, /* Seed file */ std::string>, 0>;
+
+/// Set of literal identifiers
+using MaxSATSeedSet = DenseSet<SeedID>;
+
+/// Maps tuple IDs to the literal identifiers that "cover" that tuple
+using MaxSATCoverageMap = DenseMap<AFLTupleID, MaxSATSeedSet>;
+
+// -------------------------------------------------------------------------- //
+// Global variables
+// -------------------------------------------------------------------------- //
+
+// This is based on the human class count in `count_class_human[256]` in
+// `afl-showmap.c`
+static constexpr uint32_t MAX_EDGE_FREQ = 8;
+
+static sys::TimePoint<> StartTime, EndTime;
+static std::chrono::seconds Duration;
+
+static std::string AFLShowmapPath;
+static bool TargetArgsHasAtAt = false;
+
+static const auto ErrMsg = [] {
+ return WithColor(errs(), HighlightColor::Error) << "[-] ";
+};
+static const auto WarnMsg = [] {
+ return WithColor(errs(), HighlightColor::Warning) << "[-] ";
+};
+static const auto SuccMsg = [] {
+ return WithColor(outs(), HighlightColor::String) << "[+] ";
+};
+static const auto StatMsg = [] {
+ return WithColor(outs(), HighlightColor::Remark) << "[*] ";
+};
+
+static cl::opt<std::string> CorpusDir("i", cl::desc("Input directory"),
+ cl::value_desc("dir"), cl::Required);
+static cl::opt<std::string> OutputDir("o", cl::desc("Output directory"),
+ cl::value_desc("dir"), cl::Required);
+static cl::opt<bool> EdgesOnly("f", cl::desc("Include edge hit counts"),
+ cl::init(true));
+static cl::opt<bool> ShowProgBar("p", cl::desc("Display progress bar"));
+static cl::opt<std::string> WeightsFile("w", cl::desc("Weights file"),
+ cl::value_desc("csv"));
+static cl::opt<std::string> TargetProg(cl::Positional,
+ cl::desc("<target program>"),
+ cl::Required);
+static cl::list<std::string> TargetArgs(cl::ConsumeAfter,
+ cl::desc("[target args...]"));
+static cl::opt<std::string> MemLimit(
+ "m", cl::desc("Memory limit for child process (default=none)"),
+ cl::value_desc("megs"), cl::init("none"));
+static cl::opt<std::string> Timeout(
+ "t", cl::desc("Run time limit for child process (default=none)"),
+ cl::value_desc("msec"), cl::init("none"));
+static cl::opt<bool> CrashMode(
+ "C", cl::desc("Keep crashing inputs, reject everything else"));
+static cl::opt<bool> QemuMode("Q", cl::desc("Use binary-only instrumentation"));
+} // anonymous namespace
+
+// -------------------------------------------------------------------------- //
+// Helper functions
+// -------------------------------------------------------------------------- //
+
+static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) {
+ SmallVector<StringRef, 0> Lines;
+ MB.getBuffer().trim().split(Lines, '\n');
+
+ unsigned Weight = 0;
+
+ for (const auto &Line : Lines) {
+ const auto &[Seed, WeightStr] = Line.split(',');
+
+ if (to_integer(WeightStr, Weight, 10)) {
+ Weights.try_emplace(Seed, Weight);
+ } else {
+ WarnMsg() << "Failed to read weight for `" << Seed << "`. Skipping...\n";
+ }
+ }
+}
+
+[[nodiscard]] static std::error_code getAFLCoverage(const StringRef Seed,
+ AFLCoverageVector &Cov) {
+ Optional<StringRef> Redirects[] = {None, None, None};
+ std::error_code EC;
+
+ // Create temporary output file
+ SmallString<64> OutputPath;
+ if (EC = sys::fs::createTemporaryFile("showmap", "txt", OutputPath))
+ return EC;
+
+ // Prepare afl-showmap arguments
+ SmallVector<StringRef, 12> AFLShowmapArgs{
+ AFLShowmapPath, "-m", MemLimit, "-t", Timeout, "-q", "-o", OutputPath};
+
+ if (TargetArgsHasAtAt)
+ AFLShowmapArgs.append({"-A", Seed});
+ else
+ Redirects[/* stdin */ 0] = Seed;
+
+ if (QemuMode) AFLShowmapArgs.push_back("-Q");
+ if (CrashMode) AFLShowmapArgs.push_back("-C");
+
+ AFLShowmapArgs.append({"--", TargetProg});
+ AFLShowmapArgs.append(TargetArgs.begin(), TargetArgs.end());
+
+ // Run afl-showmap
+ const int RC = sys::ExecuteAndWait(AFLShowmapPath, AFLShowmapArgs,
+ /*env=*/None, Redirects);
+ if (RC) return std::make_error_code(std::errc::executable_format_error);
+
+ // Parse afl-showmap output
+ const auto CovOrErr = MemoryBuffer::getFile(OutputPath);
+ if (EC = CovOrErr.getError()) {
+ sys::fs::remove(OutputPath);
+ return EC;
+ }
+
+ SmallVector<StringRef, 0> Lines;
+ CovOrErr.get()->getBuffer().trim().split(Lines, '\n');
+
+ AFLTupleID Edge = 0;
+ unsigned Freq = 0;
+
+ for (const auto &Line : Lines) {
+ const auto &[EdgeStr, FreqStr] = Line.split(':');
+
+ to_integer(EdgeStr, Edge, 10);
+ to_integer(FreqStr, Freq, 10);
+ Cov.push_back({Edge, Freq});
+ }
+
+ return sys::fs::remove(OutputPath);
+}
+
+static inline void StartTimer(bool ShowProgBar) {
+ StartTime = std::chrono::system_clock::now();
+}
+
+static inline void EndTimer(bool ShowProgBar) {
+ EndTime = std::chrono::system_clock::now();
+ Duration =
+ std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime);
+
+ if (ShowProgBar)
+ outs() << '\n';
+ else
+ outs() << Duration.count() << "s\n";
+}
+
+// -------------------------------------------------------------------------- //
+// Main function
+// -------------------------------------------------------------------------- //
+
+int main(int argc, char *argv[]) {
+ WeightsMap Weights;
+ ProgressBar ProgBar;
+ std::error_code EC;
+
+ // ------------------------------------------------------------------------ //
+ // Parse command-line options
+ //
+ // Also check the target arguments, as this determines how we run afl-showmap.
+ // ------------------------------------------------------------------------ //
+
+ cl::ParseCommandLineOptions(argc, argv, "Optimal corpus minimizer");
+
+ if (!sys::fs::is_directory(OutputDir)) {
+ ErrMsg() << "Invalid output directory `" << OutputDir << "`\n";
+ return 1;
+ }
+
+ for (const auto &Arg : TargetArgs)
+ if (Arg == "@@") TargetArgsHasAtAt = true;
+
+ // ------------------------------------------------------------------------ //
+ // Find afl-showmap
+ // ------------------------------------------------------------------------ //
+
+ const auto AFLShowmapOrErr = sys::findProgramByName("afl-showmap");
+ if (AFLShowmapOrErr.getError()) {
+ ErrMsg() << "Failed to find afl-showmap. Check your PATH\n";
+ return 1;
+ }
+ AFLShowmapPath = *AFLShowmapOrErr;
+
+ // ------------------------------------------------------------------------ //
+ // Parse weights
+ //
+ // Weights are stored in CSV file mapping a seed file name to an integer
+ // greater than zero.
+ // ------------------------------------------------------------------------ //
+
+ if (WeightsFile != "") {
+ StatMsg() << "Reading weights from `" << WeightsFile << "`... ";
+ StartTimer(/*ShowProgBar=*/false);
+
+ const auto WeightsOrErr = MemoryBuffer::getFile(WeightsFile);
+ if (EC = WeightsOrErr.getError()) {
+ ErrMsg() << "Failed to read weights from `" << WeightsFile
+ << "`: " << EC.message() << '\n';
+ return 1;
+ }
+
+ GetWeights(*WeightsOrErr.get(), Weights);
+
+ EndTimer(/*ShowProgBar=*/false);
+ }
+
+ // ------------------------------------------------------------------------ //
+ // Traverse corpus directory
+ //
+ // Find the seed files inside this directory.
+ // ------------------------------------------------------------------------ //
+
+ StatMsg() << "Locating seeds in `" << CorpusDir << "`... ";
+ StartTimer(/*ShowProgBar=*/false);
+
+ std::vector<std::string> SeedFiles;
+ sys::fs::file_status Status;
+
+ for (sys::fs::directory_iterator Dir(CorpusDir, EC), DirEnd;
+ Dir != DirEnd && !EC; Dir.increment(EC)) {
+ if (EC) {
+ ErrMsg() << "Failed to traverse corpus directory `" << CorpusDir
+ << "`: " << EC.message() << '\n';
+ return 1;
+ }
+
+ const auto &Path = Dir->path();
+ if (EC = sys::fs::status(Path, Status)) {
+ WarnMsg() << "Failed to access seed file `" << Path
+ << "`: " << EC.message() << ". Skipping...\n";
+ continue;
+ }
+
+ switch (Status.type()) {
+ case sys::fs::file_type::regular_file:
+ case sys::fs::file_type::symlink_file:
+ case sys::fs::file_type::type_unknown:
+ SeedFiles.push_back(Path);
+ default:
+ /* Ignore */
+ break;
+ }
+ }
+
+ EndTimer(/*ShowProgBar=*/false);
+
+ // ------------------------------------------------------------------------ //
+ // Generate seed coverage
+ //
+ // Iterate over the corpus directory, which should contain seed files. Execute
+ // these seeds in the target program to generate coverage information, and
+ // then store this coverage information in the appropriate data structures.
+ // ------------------------------------------------------------------------ //
+
+ size_t SeedCount = 0;
+ const size_t NumSeeds = SeedFiles.size();
+
+ if (!ShowProgBar)
+ StatMsg() << "Generating coverage for " << NumSeeds << " seeds... ";
+ StartTimer(ShowProgBar);
+
+ EvalMaxSAT Solver(/*nbMinimizeThread=*/0);
+ MaxSATSeeds SeedVars;
+ MaxSATCoverageMap SeedCoverage;
+ AFLCoverageVector Cov;
+
+ for (const auto &SeedFile : SeedFiles) {
+ // Execute seed
+ Cov.clear();
+ if (EC = getAFLCoverage(SeedFile, Cov)) {
+ ErrMsg() << "Failed to get coverage for seed " << SeedFile << ": "
+ << EC.message() << '\n';
+ return 1;
+ }
+
+ // Create a variable to represent the seed
+ const SeedID Var = Solver.newVar();
+ SeedVars.push_back({Var, SeedFile});
+
+ // Record the set of seeds that cover a particular edge
+ for (const auto &[Edge, Freq] : Cov) {
+ if (EdgesOnly) {
+ // Ignore edge frequency
+ SeedCoverage[Edge].insert(Var);
+ } else {
+ // Executing edge `E` `N` times means that it was executed `N - 1` times
+ for (unsigned I = 0; I < Freq; ++I)
+ SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(Var);
+ }
+ }
+
+ if ((++SeedCount % 10 == 0) && ShowProgBar)
+ ProgBar.update(SeedCount * 100 / NumSeeds, "Generating seed coverage");
+ }
+
+ EndTimer(ShowProgBar);
+
+ // ------------------------------------------------------------------------ //
+ // Set the hard and soft constraints in the solver
+ // ------------------------------------------------------------------------ //
+
+ if (!ShowProgBar) StatMsg() << "Generating constraints... ";
+ StartTimer(ShowProgBar);
+
+ SeedCount = 0;
+
+ // Ensure that at least one seed is selected that covers a particular edge
+ // (hard constraint)
+ std::vector<SeedID> Clauses;
+ for (const auto &[_, Seeds] : SeedCoverage) {
+ if (Seeds.empty()) continue;
+
+ Clauses.clear();
+ for (const auto &Seed : Seeds)
+ Clauses.push_back(Seed);
+
+ Solver.addClause(Clauses);
+
+ if ((++SeedCount % 10 == 0) && ShowProgBar)
+ ProgBar.update(SeedCount * 100 / SeedCoverage.size(),
+ "Generating clauses");
+ }
+
+ // Select the minimum number of seeds that cover a particular set of edges
+ // (soft constraint)
+ for (const auto &[Var, Seed] : SeedVars)
+ Solver.addWeightedClause({-Var}, Weights[sys::path::filename(Seed)]);
+
+ EndTimer(ShowProgBar);
+
+ // ------------------------------------------------------------------------ //
+ // Generate a solution
+ // ------------------------------------------------------------------------ //
+
+ StatMsg() << "Solving... ";
+ StartTimer(/*ShowProgBar=*/false);
+
+ const bool Solved = Solver.solve();
+
+ EndTimer(/*ShowProgBar=*/false);
+
+ // ------------------------------------------------------------------------ //
+ // Save the solution
+ //
+ // This will copy the selected seeds to the given output directory.
+ // ------------------------------------------------------------------------ //
+
+ SmallVector<StringRef, 64> Solution;
+ SmallString<32> OutputSeed;
+
+ if (Solved) {
+ for (const auto &[Var, Seed] : SeedVars)
+ if (Solver.getValue(Var) > 0) Solution.push_back(Seed);
+ } else {
+ ErrMsg() << "Failed to find an optimal solution for `" << CorpusDir
+ << "`\n";
+ return 1;
+ }
+
+ SuccMsg() << "Minimized corpus size: " << Solution.size() << " seeds\n";
+
+ if (!ShowProgBar) StatMsg() << "Copying to `" << OutputDir << "`... ";
+ StartTimer(ShowProgBar);
+
+ SeedCount = 0;
+
+ for (const auto &Seed : Solution) {
+ OutputSeed = OutputDir;
+ sys::path::append(OutputSeed, sys::path::filename(Seed));
+
+ if (EC = sys::fs::copy_file(Seed, OutputSeed)) {
+ WarnMsg() << "Failed to copy `" << Seed << "` to `" << OutputDir
+ << "`: " << EC.message() << '\n';
+ }
+
+ if ((++SeedCount % 10 == 0) && ShowProgBar)
+ ProgBar.update(SeedCount * 100 / Solution.size(), "Copying seeds");
+ }
+
+ EndTimer(ShowProgBar);
+ SuccMsg() << "Done!\n";
+
+ return 0;
+}
diff --git a/utils/optimin/src/ProgressBar.h b/utils/optimin/src/ProgressBar.h
new file mode 100644
index 00000000..2f8d7403
--- /dev/null
+++ b/utils/optimin/src/ProgressBar.h
@@ -0,0 +1,58 @@
+/**
+ * Progress bar.
+ *
+ * Adapted from https://www.bfilipek.com/2020/02/inidicators.html
+ */
+
+#pragma once
+
+#include <llvm/ADT/StringRef.h>
+#include <llvm/Support/raw_ostream.h>
+
+/// Display a progress bar in the terminal
+class ProgressBar {
+ private:
+ const size_t BarWidth;
+ const std::string Fill;
+ const std::string Remainder;
+
+ public:
+ ProgressBar() : ProgressBar(60, "#", " ") {
+ }
+
+ ProgressBar(size_t Width, const llvm::StringRef F, const llvm::StringRef R)
+ : BarWidth(Width), Fill(F), Remainder(R) {
+ }
+
+ void update(float Progress, const llvm::StringRef Status = "",
+ llvm::raw_ostream &OS = llvm::outs()) {
+ // No need to write once progress is 100%
+ if (Progress > 100.0f) return;
+
+ // Move cursor to the first position on the same line and flush
+ OS << '\r';
+ OS.flush();
+
+ // Start bar
+ OS << '[';
+
+ const auto Completed =
+ static_cast<size_t>(Progress * static_cast<float>(BarWidth) / 100.0);
+ for (size_t I = 0; I < BarWidth; ++I) {
+ if (I <= Completed) {
+ OS << Fill;
+ } else {
+ OS << Remainder;
+ }
+ }
+
+ // End bar
+ OS << ']';
+
+ // Write progress percentage
+ OS << ' ' << std::min(static_cast<size_t>(Progress), size_t(100)) << '%';
+
+ // Write status text
+ OS << " " << Status;
+ }
+};