diff options
author | Adrian Herrera <adrian.herrera02@gmail.com> | 2021-07-20 04:23:26 +0000 |
---|---|---|
committer | Adrian Herrera <adrian.herrera02@gmail.com> | 2021-07-21 04:02:52 +0000 |
commit | 62f1bfed99b82bc073c138a00ff9a30bb596d09d (patch) | |
tree | 2d401f65bae30aba3bbff305e80aeea51992f39a /utils/optimin | |
parent | 3d7a2fc869a03da4c49a0a7e05d97f01a2846337 (diff) | |
download | afl++-62f1bfed99b82bc073c138a00ff9a30bb596d09d.tar.gz |
utils: added optimin corpus minimizer
Diffstat (limited to 'utils/optimin')
-rw-r--r-- | utils/optimin/.gitignore | 11 | ||||
-rw-r--r-- | utils/optimin/CMakeLists.txt | 22 | ||||
-rw-r--r-- | utils/optimin/EVALMAXSAT_VERSION | 1 | ||||
m--------- | utils/optimin/EvalMaxSAT | 0 | ||||
-rw-r--r-- | utils/optimin/README.md | 74 | ||||
-rwxr-xr-x | utils/optimin/build_optimin.sh | 130 | ||||
-rw-r--r-- | utils/optimin/src/CMakeLists.txt | 12 | ||||
-rw-r--r-- | utils/optimin/src/OptiMin.cpp | 455 | ||||
-rw-r--r-- | utils/optimin/src/ProgressBar.h | 58 |
9 files changed, 763 insertions, 0 deletions
diff --git a/utils/optimin/.gitignore b/utils/optimin/.gitignore new file mode 100644 index 00000000..46f42f8f --- /dev/null +++ b/utils/optimin/.gitignore @@ -0,0 +1,11 @@ +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps diff --git a/utils/optimin/CMakeLists.txt b/utils/optimin/CMakeLists.txt new file mode 100644 index 00000000..b45dd004 --- /dev/null +++ b/utils/optimin/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.10) + +project(optimin + LANGUAGES CXX + DESCRIPTION "MaxSAT-based fuzzing corpus minimizer" +) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") + +# Add LLVM +find_package(LLVM REQUIRED CONFIG) +message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") + +include_directories(${LLVM_INCLUDE_DIRS}) +add_definitions(${LLVM_DEFINITIONS} -DNDEBUG) + +add_subdirectory(EvalMaxSAT) +add_subdirectory(src) diff --git a/utils/optimin/EVALMAXSAT_VERSION b/utils/optimin/EVALMAXSAT_VERSION new file mode 100644 index 00000000..d836ff1c --- /dev/null +++ b/utils/optimin/EVALMAXSAT_VERSION @@ -0,0 +1 @@ +440bf90edf88f6ab940934129e3c5b3b93764295 diff --git a/utils/optimin/EvalMaxSAT b/utils/optimin/EvalMaxSAT new file mode 160000 +Subproject 440bf90edf88f6ab940934129e3c5b3b9376429 diff --git a/utils/optimin/README.md b/utils/optimin/README.md new file mode 100644 index 00000000..5001b59d --- /dev/null +++ b/utils/optimin/README.md @@ -0,0 +1,74 @@ +# OptiMin + +OptiMin is a corpus minimizer that uses a +[MaxSAT](https://en.wikipedia.org/wiki/Maximum_satisfiability_problem) solver +to identify a subset of functionally distinct files that exercise different code +paths in a target program. + +Unlike most corpus minimizers, such as `afl-cmin`, OptiMin does not rely on +heuristic and/or greedy algorithms to identify these functionally distinct +files. This means that minimized corpora are generally much smaller than those +produced by other tools. + +## Usage + +To build the `optimin` executable (when cloned from github): + +```bash +# Ensure EvalMaxSAT is available +git submodule init +git submodule update + +mkdir build +cd build + +# You may have to specify -DLLVM_DIR=`llvm-config --cmakedir` if you have a +# non-standard LLVM install (e.g., install via apt) +cmake .. +make -j +make install +``` + +Otherwise, run the `build_optimin.sh` script. Running `optimin` is the same as +running `afl-cmin`. + +### Weighted Minimizations + +OptiMin allows for weighted minimizations. For examples, seeds can be weighted +by file size (or execution time), thus preferencing the selection of smaller (or +faster) seeds. + +To perform a weighted minimization, supply a CSV file with the `-w` option. This +CSV file is formatted as follows: + +``` +SEED_1,WEIGHT_1 +SEED_2,WEIGHT_2 +... +SEED_N,WEIGHT_N +``` + +Where `SEED_N` is the file name (**not** path) of a seed in the input directory, +and `WEIGHT_N` is an integer weight. + +## Further Details and Citation + +For more details, please see the paper [Seed Selection for Successful +Fuzzing](https://dl.acm.org/doi/10.1145/3460319.3464795). If you use OptiMin in +your research, please cite this paper. + +Bibtex: + +```bibtex +@inproceedings{Herrera:2021:FuzzSeedSelection, + author = {Adrian Herrera and Hendra Gunadi and Shane Magrath and Michael Norrish and Mathias Payer and Antony L. Hosking}, + title = {Seed Selection for Successful Fuzzing}, + booktitle = {30th ACM SIGSOFT International Symposium on Software Testing and Analysis}, + series = {ISSTA}, + year = {2021}, + pages = {230--243}, + numpages = {14}, + location = {Virtual, Denmark}, + publisher = {Association for Computing Machinery}, +} +``` diff --git a/utils/optimin/build_optimin.sh b/utils/optimin/build_optimin.sh new file mode 100755 index 00000000..7397aa45 --- /dev/null +++ b/utils/optimin/build_optimin.sh @@ -0,0 +1,130 @@ +#!/bin/sh +# +# american fuzzy lop++ - optimin build script +# ------------------------------------------------ +# +# Originally written by Nathan Voss <njvoss99@gmail.com> +# +# Adapted from code by Andrew Griffiths <agriffiths@google.com> and +# Michal Zalewski +# +# Adapted for AFLplusplus by Dominik Maier <mail@dmnk.co> +# +# Copyright 2017 Battelle Memorial Institute. All rights reserved. +# Copyright 2019-2020 AFLplusplus Project. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# This script builds the OptiMin corpus minimizer. + +EVALMAXSAT_VERSION="$(cat ./EVALMAXSAT_VERSION)" +EVALMAXSAT_REPO="https://github.com/FlorentAvellaneda/EvalMaxSAT" + +echo "=================================================" +echo "OptiMin build script" +echo "=================================================" +echo + +echo "[*] Performing basic sanity checks..." + +PLT=`uname -s` + +if [ ! -f "../../config.h" ]; then + + echo "[-] Error: key files not found - wrong working directory?" + exit 1 + +fi + +LLVM_CONFIG="${LLVM_CONFIG:-llvm-config}" +CMAKECMD=cmake +MAKECMD=make +TARCMD=tar + +if [ "$PLT" = "Darwin" ]; then + CORES=`sysctl -n hw.ncpu` + TARCMD=tar +fi + +if [ "$PLT" = "FreeBSD" ]; then + MAKECMD=gmake + CORES=`sysctl -n hw.ncpu` + TARCMD=gtar +fi + +if [ "$PLT" = "NetBSD" ] || [ "$PLT" = "OpenBSD" ]; then + MAKECMD=gmake + CORES=`sysctl -n hw.ncpu` + TARCMD=gtar +fi + +PREREQ_NOTFOUND= +for i in git $CMAKECMD $MAKECMD $TARCMD; do + + T=`command -v "$i" 2>/dev/null` + + if [ "$T" = "" ]; then + + echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i' or similar." + PREREQ_NOTFOUND=1 + + fi + +done + +if echo "$CC" | grep -qF /afl-; then + + echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool." + PREREQ_NOTFOUND=1 + +fi + +if [ "$PREREQ_NOTFOUND" = "1" ]; then + exit 1 +fi + +echo "[+] All checks passed!" + +echo "[*] Making sure EvalMaxSAT is checked out" + +git status 1>/dev/null 2>/dev/null +if [ $? -eq 0 ]; then + echo "[*] initializing EvalMaxSAT submodule" + git submodule init || exit 1 + git submodule update ./EvalMaxSAT 2>/dev/null # ignore errors +else + echo "[*] cloning EvalMaxSAT" + test -d EvalMaxSAT || { + CNT=1 + while [ '!' -d EvalMaxSAT -a "$CNT" -lt 4 ]; do + echo "Trying to clone EvalMaxSAT (attempt $CNT/3)" + git clone "$GRAMMAR_REPO" + CNT=`expr "$CNT" + 1` + done + } +fi + +test -d EvalMaxSAT || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } +echo "[+] Got EvalMaxSAT." + +cd "EvalMaxSAT" || exit 1 +echo "[*] Checking out $EVALMAXSAT_VERSION" +sh -c 'git stash && git stash drop' 1>/dev/null 2>/dev/null +git checkout "$EVALMAXSAT_VERSION" || exit 1 +cd .. + +echo +echo +echo "[+] EvalMaxSAT successfully prepared!" +echo "[+] Building OptiMin now." +mkdir -p build +cd build +cmake .. -DLLVM_DIR=`$LLVM_CONFIG --cmakedir` +make -j$CORES +cd .. +echo +echo "[+] OptiMin successfully built!" diff --git a/utils/optimin/src/CMakeLists.txt b/utils/optimin/src/CMakeLists.txt new file mode 100644 index 00000000..f31ceeaf --- /dev/null +++ b/utils/optimin/src/CMakeLists.txt @@ -0,0 +1,12 @@ +add_executable(optimin OptiMin.cpp) + +foreach(LIB MaLib EvalMaxSAT glucose) + target_include_directories(optimin PRIVATE + "${CMAKE_SOURCE_DIR}/EvalMaxSAT/lib/${LIB}/src") + target_link_libraries(optimin ${LIB}) +endforeach(LIB) + +llvm_map_components_to_libnames(LLVM_LIBS support) +target_link_libraries(optimin ${LLVM_LIBS}) + +install(TARGETS optimin RUNTIME DESTINATION bin) diff --git a/utils/optimin/src/OptiMin.cpp b/utils/optimin/src/OptiMin.cpp new file mode 100644 index 00000000..e02fcbe5 --- /dev/null +++ b/utils/optimin/src/OptiMin.cpp @@ -0,0 +1,455 @@ +/* + * OptiMin, an optimal fuzzing corpus minimizer. + * + * Author: Adrian Herrera + */ + +#include <cstdint> +#include <vector> + +#include <llvm/ADT/DenseSet.h> +#include <llvm/ADT/DenseMap.h> +#include <llvm/ADT/SmallVector.h> +#include <llvm/ADT/StringExtras.h> +#include <llvm/ADT/StringMap.h> +#include <llvm/Support/Chrono.h> +#include <llvm/Support/CommandLine.h> +#include <llvm/Support/FileSystem.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/Support/Path.h> +#include <llvm/Support/Program.h> +#include <llvm/Support/WithColor.h> + +#include "EvalMaxSAT.h" +#include "ProgressBar.h" + +using namespace llvm; + +namespace { + +// -------------------------------------------------------------------------- // +// Classes +// -------------------------------------------------------------------------- // + +/// Ensure seed weights default to 1 +class Weight { + public: + Weight() : Weight(1){}; + Weight(uint32_t V) : Value(V){}; + + operator unsigned() const { + return Value; + } + + private: + const unsigned Value; +}; + +// -------------------------------------------------------------------------- // +// Typedefs +// -------------------------------------------------------------------------- // + +/// AFL tuple (edge) ID +using AFLTupleID = uint32_t; + +/// Pair of tuple ID and hit count +using AFLTuple = std::pair<AFLTupleID, /* Frequency */ unsigned>; + +/// Coverage for a given seed file +using AFLCoverageVector = std::vector<AFLTuple>; + +/// Maps seed file paths to a weight +using WeightsMap = StringMap<Weight>; + +/// A seed identifier in the MaxSAT solver +using SeedID = int; + +/// Associates seed identifiers to seed files +using MaxSATSeeds = + SmallVector<std::pair<SeedID, /* Seed file */ std::string>, 0>; + +/// Set of literal identifiers +using MaxSATSeedSet = DenseSet<SeedID>; + +/// Maps tuple IDs to the literal identifiers that "cover" that tuple +using MaxSATCoverageMap = DenseMap<AFLTupleID, MaxSATSeedSet>; + +// -------------------------------------------------------------------------- // +// Global variables +// -------------------------------------------------------------------------- // + +// This is based on the human class count in `count_class_human[256]` in +// `afl-showmap.c` +static constexpr uint32_t MAX_EDGE_FREQ = 8; + +static sys::TimePoint<> StartTime, EndTime; +static std::chrono::seconds Duration; + +static std::string AFLShowmapPath; +static bool TargetArgsHasAtAt = false; + +static const auto ErrMsg = [] { + return WithColor(errs(), HighlightColor::Error) << "[-] "; +}; +static const auto WarnMsg = [] { + return WithColor(errs(), HighlightColor::Warning) << "[-] "; +}; +static const auto SuccMsg = [] { + return WithColor(outs(), HighlightColor::String) << "[+] "; +}; +static const auto StatMsg = [] { + return WithColor(outs(), HighlightColor::Remark) << "[*] "; +}; + +static cl::opt<std::string> CorpusDir("i", cl::desc("Input directory"), + cl::value_desc("dir"), cl::Required); +static cl::opt<std::string> OutputDir("o", cl::desc("Output directory"), + cl::value_desc("dir"), cl::Required); +static cl::opt<bool> EdgesOnly("f", cl::desc("Include edge hit counts"), + cl::init(true)); +static cl::opt<bool> ShowProgBar("p", cl::desc("Display progress bar")); +static cl::opt<std::string> WeightsFile("w", cl::desc("Weights file"), + cl::value_desc("csv")); +static cl::opt<std::string> TargetProg(cl::Positional, + cl::desc("<target program>"), + cl::Required); +static cl::list<std::string> TargetArgs(cl::ConsumeAfter, + cl::desc("[target args...]")); +static cl::opt<std::string> MemLimit( + "m", cl::desc("Memory limit for child process (default=none)"), + cl::value_desc("megs"), cl::init("none")); +static cl::opt<std::string> Timeout( + "t", cl::desc("Run time limit for child process (default=none)"), + cl::value_desc("msec"), cl::init("none")); +static cl::opt<bool> CrashMode( + "C", cl::desc("Keep crashing inputs, reject everything else")); +static cl::opt<bool> QemuMode("Q", cl::desc("Use binary-only instrumentation")); +} // anonymous namespace + +// -------------------------------------------------------------------------- // +// Helper functions +// -------------------------------------------------------------------------- // + +static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) { + SmallVector<StringRef, 0> Lines; + MB.getBuffer().trim().split(Lines, '\n'); + + unsigned Weight = 0; + + for (const auto &Line : Lines) { + const auto &[Seed, WeightStr] = Line.split(','); + + if (to_integer(WeightStr, Weight, 10)) { + Weights.try_emplace(Seed, Weight); + } else { + WarnMsg() << "Failed to read weight for `" << Seed << "`. Skipping...\n"; + } + } +} + +[[nodiscard]] static std::error_code getAFLCoverage(const StringRef Seed, + AFLCoverageVector &Cov) { + Optional<StringRef> Redirects[] = {None, None, None}; + std::error_code EC; + + // Create temporary output file + SmallString<64> OutputPath; + if (EC = sys::fs::createTemporaryFile("showmap", "txt", OutputPath)) + return EC; + + // Prepare afl-showmap arguments + SmallVector<StringRef, 12> AFLShowmapArgs{ + AFLShowmapPath, "-m", MemLimit, "-t", Timeout, "-q", "-o", OutputPath}; + + if (TargetArgsHasAtAt) + AFLShowmapArgs.append({"-A", Seed}); + else + Redirects[/* stdin */ 0] = Seed; + + if (QemuMode) AFLShowmapArgs.push_back("-Q"); + if (CrashMode) AFLShowmapArgs.push_back("-C"); + + AFLShowmapArgs.append({"--", TargetProg}); + AFLShowmapArgs.append(TargetArgs.begin(), TargetArgs.end()); + + // Run afl-showmap + const int RC = sys::ExecuteAndWait(AFLShowmapPath, AFLShowmapArgs, + /*env=*/None, Redirects); + if (RC) return std::make_error_code(std::errc::executable_format_error); + + // Parse afl-showmap output + const auto CovOrErr = MemoryBuffer::getFile(OutputPath); + if (EC = CovOrErr.getError()) { + sys::fs::remove(OutputPath); + return EC; + } + + SmallVector<StringRef, 0> Lines; + CovOrErr.get()->getBuffer().trim().split(Lines, '\n'); + + AFLTupleID Edge = 0; + unsigned Freq = 0; + + for (const auto &Line : Lines) { + const auto &[EdgeStr, FreqStr] = Line.split(':'); + + to_integer(EdgeStr, Edge, 10); + to_integer(FreqStr, Freq, 10); + Cov.push_back({Edge, Freq}); + } + + return sys::fs::remove(OutputPath); +} + +static inline void StartTimer(bool ShowProgBar) { + StartTime = std::chrono::system_clock::now(); +} + +static inline void EndTimer(bool ShowProgBar) { + EndTime = std::chrono::system_clock::now(); + Duration = + std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); + + if (ShowProgBar) + outs() << '\n'; + else + outs() << Duration.count() << "s\n"; +} + +// -------------------------------------------------------------------------- // +// Main function +// -------------------------------------------------------------------------- // + +int main(int argc, char *argv[]) { + WeightsMap Weights; + ProgressBar ProgBar; + std::error_code EC; + + // ------------------------------------------------------------------------ // + // Parse command-line options + // + // Also check the target arguments, as this determines how we run afl-showmap. + // ------------------------------------------------------------------------ // + + cl::ParseCommandLineOptions(argc, argv, "Optimal corpus minimizer"); + + if (!sys::fs::is_directory(OutputDir)) { + ErrMsg() << "Invalid output directory `" << OutputDir << "`\n"; + return 1; + } + + for (const auto &Arg : TargetArgs) + if (Arg == "@@") TargetArgsHasAtAt = true; + + // ------------------------------------------------------------------------ // + // Find afl-showmap + // ------------------------------------------------------------------------ // + + const auto AFLShowmapOrErr = sys::findProgramByName("afl-showmap"); + if (AFLShowmapOrErr.getError()) { + ErrMsg() << "Failed to find afl-showmap. Check your PATH\n"; + return 1; + } + AFLShowmapPath = *AFLShowmapOrErr; + + // ------------------------------------------------------------------------ // + // Parse weights + // + // Weights are stored in CSV file mapping a seed file name to an integer + // greater than zero. + // ------------------------------------------------------------------------ // + + if (WeightsFile != "") { + StatMsg() << "Reading weights from `" << WeightsFile << "`... "; + StartTimer(/*ShowProgBar=*/false); + + const auto WeightsOrErr = MemoryBuffer::getFile(WeightsFile); + if (EC = WeightsOrErr.getError()) { + ErrMsg() << "Failed to read weights from `" << WeightsFile + << "`: " << EC.message() << '\n'; + return 1; + } + + GetWeights(*WeightsOrErr.get(), Weights); + + EndTimer(/*ShowProgBar=*/false); + } + + // ------------------------------------------------------------------------ // + // Traverse corpus directory + // + // Find the seed files inside this directory. + // ------------------------------------------------------------------------ // + + StatMsg() << "Locating seeds in `" << CorpusDir << "`... "; + StartTimer(/*ShowProgBar=*/false); + + std::vector<std::string> SeedFiles; + sys::fs::file_status Status; + + for (sys::fs::directory_iterator Dir(CorpusDir, EC), DirEnd; + Dir != DirEnd && !EC; Dir.increment(EC)) { + if (EC) { + ErrMsg() << "Failed to traverse corpus directory `" << CorpusDir + << "`: " << EC.message() << '\n'; + return 1; + } + + const auto &Path = Dir->path(); + if (EC = sys::fs::status(Path, Status)) { + WarnMsg() << "Failed to access seed file `" << Path + << "`: " << EC.message() << ". Skipping...\n"; + continue; + } + + switch (Status.type()) { + case sys::fs::file_type::regular_file: + case sys::fs::file_type::symlink_file: + case sys::fs::file_type::type_unknown: + SeedFiles.push_back(Path); + default: + /* Ignore */ + break; + } + } + + EndTimer(/*ShowProgBar=*/false); + + // ------------------------------------------------------------------------ // + // Generate seed coverage + // + // Iterate over the corpus directory, which should contain seed files. Execute + // these seeds in the target program to generate coverage information, and + // then store this coverage information in the appropriate data structures. + // ------------------------------------------------------------------------ // + + size_t SeedCount = 0; + const size_t NumSeeds = SeedFiles.size(); + + if (!ShowProgBar) + StatMsg() << "Generating coverage for " << NumSeeds << " seeds... "; + StartTimer(ShowProgBar); + + EvalMaxSAT Solver(/*nbMinimizeThread=*/0); + MaxSATSeeds SeedVars; + MaxSATCoverageMap SeedCoverage; + AFLCoverageVector Cov; + + for (const auto &SeedFile : SeedFiles) { + // Execute seed + Cov.clear(); + if (EC = getAFLCoverage(SeedFile, Cov)) { + ErrMsg() << "Failed to get coverage for seed " << SeedFile << ": " + << EC.message() << '\n'; + return 1; + } + + // Create a variable to represent the seed + const SeedID Var = Solver.newVar(); + SeedVars.push_back({Var, SeedFile}); + + // Record the set of seeds that cover a particular edge + for (const auto &[Edge, Freq] : Cov) { + if (EdgesOnly) { + // Ignore edge frequency + SeedCoverage[Edge].insert(Var); + } else { + // Executing edge `E` `N` times means that it was executed `N - 1` times + for (unsigned I = 0; I < Freq; ++I) + SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(Var); + } + } + + if ((++SeedCount % 10 == 0) && ShowProgBar) + ProgBar.update(SeedCount * 100 / NumSeeds, "Generating seed coverage"); + } + + EndTimer(ShowProgBar); + + // ------------------------------------------------------------------------ // + // Set the hard and soft constraints in the solver + // ------------------------------------------------------------------------ // + + if (!ShowProgBar) StatMsg() << "Generating constraints... "; + StartTimer(ShowProgBar); + + SeedCount = 0; + + // Ensure that at least one seed is selected that covers a particular edge + // (hard constraint) + std::vector<SeedID> Clauses; + for (const auto &[_, Seeds] : SeedCoverage) { + if (Seeds.empty()) continue; + + Clauses.clear(); + for (const auto &Seed : Seeds) + Clauses.push_back(Seed); + + Solver.addClause(Clauses); + + if ((++SeedCount % 10 == 0) && ShowProgBar) + ProgBar.update(SeedCount * 100 / SeedCoverage.size(), + "Generating clauses"); + } + + // Select the minimum number of seeds that cover a particular set of edges + // (soft constraint) + for (const auto &[Var, Seed] : SeedVars) + Solver.addWeightedClause({-Var}, Weights[sys::path::filename(Seed)]); + + EndTimer(ShowProgBar); + + // ------------------------------------------------------------------------ // + // Generate a solution + // ------------------------------------------------------------------------ // + + StatMsg() << "Solving... "; + StartTimer(/*ShowProgBar=*/false); + + const bool Solved = Solver.solve(); + + EndTimer(/*ShowProgBar=*/false); + + // ------------------------------------------------------------------------ // + // Save the solution + // + // This will copy the selected seeds to the given output directory. + // ------------------------------------------------------------------------ // + + SmallVector<StringRef, 64> Solution; + SmallString<32> OutputSeed; + + if (Solved) { + for (const auto &[Var, Seed] : SeedVars) + if (Solver.getValue(Var) > 0) Solution.push_back(Seed); + } else { + ErrMsg() << "Failed to find an optimal solution for `" << CorpusDir + << "`\n"; + return 1; + } + + SuccMsg() << "Minimized corpus size: " << Solution.size() << " seeds\n"; + + if (!ShowProgBar) StatMsg() << "Copying to `" << OutputDir << "`... "; + StartTimer(ShowProgBar); + + SeedCount = 0; + + for (const auto &Seed : Solution) { + OutputSeed = OutputDir; + sys::path::append(OutputSeed, sys::path::filename(Seed)); + + if (EC = sys::fs::copy_file(Seed, OutputSeed)) { + WarnMsg() << "Failed to copy `" << Seed << "` to `" << OutputDir + << "`: " << EC.message() << '\n'; + } + + if ((++SeedCount % 10 == 0) && ShowProgBar) + ProgBar.update(SeedCount * 100 / Solution.size(), "Copying seeds"); + } + + EndTimer(ShowProgBar); + SuccMsg() << "Done!\n"; + + return 0; +} diff --git a/utils/optimin/src/ProgressBar.h b/utils/optimin/src/ProgressBar.h new file mode 100644 index 00000000..2f8d7403 --- /dev/null +++ b/utils/optimin/src/ProgressBar.h @@ -0,0 +1,58 @@ +/** + * Progress bar. + * + * Adapted from https://www.bfilipek.com/2020/02/inidicators.html + */ + +#pragma once + +#include <llvm/ADT/StringRef.h> +#include <llvm/Support/raw_ostream.h> + +/// Display a progress bar in the terminal +class ProgressBar { + private: + const size_t BarWidth; + const std::string Fill; + const std::string Remainder; + + public: + ProgressBar() : ProgressBar(60, "#", " ") { + } + + ProgressBar(size_t Width, const llvm::StringRef F, const llvm::StringRef R) + : BarWidth(Width), Fill(F), Remainder(R) { + } + + void update(float Progress, const llvm::StringRef Status = "", + llvm::raw_ostream &OS = llvm::outs()) { + // No need to write once progress is 100% + if (Progress > 100.0f) return; + + // Move cursor to the first position on the same line and flush + OS << '\r'; + OS.flush(); + + // Start bar + OS << '['; + + const auto Completed = + static_cast<size_t>(Progress * static_cast<float>(BarWidth) / 100.0); + for (size_t I = 0; I < BarWidth; ++I) { + if (I <= Completed) { + OS << Fill; + } else { + OS << Remainder; + } + } + + // End bar + OS << ']'; + + // Write progress percentage + OS << ' ' << std::min(static_cast<size_t>(Progress), size_t(100)) << '%'; + + // Write status text + OS << " " << Status; + } +}; |