/* * OptiMin, an optimal fuzzing corpus minimizer. * * Author: Adrian Herrera */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "EvalMaxSAT.h" using namespace llvm; namespace { // -------------------------------------------------------------------------- // // Classes // -------------------------------------------------------------------------- // /// Ensure seed weights default to 1 class Weight { public: Weight() : Weight(1){}; Weight(uint32_t V) : Value(V){}; operator unsigned() const { return Value; } private: const unsigned Value; }; // -------------------------------------------------------------------------- // // Typedefs // -------------------------------------------------------------------------- // /// AFL tuple (edge) ID using AFLTupleID = uint32_t; /// Pair of tuple ID and hit count using AFLTuple = std::pair; /// Coverage for a given seed file using AFLCoverageVector = std::vector; /// Map seed file paths to its coverage vector using AFLCoverageMap = StringMap; /// Map seed file paths to a weight using WeightsMap = StringMap; /// A seed identifier in the MaxSAT solver using SeedID = int; /// Associates seed identifiers to seed files using MaxSATSeeds = SmallVector, 0>; /// Set of literal identifiers using MaxSATSeedSet = DenseSet; /// Maps tuple IDs to the literal identifiers that "cover" that tuple using MaxSATCoverageMap = DenseMap; // -------------------------------------------------------------------------- // // Global variables // -------------------------------------------------------------------------- // // This is based on the human class count in `count_class_human[256]` in // `afl-showmap.c` static constexpr uint32_t MAX_EDGE_FREQ = 8; // The maximum number of failures allowed when parsing a weights file static constexpr unsigned MAX_WEIGHT_FAILURES = 5; static sys::TimePoint<> StartTime, EndTime; static std::chrono::seconds Duration; static std::string ShowmapPath; static bool TargetArgsHasAtAt = false; static bool KeepTraces = false; static bool SkipBinCheck = false; static const auto ErrMsg = [] { return WithColor(errs(), raw_ostream::RED, /*Bold=*/true) << "[-] "; }; static const auto WarnMsg = [] { return WithColor(errs(), raw_ostream::MAGENTA, /*Bold=*/true) << "[-] "; }; static const auto SuccMsg = [] { return WithColor(outs(), raw_ostream::GREEN, /*Bold=*/true) << "[+] "; }; static const auto StatMsg = [] { return WithColor(outs(), raw_ostream::BLUE, /*Bold=*/true) << "[*] "; }; static cl::opt InputDir("i", cl::desc("Input directory"), cl::value_desc("dir"), cl::Required); static cl::opt OutputDir("o", cl::desc("Output directory"), cl::value_desc("dir"), cl::Required); static cl::opt EdgesOnly("f", cl::desc("Include edge hit counts"), cl::init(true)); static cl::opt WeightsFile("w", cl::desc("Weights file"), cl::value_desc("csv")); static cl::opt TargetProg(cl::Positional, cl::desc(""), cl::Required); static cl::list TargetArgs(cl::ConsumeAfter, cl::desc("[target args...]")); static cl::opt MemLimit( "m", cl::desc("Memory limit for child process (default=none)"), cl::value_desc("megs"), cl::init("none")); static cl::opt Timeout( "t", cl::desc("Run time limit for child process (default=5000)"), cl::value_desc("msec"), cl::init("5000")); static cl::opt CrashMode( "C", cl::desc("Keep crashing inputs, reject everything else")); static cl::opt FridaMode( "O", cl::desc("Use binary-only instrumentation (FRIDA mode)")); static cl::opt QemuMode( "Q", cl::desc("Use binary-only instrumentation (QEMU mode)")); static cl::opt UnicornMode( "U", cl::desc("Use unicorn-based instrumentation (unicorn mode)")); } // anonymous namespace // -------------------------------------------------------------------------- // // Helper functions // -------------------------------------------------------------------------- // static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) { SmallVector Lines; MB.getBuffer().trim().split(Lines, '\n'); unsigned FailureCount = 0; unsigned Weight = 0; for (const auto &Line : Lines) { const auto &[Seed, WeightStr] = Line.split(','); if (to_integer(WeightStr, Weight, 10)) { Weights.try_emplace(Seed, Weight); } else { if (FailureCount >= MAX_WEIGHT_FAILURES) { ErrMsg() << "Too many failures. Aborting\n"; std::exit(1); } WarnMsg() << "Failed to read weight for '" << Seed << "'. Skipping...\n"; FailureCount++; } } } static std::error_code readCov(const StringRef Trace, AFLCoverageVector &Cov) { const auto CovOrErr = MemoryBuffer::getFile(Trace); if (const auto EC = CovOrErr.getError()) return EC; SmallVector Lines; CovOrErr.get()->getBuffer().trim().split(Lines, '\n'); AFLTupleID Edge = 0; unsigned Freq = 0; for (const auto &Line : Lines) { const auto &[EdgeStr, FreqStr] = Line.split(':'); to_integer(EdgeStr, Edge, 10); to_integer(FreqStr, Freq, 10); Cov.push_back({Edge, Freq}); } return std::error_code(); } static Error runShowmap(AFLCoverageMap &CovMap, const StringRef Input, bool BinCheck = false) { const bool InputIsFile = !sys::fs::is_directory(Input); Optional Redirects[] = {None, None, None}; SmallString<32> TraceDir{OutputDir}; sys::path::append(TraceDir, ".traces"); SmallString<32> Output{TraceDir}; SmallString<32> StdinFile{TraceDir}; // ------------------------------------------------------------------------ // // Prepare afl-showmap arguments // // If the given input is a file, then feed this directly into stdin. // Otherwise, if it is a directory, specify this on the afl-showmap command // line. // ------------------------------------------------------------------------ // SmallVector ShowmapArgs{ShowmapPath, "-q", "-m", MemLimit, "-t", Timeout}; if (InputIsFile) { StdinFile = Input; sys::path::append(Output, BinCheck ? ".run_test" : sys::path::filename(Input)); } else { sys::path::append(StdinFile, ".cur_input"); ShowmapArgs.append({"-i", Input}); } if (TargetArgsHasAtAt) { ShowmapArgs.append({"-A", StdinFile}); Redirects[/* stdin */ 0] = "/dev/null"; } else if (InputIsFile) { Redirects[/* stdin */ 0] = Input; } if (FridaMode) ShowmapArgs.push_back("-O"); if (QemuMode) ShowmapArgs.push_back("-Q"); if (UnicornMode) ShowmapArgs.push_back("-U"); ShowmapArgs.append({"-o", Output, "--", TargetProg}); ShowmapArgs.append(TargetArgs.begin(), TargetArgs.end()); // ------------------------------------------------------------------------ // // Run afl-showmap // ------------------------------------------------------------------------ // const int RC = sys::ExecuteAndWait(ShowmapPath, ShowmapArgs, /*env=*/None, Redirects); if (RC && !CrashMode) { ErrMsg() << "Exit code " << RC << " != 0 received from afl-showmap\n"; return createStringError(inconvertibleErrorCode(), "afl-showmap failed"); } // ------------------------------------------------------------------------ // // Parse afl-showmap output // ------------------------------------------------------------------------ // AFLCoverageVector Cov; std::error_code EC; sys::fs::file_status Status; if (InputIsFile) { // Read a single output coverage file if ((EC = readCov(Output, Cov))) { sys::fs::remove(Output); return errorCodeToError(EC); } CovMap.try_emplace(sys::path::filename(Input), Cov); if (!KeepTraces) sys::fs::remove(Output); } else { // Read a directory of output coverage files for (sys::fs::recursive_directory_iterator Dir(TraceDir, EC), DirEnd; Dir != DirEnd && !EC; Dir.increment(EC)) { if (EC) return errorCodeToError(EC); const auto &Path = Dir->path(); if ((EC = sys::fs::status(Path, Status))) return errorCodeToError(EC); switch (Status.type()) { case sys::fs::file_type::regular_file: case sys::fs::file_type::symlink_file: case sys::fs::file_type::type_unknown: Cov.clear(); if ((EC = readCov(Path, Cov))) { sys::fs::remove(Path); return errorCodeToError(EC); } CovMap.try_emplace(sys::path::filename(Path), Cov); default: // Ignore break; } } if (!KeepTraces) sys::fs::remove_directories(TraceDir); } return Error::success(); } static inline void StartTimer() { StartTime = std::chrono::system_clock::now(); } static inline void EndTimer() { EndTime = std::chrono::system_clock::now(); Duration = std::chrono::duration_cast(EndTime - StartTime); SuccMsg() << " Completed in " << Duration.count() << " s\n"; } // -------------------------------------------------------------------------- // // Main function // -------------------------------------------------------------------------- // int main(int argc, char *argv[]) { WeightsMap Weights; std::error_code EC; // ------------------------------------------------------------------------ // // Parse command-line options and environment variables // // Also check the target arguments, as this determines how we run afl-showmap. // ------------------------------------------------------------------------ // cl::ParseCommandLineOptions(argc, argv, "Optimal corpus minimizer"); KeepTraces = !!std::getenv("AFL_KEEP_TRACES"); SkipBinCheck = !!std::getenv("AFL_SKIP_BIN_CHECK"); const auto AFLPath = std::getenv("AFL_PATH"); if (CrashMode) ::setenv("AFL_CMIN_CRASHES_ONLY", "1", /*overwrite=*/true); for (const auto &Arg : TargetArgs) if (Arg == "@@") TargetArgsHasAtAt = true; // ------------------------------------------------------------------------ // // Find afl-showmap // ------------------------------------------------------------------------ // SmallVector EnvPaths; if (const char *PathEnv = std::getenv("PATH")) SplitString(PathEnv, EnvPaths, ":"); if (AFLPath) EnvPaths.push_back(AFLPath); const auto ShowmapOrErr = sys::findProgramByName("afl-showmap", EnvPaths); if (ShowmapOrErr.getError()) { ErrMsg() << "Failed to find afl-showmap. Check your PATH\n"; return 1; } ShowmapPath = *ShowmapOrErr; // ------------------------------------------------------------------------ // // Parse weights // // Weights are stored in CSV file mapping a seed file name to an integer // greater than zero. // ------------------------------------------------------------------------ // if (WeightsFile != "") { StatMsg() << "Reading weights from '" << WeightsFile << "'...\n"; StartTimer(); const auto WeightsOrErr = MemoryBuffer::getFile(WeightsFile); if ((EC = WeightsOrErr.getError())) { ErrMsg() << "Failed to read weights from '" << WeightsFile << "': " << EC.message() << '\n'; return 1; } GetWeights(*WeightsOrErr.get(), Weights); EndTimer(); } // ------------------------------------------------------------------------ // // Traverse input directory // // Find the seed files inside this directory (and subdirectories). // ------------------------------------------------------------------------ // StatMsg() << "Locating seeds in '" << InputDir << "'...\n"; StartTimer(); bool IsDirResult; if ((EC = sys::fs::is_directory(InputDir, IsDirResult))) { ErrMsg() << "Invalid input directory '" << InputDir << "': " << EC.message() << '\n'; return 1; } sys::fs::file_status Status; StringMap SeedFiles; for (sys::fs::recursive_directory_iterator Dir(InputDir, EC), DirEnd; Dir != DirEnd && !EC; Dir.increment(EC)) { if (EC) { ErrMsg() << "Failed to traverse input directory '" << InputDir << "': " << EC.message() << '\n'; return 1; } const auto &Path = Dir->path(); if ((EC = sys::fs::status(Path, Status))) { ErrMsg() << "Failed to access '" << Path << "': " << EC.message() << '\n'; return 1; } switch (Status.type()) { case sys::fs::file_type::regular_file: case sys::fs::file_type::symlink_file: case sys::fs::file_type::type_unknown: SeedFiles.try_emplace(sys::path::filename(Path), sys::path::parent_path(Path)); default: /* Ignore */ break; } } EndTimer(); if (SeedFiles.empty()) { ErrMsg() << "Failed to find any seed files in '" << InputDir << "'\n"; return 1; } // ------------------------------------------------------------------------ // // Setup output directory // ------------------------------------------------------------------------ // SmallString<32> TraceDir{OutputDir}; sys::path::append(TraceDir, ".traces"); if ((EC = sys::fs::remove_directories(TraceDir))) { ErrMsg() << "Failed to remove existing trace directory in '" << OutputDir << "': " << EC.message() << '\n'; return 1; } if ((EC = sys::fs::create_directories(TraceDir))) { ErrMsg() << "Failed to create output directory '" << OutputDir << "': " << EC.message() << '\n'; return 1; } // ------------------------------------------------------------------------ // // Test the target binary // ------------------------------------------------------------------------ // AFLCoverageMap CovMap; if (!SkipBinCheck) { const auto It = SeedFiles.begin(); SmallString<32> TestSeed{It->second}; sys::path::append(TestSeed, It->first()); StatMsg() << "Testing the target binary with '" << TestSeed << "`...\n"; StartTimer(); if (auto Err = runShowmap(CovMap, TestSeed, /*BinCheck=*/true)) { ErrMsg() << "No instrumentation output detected \n"; return 1; } EndTimer(); SuccMsg() << "OK, " << CovMap.begin()->second.size() << " tuples recorded\n"; } // ------------------------------------------------------------------------ // // Generate seed coverage // // Iterate over the corpus directory, which should contain seed files. Execute // these seeds in the target program to generate coverage information, and // then store this coverage information in the appropriate data structures. // ------------------------------------------------------------------------ // StatMsg() << "Running afl-showmap on " << SeedFiles.size() << " seeds...\n"; StartTimer(); MaxSATSeeds SeedVars; MaxSATCoverageMap SeedCoverage; EvalMaxSAT Solver(/*nbMinimizeThread=*/0); CovMap.clear(); if (auto Err = runShowmap(CovMap, InputDir)) { ErrMsg() << "Failed to generate coverage: " << Err << '\n'; return 1; } for (const auto &SeedCov : CovMap) { // Create a variable to represent the seed const SeedID Var = Solver.newVar(); SeedVars.emplace_back(Var, SeedCov.first()); // Record the set of seeds that cover a particular edge for (auto &[Edge, Freq] : SeedCov.second) { if (EdgesOnly) { // Ignore edge frequency SeedCoverage[Edge].insert(Var); } else { // Executing edge `E` `N` times means that it was executed `N - 1` times for (unsigned I = 0; I < Freq; ++I) SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(Var); } } } EndTimer(); // ------------------------------------------------------------------------ // // Set the hard and soft constraints in the solver // ------------------------------------------------------------------------ // StatMsg() << "Generating constraints...\n"; StartTimer(); size_t SeedCount = 0; // Ensure that at least one seed is selected that covers a particular edge // (hard constraint) std::vector Clauses; for (const auto &[_, Seeds] : SeedCoverage) { if (Seeds.empty()) continue; Clauses.clear(); for (const auto &Seed : Seeds) Clauses.push_back(Seed); Solver.addClause(Clauses); } // Select the minimum number of seeds that cover a particular set of edges // (soft constraint) for (const auto &[Var, Seed] : SeedVars) Solver.addWeightedClause({-Var}, Weights[sys::path::filename(Seed)]); EndTimer(); // ------------------------------------------------------------------------ // // Generate a solution // ------------------------------------------------------------------------ // StatMsg() << "Solving...\n"; StartTimer(); const bool Solved = Solver.solve(); EndTimer(); // ------------------------------------------------------------------------ // // Save the solution // // This will copy the selected seeds to the given output directory. // ------------------------------------------------------------------------ // SmallVector Solution; SmallString<32> InputSeed, OutputSeed; if (Solved) { for (const auto &[Var, Seed] : SeedVars) if (Solver.getValue(Var) > 0) Solution.push_back(Seed); } else { ErrMsg() << "Failed to find an optimal solution for '" << InputDir << "'\n"; return 1; } StatMsg() << "Copying " << Solution.size() << " seeds to '" << OutputDir << "'...\n"; StartTimer(); SeedCount = 0; for (const auto &Seed : Solution) { InputSeed = SeedFiles[Seed]; sys::path::append(InputSeed, Seed); OutputSeed = OutputDir; sys::path::append(OutputSeed, Seed); if ((EC = sys::fs::copy_file(InputSeed, OutputSeed))) { ErrMsg() << "Failed to copy '" << Seed << "' to '" << OutputDir << "': " << EC.message() << '\n'; return 1; } } EndTimer(); SuccMsg() << "Done!\n"; return 0; }