From 09053680920989425469b0b7df3401c8cac38942 Mon Sep 17 00:00:00 2001 From: Nguyễn Gia Phong Date: Thu, 23 Nov 2023 20:07:48 +0900 Subject: Hack up brute-force decision tree construction --- lib/Core/Differentiator.cpp | 111 ++++++++++++++++++++++++++++++-------------- lib/Core/Differentiator.h | 7 ++- 2 files changed, 82 insertions(+), 36 deletions(-) diff --git a/lib/Core/Differentiator.cpp b/lib/Core/Differentiator.cpp index 2609e832..95c08bda 100644 --- a/lib/Core/Differentiator.cpp +++ b/lib/Core/Differentiator.cpp @@ -147,19 +147,41 @@ run(const std::uint64_t rev, const char* prog, return result; } -bool -Differentiator::isSymOut(const std::string& name) +void +logBytes(const Differentiator::Bytes& buffer) { - // string::starts_with requires C++20 - return (name[0] == 'o' && name[1] == 'u' && name[2] == 't' && name[3] == '!' - && '0' <= name[name.size() - 1] && name[name.size() - 1] <= '9'); + llvm::errs() << (int) buffer[0]; + for (size_t i = 1; i < buffer.size(); ++i) + llvm::errs() << ':' << (int) buffer[i]; } -Differentiator::Differentiator(std::unique_ptr* s, - time::Span& t, ArrayCache& ac) -: envs{{0, ""}}, prog{InputProgram}, solver{s}, solverTimeout{t}, - arrayCache{ac}, visitorA{'a', ac}, visitorB{'b', ac} -{} +void +logArgs(const Differentiator::TestArgs& args) +{ + llvm::errs() << "Args:"; + for (const auto& s : args) { + llvm::errs() << ' '; + logBytes(s); + } + llvm::errs() << '\n'; +} + +void +logClusters(const Differentiator::Clusters& clusters, bool text = true) +{ + for (const auto& [output, revisions] : clusters) { + llvm::errs() << "Revisions:"; + for (const auto& rev : revisions) + llvm::errs() << ' ' << rev; + llvm::errs() << '\n'; + if (text) { + llvm::errs() << std::string((const char*) output.data()); + } else { + logBytes(output); + llvm::errs() << '\n'; + } + } +} void Differentiator::extract(ExecutionState* a, ExecutionState* b, @@ -292,30 +314,51 @@ Differentiator::search(ExecutionState* latest) void Differentiator::log() { - for (const auto& t : this->tests) { - std::map> stdoutClusters; - std::map, - std::set> varClusters; - for (const auto& rev : t.second) { - for (const auto& var : rev.second.first) - varClusters[var].insert(rev.first); - stdoutClusters[rev.second.second].insert(rev.first); - if (stdoutClusters.size() > 1) { - llvm::errs() << "Args:"; - for (const auto& s : t.first) { - llvm::errs() << ' '; - for (const auto c : s) - llvm::errs() << (int) c << '.'; - } - llvm::errs() << '\n'; - for (const auto& so : stdoutClusters) { - llvm::errs() << "Revisions:"; - for (const auto& r : so.second) - llvm::errs() << ' ' << r; - llvm::errs() << '\n' << std::string((const char*) so.first.data()); - } - llvm::errs() << '\n'; - } + std::vector> clusterings; + for (auto& t : this->tests) { + Clusters clusters; + for (const auto& rev : this->revisions) { + Bytes& out = t.second[rev].second; + if (out.empty()) // backfill stdout + out = run(rev, this->prog, t.first); + clusters[out].insert(rev); } + + if (clusters.size() > 1) + clusterings.push_back({std::move(t.first), std::move(clusters)}); } + for (const auto& [args, clusters] : clusterings) { + logArgs(args); + logClusters(clusters); + llvm::errs() << '\n'; + } + + size_t minLump = this->revisions.size(); + std::map minDepth; + for (size_t i = 0u; i < this->revisions.size(); ++i) + minDepth[i] = this->revisions.size() - 1; + std::vector indices; + for (size_t i = 0u; i < clusterings.size(); ++i) + indices.push_back(i); + do { + std::map> diffed; + size_t depth = 0u; + for (const auto& i : indices) { + const auto& [args, clusters] = clusterings[i]; + for (const auto& [_, lump] : clusters) + for (const auto& key : lump) + for (const auto& val : this->revisions) + if (lump.find(val) == lump.end()) + diffed[key].insert(val); + size_t maxLump = 1u; + for (const auto& [rev, others] : diffed) + maxLump = std::max(this->revisions.size() - others.size(), maxLump); + minDepth[maxLump] = std::min(++depth, minDepth[maxLump]); + minLump = std::min(maxLump, minLump); + if (maxLump == 1) + break; + } + } while (std::next_permutation(indices.begin(), indices.end())); + llvm::errs() << minDepth[minLump] << " decisions with " + << minLump << " undiffed at most\n"; } diff --git a/lib/Core/Differentiator.h b/lib/Core/Differentiator.h index 3743743c..11225ac7 100644 --- a/lib/Core/Differentiator.h +++ b/lib/Core/Differentiator.h @@ -38,8 +38,11 @@ struct Differentiator { /// CLI arguments typedef std::vector TestArgs; /// :rev (:var val) stdout - typedef std::map, - Bytes>> TestOuts; + typedef std::map, + Bytes>> + TestOuts; + typedef std::map> Clusters; /// Extract revision number from given condition if any static std::pair extractPatchNumber(ref e); -- cgit 1.4.1