about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
authorNguyễn Gia Phong <cnx@loang.net>2023-11-23 20:07:48 +0900
committerNguyễn Gia Phong <cnx@loang.net>2024-03-05 17:53:28 +0900
commit09053680920989425469b0b7df3401c8cac38942 (patch)
tree61a8f51bced1c61d7816ad1232e64a5dac1a0052
parent812c08bbc64cb0d2f82f2f44e83c5da390ee1355 (diff)
downloadklee-09053680920989425469b0b7df3401c8cac38942.tar.gz
Hack up brute-force decision tree construction
-rw-r--r--lib/Core/Differentiator.cpp111
-rw-r--r--lib/Core/Differentiator.h7
2 files changed, 82 insertions, 36 deletions
diff --git a/lib/Core/Differentiator.cpp b/lib/Core/Differentiator.cpp
index 2609e832..95c08bda 100644
--- a/lib/Core/Differentiator.cpp
+++ b/lib/Core/Differentiator.cpp
@@ -147,19 +147,41 @@ run(const std::uint64_t rev, const char* prog,
   return result;
 }
 
-bool
-Differentiator::isSymOut(const std::string& name)
+void
+logBytes(const Differentiator::Bytes& buffer)
 {
-  // string::starts_with requires C++20
-  return (name[0] == 'o' && name[1] == 'u' && name[2] == 't' && name[3] == '!'
-          && '0' <= name[name.size() - 1] && name[name.size() - 1] <= '9');
+  llvm::errs() << (int) buffer[0];
+  for (size_t i = 1; i < buffer.size(); ++i)
+    llvm::errs() << ':' << (int) buffer[i];
 }
 
-Differentiator::Differentiator(std::unique_ptr<TimingSolver>* s,
-                               time::Span& t, ArrayCache& ac)
-: envs{{0, ""}}, prog{InputProgram}, solver{s}, solverTimeout{t},
-  arrayCache{ac}, visitorA{'a', ac}, visitorB{'b', ac}
-{}
+void
+logArgs(const Differentiator::TestArgs& args)
+{
+  llvm::errs() << "Args:";
+  for (const auto& s : args) {
+    llvm::errs() << ' ';
+    logBytes(s);
+  }
+  llvm::errs() << '\n';
+}
+
+void
+logClusters(const Differentiator::Clusters& clusters, bool text = true)
+{
+  for (const auto& [output, revisions] : clusters) {
+    llvm::errs() << "Revisions:";
+    for (const auto& rev : revisions)
+      llvm::errs() << ' ' << rev;
+    llvm::errs() << '\n';
+    if (text) {
+      llvm::errs() << std::string((const char*) output.data());
+    } else {
+      logBytes(output);
+      llvm::errs() << '\n';
+    }
+  }
+}
 
 void
 Differentiator::extract(ExecutionState* a, ExecutionState* b,
@@ -292,30 +314,51 @@ Differentiator::search(ExecutionState* latest)
 void
 Differentiator::log()
 {
-  for (const auto& t : this->tests) {
-    std::map<Bytes, std::set<std::uint64_t>> stdoutClusters;
-    std::map<std::pair<std::string, Bytes>,
-             std::set<std::uint64_t>> varClusters;
-    for (const auto& rev : t.second) {
-      for (const auto& var : rev.second.first)
-        varClusters[var].insert(rev.first);
-      stdoutClusters[rev.second.second].insert(rev.first);
-      if (stdoutClusters.size() > 1) {
-        llvm::errs() << "Args:";
-        for (const auto& s : t.first) {
-          llvm::errs() << ' ';
-          for (const auto c : s)
-            llvm::errs() << (int) c << '.';
-        }
-        llvm::errs() << '\n';
-        for (const auto& so : stdoutClusters) {
-          llvm::errs() << "Revisions:";
-          for (const auto& r : so.second)
-            llvm::errs() << ' ' << r;
-          llvm::errs() << '\n' << std::string((const char*) so.first.data());
-        }
-        llvm::errs() << '\n';
-      }
+  std::vector<std::pair<const TestArgs, const Clusters>> clusterings;
+  for (auto& t : this->tests) {
+    Clusters clusters;
+    for (const auto& rev : this->revisions) {
+      Bytes& out = t.second[rev].second;
+      if (out.empty()) // backfill stdout
+        out = run(rev, this->prog, t.first);
+      clusters[out].insert(rev);
     }
+
+    if (clusters.size() > 1)
+      clusterings.push_back({std::move(t.first), std::move(clusters)});
   }
+  for (const auto& [args, clusters] : clusterings) {
+    logArgs(args);
+    logClusters(clusters);
+    llvm::errs() << '\n';
+  }
+
+  size_t minLump = this->revisions.size();
+  std::map<size_t, size_t> minDepth;
+  for (size_t i = 0u; i < this->revisions.size(); ++i)
+    minDepth[i] = this->revisions.size() - 1;
+  std::vector<size_t> indices;
+  for (size_t i = 0u; i < clusterings.size(); ++i)
+    indices.push_back(i);
+  do {
+    std::map<std::uint64_t, std::set<std::uint64_t>> diffed;
+    size_t depth = 0u;
+    for (const auto& i : indices) {
+      const auto& [args, clusters] = clusterings[i];
+      for (const auto& [_, lump] : clusters)
+        for (const auto& key : lump)
+          for (const auto& val : this->revisions)
+            if (lump.find(val) == lump.end())
+              diffed[key].insert(val);
+      size_t maxLump = 1u;
+      for (const auto& [rev, others] : diffed)
+        maxLump = std::max(this->revisions.size() - others.size(), maxLump);
+      minDepth[maxLump] = std::min(++depth, minDepth[maxLump]);
+      minLump = std::min(maxLump, minLump);
+      if (maxLump == 1)
+        break;
+    }
+  } while (std::next_permutation(indices.begin(), indices.end()));
+  llvm::errs() << minDepth[minLump] << " decisions with "
+               << minLump << " undiffed at most\n";
 }
diff --git a/lib/Core/Differentiator.h b/lib/Core/Differentiator.h
index 3743743c..11225ac7 100644
--- a/lib/Core/Differentiator.h
+++ b/lib/Core/Differentiator.h
@@ -38,8 +38,11 @@ struct Differentiator {
   /// CLI arguments
   typedef std::vector<Bytes> TestArgs;
   /// :rev (:var val) stdout
-  typedef std::map<std::uint64_t, std::pair<std::map<std::string, Bytes>,
-                                            Bytes>> TestOuts;
+  typedef std::map<std::uint64_t,
+                   std::pair<std::map<std::string, Bytes>,
+                             Bytes>>
+          TestOuts;
+  typedef std::map<Bytes, std::set<std::uint64_t>> Clusters;
 
   /// Extract revision number from given condition if any
   static std::pair<bool, std::uint64_t> extractPatchNumber(ref<Expr> e);