about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
authorNguyễn Gia Phong <cnx@loang.net>2023-08-28 15:07:42 +0900
committerNguyễn Gia Phong <cnx@loang.net>2024-03-05 17:31:12 +0900
commit10e4dd58331b0dc316c9cf2969f50dc7d74e313d (patch)
treed48cf0ec7ae3f74d975fd4513328c601da5be3c2
parent6004402e688ce7af3b8b578c9e500a26316030dc (diff)
downloadklee-10e4dd58331b0dc316c9cf2969f50dc7d74e313d.tar.gz
Retire external symbilic diff
-rw-r--r--.gitmodules3
-rw-r--r--README-psychic.md15
-rw-r--r--lib/Core/ExecutionState.cpp3
-rw-r--r--lib/Core/ExecutionState.h14
-rw-r--r--lib/Core/Executor.cpp194
-rw-r--r--lib/Core/Executor.h15
-rw-r--r--lib/Core/ExecutorUtil.cpp9
-rw-r--r--shell.nix10
m---------tools/symbdiff0
9 files changed, 126 insertions, 137 deletions
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 905e628d..00000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "tools/symbdiff"]
-	path = tools/symbdiff
-	url = https://git.sr.ht/~cnx/symbdiff
diff --git a/README-psychic.md b/README-psychic.md
new file mode 100644
index 00000000..59985141
--- /dev/null
+++ b/README-psychic.md
@@ -0,0 +1,15 @@
+# KLEE-Psychic
+
+## Usage
+
+The metaprogram under test is to be built with Clang 12 and the following flags:
+
+    -g -O0 -Xclang -disable-O0-optnone
+    -fsanitize=undefined -fno-sanitize=local-bounds,function,vptr'
+
+Both native executable and LLVM bitcode are needed.
+For the bitcode, pass `-emit-llvm -c -DKLEE_RUNTIME` to Clang
+
+KLEE should then be run with the following flags:
+
+    --use-visitor-hash=0 --posix-runtime --libc uclibc --ubsan-runtime
diff --git a/lib/Core/ExecutionState.cpp b/lib/Core/ExecutionState.cpp
index 1e45812c..3f92cccd 100644
--- a/lib/Core/ExecutionState.cpp
+++ b/lib/Core/ExecutionState.cpp
@@ -112,7 +112,6 @@ ExecutionState::ExecutionState(const ExecutionState& state):
     patchLocs(state.patchLocs),
     patchNo(state.patchNo),
     metaEnvVar(state.metaEnvVar),
-    formula(state.formula),
     steppedInstructions(state.steppedInstructions),
     instsSinceCovNew(state.instsSinceCovNew),
     unwindingInformation(state.unwindingInformation
@@ -415,7 +414,7 @@ std::string extractMetaEnvVar(ref<Expr> e) {
   }
 
   const auto& name = read->updates.root->name;
-  // Skip __choose\d.*
+  // Skip when it is not __choose\d.*
   if (name.substr(0, 8) != "__choose" || '0' > name[8] || name[8] > '9')
     return "";
   std::string value;
diff --git a/lib/Core/ExecutionState.h b/lib/Core/ExecutionState.h
index 91a5a9f9..0af3c336 100644
--- a/lib/Core/ExecutionState.h
+++ b/lib/Core/ExecutionState.h
@@ -235,9 +235,6 @@ public:
   /// @ brief Environment variable for runtime metaprogram switching.
   std::string metaEnvVar = "";
 
-  /// @ brief Terminated on exit, awaiting comparison.
-  std::string formula = "";
-
   /// @brief The numbers of times this state has run through Executor::stepInstruction
   std::uint64_t steppedInstructions = 0;
 
@@ -309,9 +306,16 @@ struct ExecutionStateIDCompare {
   }
 };
 
-struct ExecutionStateFormulaCompare {
+struct ExecutionStatePathCondCompare {
   bool operator()(const ExecutionState *a, const ExecutionState *b) const {
-    return a->formula < b->formula;
+    if (a->constraints == b->constraints)
+      return false;
+    if (a->constraints.size() != b->constraints.size())
+      return a->constraints.size() < b->constraints.size();
+    for (ConstraintSet::constraint_iterator i = a->constraints.begin(),
+           j = b->constraints.begin();
+         i < a->constraints.end() && j < b->constraints.end(); ++i, ++j)
+      return *i < *j;
   }
 };
 }
diff --git a/lib/Core/Executor.cpp b/lib/Core/Executor.cpp
index e5a6bc9c..73dc21fa 100644
--- a/lib/Core/Executor.cpp
+++ b/lib/Core/Executor.cpp
@@ -3421,8 +3421,8 @@ void Executor::updateStates(ExecutionState *current) {
     if (it3 != seedMap.end())
       seedMap.erase(it3);
     executionTree->remove(es->executionTreeNode);
-    if (es->formula.empty())
-      delete es;
+    // FIXME: collect garbage
+    // delete es;
   }
   removedStates.clear();
 }
@@ -3595,6 +3595,16 @@ bool Executor::checkMemoryUsage() {
   return false;
 }
 
+std::string quoted(const char* s) {
+  std::stringstream ss;
+  ss << std::quoted(s);
+  return ss.str();
+}
+
+inline char hex(unsigned char c) {
+  return (c < 10) ? '0' + c : 'a' + c - 10;
+}
+
 void Executor::doDumpStates() {
   if (!DumpStatesOnHalt || states.empty()) {
     interpreterHandler->incPathsExplored(states.size());
@@ -3610,23 +3620,22 @@ void Executor::doDumpStates() {
   for (const auto& p : this->metaEnvVars)
     revs.insert(p.first);
   for (const auto& t : this->diffTests) {
-    std::map<std::string, std::set<std::uint64_t>> stdoutClusters;
-    std::map<std::pair<std::string, std::string>,
+    std::map<TestVal, std::set<std::uint64_t>> stdoutClusters;
+    std::map<std::pair<std::string, TestVal>,
              std::set<std::uint64_t>> varClusters;
-    if (t.second.size() < this->metaEnvVars.size())
-      continue;
     for (const auto& rev : t.second) {
       for (const auto& var : rev.second.first)
         varClusters[var].insert(rev.first);
       stdoutClusters[rev.second.second].insert(rev.first);
+      // TODO: improve output format
       if (stdoutClusters.size() > 1) {
         llvm::errs() << "Args:";
         for (const auto& s : t.first)
-          llvm::errs() << " \"" << s << '"';
+          llvm::errs() << " \"" << quoted((const char *) s.data()) << '"';
         llvm::errs() << '\n';
         int i = 0;
         for (const auto& so : stdoutClusters) {
-          llvm::errs() << i++ << " " << so.first;
+          llvm::errs() << i++ << " " << so.first.data();
           for (const auto& r : so.second)
             llvm::errs() << " " << r;
           llvm::errs() << '\n';
@@ -3836,40 +3845,28 @@ static std::string terminationTypeFileExtension(StateTerminationType type) {
 };
 
 void Executor::extractDifferentiator(ExecutionState* a, ExecutionState* b,
-                                     const z3::model& m) {
-  std::map<std::uint8_t, std::string> args;
+                                     std::vector<const Array*> objects,
+                                     std::vector<TestVal> values) {
+  Executor::TestArgs argv;
   Executor::TestOuts outputs;
-  for (auto k = m.size(); k--;) {
-    const auto& name = m[k].name().str();
-    if (isSymArg(name)) {
-      const uint8_t i = std::atoi(name.c_str() + 3);
-      args[i] = "";
-      const auto& expr = m.eval(m[k]());
-      for (uint8_t i = 0; i < this->symArgs[i]; ++i) {
-        const auto c = z3::select(expr, i).simplify().as_uint64();
-        assert(c <= std::numeric_limits<unsigned char>::max());
-        args[i].push_back((unsigned char) c);
-      }
-    } else if (isSymOut(name.substr(0, name.size() - 2))) {
-      // TODO: use arguments for all patches
-      const auto& expr = m.eval(m[k]());
-      std::string binary {""};
-      const auto size = this->symOuts[name.substr(0, name.size() - 2)];
-      for (uint8_t i = 0; i < size; ++i) {
-        const auto c = z3::select(expr, i).simplify().as_uint64();
-        assert(c <= std::numeric_limits<unsigned char>::max());
-        binary.push_back((unsigned char) c);
+  {
+    std::map<std::uint8_t, TestVal> args;
+    for (unsigned i = 0; i < objects.size(); ++i) {
+      const auto& name = objects[i]->name;
+      if (isSymArg(name)) {
+        args[std::atoi(name.c_str() + 3)] = values[i];
+        args[std::atoi(name.c_str() + 3)].push_back(0); // null termination
+      } else if (isSymOut(name.substr(0, name.size() - 2))) {
+        const auto rev = ((name[name.size() - 1] == 'a') ? a : b)->patchNo;
+        outputs[rev].first[name.substr(4, name.size() - 6)] = values[i];
       }
-      const auto rev = ((name[name.size() - 1] == 'a') ? a : b)->patchNo;
-      outputs[rev].first[name.substr(4, name.size() - 6)] = binary;
     }
-  }
-  Executor::TestArgs argv;
-  uint8_t last = 0;
-  for (const auto& p : args) {
-    assert(p.first == last);
-    argv.push_back(p.second);
-    last++;
+    uint8_t last = 0;
+    for (const auto& p : args) {
+      assert(p.first == last);
+      argv.push_back(p.second);
+      last++;
+    }
   }
 
   char buffer[128];
@@ -3877,8 +3874,8 @@ void Executor::extractDifferentiator(ExecutionState* a, ExecutionState* b,
     auto& envs = rev.second;
     pid_t pid;
     std::vector<const char*> argp {this->concreteProgram.c_str()};
-    for (const auto& s : argv)
-      argp.push_back(s.c_str());
+    for (const auto& v : argv)
+      argp.push_back((const char *) v.data());
     argp.push_back(NULL);
 
     int fildes[2];
@@ -3894,19 +3891,17 @@ void Executor::extractDifferentiator(ExecutionState* a, ExecutionState* b,
                       envs.empty() ? NULL : envp);
     assert(!err);
     close(fildes[1]);
-    std::string out = "";
     for (ssize_t n; n = read(fildes[0], buffer, 127);) {
       assert(n >= 0);
       buffer[n] = 0;
-      out += buffer;
+      for (const unsigned char c : buffer)
+        outputs[rev.first].second.push_back(c);
     }
-    outputs[rev.first].second = out;
     posix_spawn_file_actions_destroy(&action);
   }
-  //const auto& p = this->diffTests.insert(std::make_pair(argv, outputs));
   this->diffTests[argv] = outputs;
   // :var :val cluster
-  std::map<std::string, std::map<std::string, std::set<std::uint64_t>>> revOut;
+  std::map<std::string, std::map<TestVal, std::set<std::uint64_t>>> revOut;
   for (auto& o : outputs) {
     for (auto& var : o.second.first)
       revOut[var.first][var.second].insert(o.first);
@@ -3926,21 +3921,7 @@ void Executor::extractDifferentiator(ExecutionState* a, ExecutionState* b,
       }
 }
 
-std::string hash(std::string data) {
-  std::stringstream stream;
-  stream << std::hex << std::setw(16) << std::setfill('0')
-         << std::hash<std::string>{}(data);
-  return stream.str();
-}
-
 void Executor::searchDifferentiators(ExecutionState* latest) {
-  auto last_smt2_name = hash(latest->formula) + ".smt2";
-  std::ofstream last_smt2_file;
-  last_smt2_file.open(last_smt2_name);
-  last_smt2_file << latest->formula;
-  last_smt2_file.close();
-
-  std::hash<std::string> hashFn;
   for (const auto& rev : this->exitStates) {
     if (rev.first == latest->patchNo)
       continue;
@@ -3954,55 +3935,58 @@ void Executor::searchDifferentiators(ExecutionState* latest) {
         continue;
     }
     for (const auto& state : rev.second) {
-      // TODO: implement symbdiff natively
-      std::set<ref<Expr>> combination;
-      for (auto const& constraint : state->constraints)
-        combination.insert(this->visitorA.visit(constraint));
-      for (auto const& constraint : latest->constraints)
-        combination.insert(this->visitorB.visit(constraint));
-
-      auto hashPair = std::make_pair(hashFn(state->formula),
-                                     hashFn(latest->formula));
-      if (hashPair.first == hashPair.second || this->compared[hashPair])
-        continue;
-      this->compared[hashPair] = true;
-      hashPair = {hashFn(latest->formula), hashFn(state->formula)};
-      if (this->compared[hashPair])
-        continue;
-      this->compared[hashPair] = true;
-
-      // File I/O is expensive but SMT solving is even more expensive (-;
-      const auto& smt2_name = hash(state->formula) + ".smt2";
-      std::ofstream smt2_file;
-      smt2_file.open(smt2_name);
-      smt2_file << state->formula;
-      smt2_file.close();
-
-      auto command = "symbdiff " + smt2_name + " " + last_smt2_name;
-      for (const auto& out : this->symOuts)
-        command += " " + std::to_string(out.second);
-      const auto out = popen((command + " 2> /dev/null").c_str(), "r");
-      std::string formula;
-      char buffer[128];
-      while (!feof(out))
-        if (fgets(buffer, 127, out) != NULL)
-          formula += buffer;
-      remove(smt2_name.c_str());
-      if (pclose(out))
-        continue;
+      ConstraintSet cmbnSet;
+      {
+        std::set<ref<Expr>> combination;
+        for (auto const& constraint : state->constraints)
+          combination.insert(this->visitorA.visit(constraint));
+        for (auto const& constraint : latest->constraints)
+          combination.insert(this->visitorB.visit(constraint));
+        for (auto const& constraint : combination)
+          cmbnSet.push_back(constraint);
+      }
+
+      std::vector<const Array*> objects;
+      // FIXME: what if diff symbolics
+      for (const auto& sym : state->symbolics) {
+        if (isSymArg(sym.second->name)) {
+          objects.push_back(sym.second);
+          continue;
+        }
+        if (!isSymOut(sym.second->name))
+          continue;
+        objects.push_back(arrayCache.CreateArray(sym.second->name + "!a",
+                                                 sym.second->size));
+        objects.push_back(arrayCache.CreateArray(sym.second->name + "!b",
+                                                 sym.second->size));
+
+        ref<Expr> distinction;
+        // FIXME: impossible to use visitor hash
+        for (const auto& a : this->visitorA.outVars[sym.second->name]) {
+          const auto ne = NotExpr::create(EqExpr::create(a.second,
+            this->visitorB.outVars[sym.second->name][a.first]));
+          if (distinction.get() == nullptr)
+            distinction = ne;
+          else
+            distinction = OrExpr::create(ne, distinction);
+        }
+        assert(distinction.get() != nullptr);
+        cmbnSet.push_back(distinction);
+      }
 
-      static z3::context c;
-      static z3::solver s {c};
-      s.reset();
-      s.from_string(formula.c_str());
-      if (s.check() != z3::sat)
+      this->solver->setTimeout(coreSolverTimeout);
+      std::vector<TestVal> values;
+      bool success = this->solver->getInitialValues(cmbnSet, objects, values,
+                                                    state->queryMetaData);
+      solver->setTimeout(time::Span());
+      if (!success)
         continue;
-      this->extractDifferentiator(latest, state, s.get_model());
+      this->extractDifferentiator(latest, state, objects, values);
       assert(!this->diffTests.empty());
-      break;
+      llvm::errs() << latest->patchNo << ' ' << state->patchNo << '\n';
+      break; // one diff test per termination for now
     }
   }
-  remove(last_smt2_name.c_str());
 }
 
 void Executor::terminateStateOnExit(ExecutionState &state) {
@@ -4014,7 +3998,6 @@ void Executor::terminateStateOnExit(ExecutionState &state) {
 
   interpreterHandler->incPathsCompleted();
   metaEnvVars[state.patchNo] = state.metaEnvVar;
-  getConstraintLog(state, state.formula, Interpreter::SMTLIB2);
   if (exitStates[state.patchNo].insert(&state).second)
     searchDifferentiators(&state);
   terminateState(state, StateTerminationType::Exit);
@@ -4143,7 +4126,6 @@ void Executor::terminateStateOnError(ExecutionState &state,
   }
 
   metaEnvVars[state.patchNo] = state.metaEnvVar;
-  getConstraintLog(state, state.formula, Interpreter::SMTLIB2);
   if (exitStates[state.patchNo].insert(&state).second)
     searchDifferentiators(&state);
   terminateState(state, terminationType);
diff --git a/lib/Core/Executor.h b/lib/Core/Executor.h
index fcb8fd80..8b2a609c 100644
--- a/lib/Core/Executor.h
+++ b/lib/Core/Executor.h
@@ -96,6 +96,7 @@ namespace klee {
 struct ExprCmbnVisitor : ExprVisitor {
   char revision;
   ArrayCache& arrayCache;
+  std::map<std::string, std::map<ref<Expr>, ref<ReadExpr>>> outVars;
   ExprCmbnVisitor(char rev, ArrayCache& ac) : revision(rev), arrayCache(ac) {}
   Action visitExprPost(const Expr& e) override;
 };
@@ -116,11 +117,11 @@ public:
   RNG theRNG;
 
 private:
-  typedef std::vector<std::string> TestArgs;
+  typedef std::vector<unsigned char> TestVal;
+  typedef std::vector<TestVal> TestArgs;
   /// :rev (:var val) stdout
-  typedef std::map<std::uint64_t,
-                   std::pair<std::map<std::string, std::string>,
-                             std::string>> TestOuts;
+  typedef std::map<std::uint64_t, std::pair<std::map<std::string, TestVal>,
+                                            TestVal>> TestOuts;
 
   std::unique_ptr<KModule> kmodule;
   InterpreterHandler *interpreterHandler;
@@ -132,7 +133,7 @@ private:
   std::set<ExecutionState*, ExecutionStateIDCompare> states;
   std::map<std::uint64_t,
            std::set<ExecutionState*,
-                    ExecutionStateFormulaCompare>> exitStates;
+                    ExecutionStatePathCondCompare>> exitStates;
   std::map<std::uint64_t, std::string> metaEnvVars;
   std::map<TestArgs, TestOuts> diffTests;
   std::map<std::pair<std::uint64_t, std::uint64_t>, TestOuts*> decided;
@@ -453,9 +454,9 @@ private:
   const InstructionInfo & getLastNonKleeInternalInstruction(const ExecutionState &state,
       llvm::Instruction** lastInstruction);
 
-  /// Extract differencial test from SMT model
+  /// Extract differencial test from concrete execution
   void extractDifferentiator(ExecutionState*, ExecutionState*,
-                             const z3::model&);
+                             std::vector<const Array*>, std::vector<TestVal>);
 
   /// Compare with other exit states for possible differencial tests
   void searchDifferentiators(ExecutionState *state);
diff --git a/lib/Core/ExecutorUtil.cpp b/lib/Core/ExecutorUtil.cpp
index af4f7b88..df13d2fd 100644
--- a/lib/Core/ExecutorUtil.cpp
+++ b/lib/Core/ExecutorUtil.cpp
@@ -279,15 +279,16 @@ namespace klee {
     if (e.getKind() != Expr::Read)
       return ExprVisitor::Action::doChildren();
     const auto load = dyn_cast<ReadExpr>(&e);
-
     const auto orig = load->updates.root;
-    if (orig->name.substr(0, 4) != "out!")
+    if (!isSymOut(orig->name))
       return ExprVisitor::Action::doChildren();
     const auto a = this->arrayCache.CreateArray(
       orig->name + '!' + this->revision,
       orig->size, 0, 0, orig->domain, orig->range);
-    return ExprVisitor::Action::changeTo(
-      ReadExpr::create({a, load->updates.head}, load->index));
+    const auto replacement = ReadExpr::create({a, load->updates.head},
+                                              load->index);
+    this->outVars[orig->name][load->index] = dyn_cast<ReadExpr>(replacement.get());
+    return ExprVisitor::Action::changeTo(replacement);
   }
 
   bool isSymArg(std::string name) {
diff --git a/shell.nix b/shell.nix
deleted file mode 100644
index 8e14a489..00000000
--- a/shell.nix
+++ /dev/null
@@ -1,10 +0,0 @@
-with import <nixpkgs> {};
-let symbdiff = callPackage ./tools/symbdiff { };
-in mkShell {
-  packages = [ llvmPackages_12.clang symbdiff ];
-  shellHook = ''
-    # clang -emit-llvm -c -DKLEE_RUNTIME metaprogram.c for bitcode
-    # clang -o metaprogram{,.c} for binary
-    alias clang='clang -g -O0 -Xclang -disable-O0-optnone -fsanitize=undefined -fno-sanitize=local-bounds,function,vptr'
- '';
-}
diff --git a/tools/symbdiff b/tools/symbdiff
deleted file mode 160000
-Subproject cf6ac1646a956657d69fe698486e6fef3220141