// Metaprogram's revision differentiator implementation // Copyright 2023 Nguyễn Gia Phong // // This file is distributed under the University of Illinois // Open Source License. See LICENSE.TXT for details. #include "Differentiator.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/CommandLine.h" #include #include #include #include llvm::cl::opt InputProgram{llvm::cl::desc(""), llvm::cl::Positional, llvm::cl::Required}; using namespace klee; bool isSymArg(const std::string& name) { /// Match arg\d\d return (name.size() == 5 // string::starts_with requires C++20 && name[0] == 'a' && name[1] == 'r' && name[2] == 'g' && '0' <= name[3] && name[3] <= '9' && '0' <= name[4] && name[4] <= '9'); } bool isSymOut(const std::string& name) { /// Match for out!.*\d return (name[0] == 'o' && name[1] == 'u' && name[2] == 't' && name[3] == '!' && '0' <= name[name.size() - 1] && name[name.size() - 1] <= '9'); } std::pair Differentiator::extractPatchNumber(ref e) { if (e.get()->getKind() == Expr::And) { const auto& et = dyn_cast(e.get()); const auto& l = extractPatchNumber(et->left); const auto& r = extractPatchNumber(et->right); // Optimized if ((__choose\d+) == 0) {} else if (\1 == \d+) assert(!(l.first && r.first)); return {l.first || r.first, l.first ? l.second : r.second}; } if (e.get()->getKind() != Expr::Eq) return {false, 0u}; // expecting __choose\d+ == \d+ const auto eq = dyn_cast(e.get()); // Expr is unbalanced to the right, hence scalar must be on the left. if (eq->left.get()->getKind() != Expr::Constant) return {false, 0u}; const auto constant = dyn_cast(eq->left.get()); if (constant->isFalse()) // negation return {false, 0u}; // could only be !(__choose.+) anyway if meta ReadExpr* read; if (eq->right.get()->getKind() == Expr::Read) { read = dyn_cast(eq->right.get()); } else { if (eq->right.get()->getKind() != Expr::Concat) return {false, 0u}; const auto concat = dyn_cast(eq->right.get()); if (concat->getLeft().get()->getKind() != Expr::Read) return {false, 0u}; read = dyn_cast(concat->getLeft().get()); } const auto& name = read->updates.root->name; // Skip when it is not __choose\d.* if (name.substr(0, 8) != "__choose" || '0' > name[8] || name[8] > '9') return {false, 0u}; return {true, constant->getZExtValue()}; } ExprCmbnVisitor::ExprCmbnVisitor(char rev, ArrayCache& ac) : revision(rev), arrayCache(ac) {} ExprVisitor::Action ExprCmbnVisitor::visitExprPost(const Expr& e) { if (e.getKind() != Expr::Read) return ExprVisitor::Action::doChildren(); const auto load = dyn_cast(&e); const auto orig = load->updates.root; if (!isSymOut(orig->name)) return ExprVisitor::Action::doChildren(); const auto a = this->arrayCache.CreateArray( orig->name + '!' + this->revision, orig->size, 0, 0, orig->domain, orig->range); const auto replacement = ReadExpr::create({a, load->updates.head}, load->index); this->outVars[orig->name][load->index] = dyn_cast(replacement.get()); return ExprVisitor::Action::changeTo(replacement); } Differentiator::Differentiator(std::unique_ptr* s, time::Span& t, ArrayCache& ac) : revisions{{0}}, prog{InputProgram.c_str()}, solver{s}, solverTimeout{t}, arrayCache{ac}, visitorA{'a', ac}, visitorB{'b', ac} {} Differentiator::Bytes run(const std::uint64_t rev, const char* prog, const Differentiator::TestInps& inputs) { int ind[2], outd[2]; { auto err = pipe(ind); assert(!err); err = pipe(outd); assert(!err); } posix_spawn_file_actions_t action; posix_spawn_file_actions_init(&action); posix_spawn_file_actions_adddup2(&action, ind[0], 0); posix_spawn_file_actions_addclose(&action, ind[1]); posix_spawn_file_actions_addclose(&action, outd[0]); posix_spawn_file_actions_adddup2(&action, outd[1], 1); { pid_t pid; std::vector argv {prog}; for (const auto& v : inputs.first) argv.push_back((const char *) v.data()); argv.push_back(NULL); std::ostringstream env; env << "KLEE_META=" << rev; char *const envp[] = {const_cast(env.str().c_str()), NULL}; const auto err = posix_spawn(&pid, prog, &action, NULL, const_cast(argv.data()), envp); assert(!err); } write(ind[0], inputs.second.data(), inputs.second.size()); close(ind[0]); close(outd[1]); char buffer[128]; // output buffer for concrete execution Differentiator::Bytes result; for (unsigned char n; n = read(outd[0], buffer, sizeof(buffer));) { assert(n >= 0); for (unsigned char i = 0; i < n; ++i) result.push_back(buffer[i]); } posix_spawn_file_actions_destroy(&action); result.push_back(0); // null termination return result; } void logBytes(const Differentiator::Bytes& buffer) { llvm::errs() << (int) buffer[0]; for (size_t i = 1; i < buffer.size(); ++i) llvm::errs() << ':' << (int) buffer[i]; } void logInputs(const Differentiator::TestInps& inputs) { llvm::errs() << "Args:"; for (const auto& s : inputs.first) { llvm::errs() << ' '; logBytes(s); } llvm::errs() << "\nStdin: " << inputs.second.data() << '\n'; } void logClusters(const Differentiator::Clusters& clusters, bool text = true) { for (const auto& [output, revisions] : clusters) { llvm::errs() << "Revisions:"; for (const auto& rev : revisions) llvm::errs() << ' ' << rev; llvm::errs() << '\n'; if (text) { llvm::errs() << std::string((const char*) output.data()); } else { logBytes(output); llvm::errs() << '\n'; } } } void Differentiator::extract(ExecutionState* a, ExecutionState* b, const std::vector& objects, const std::vector& values) { TestInps inputs; TestOuts outputs; { std::map args; for (unsigned i = 0; i < objects.size(); ++i) { const auto& name = objects[i]->name; if (isSymArg(name)) { args[std::atoi(name.c_str() + 3)] = values[i]; args[std::atoi(name.c_str() + 3)].push_back(0); // null termination } else if (isSymOut(name.substr(0, name.size() - 2))) { const auto rev = ((name[name.size() - 1] == 'a') ? a : b)->patchNo; outputs[rev].first[name.substr(4, name.size() - 6)] = values[i]; } else if (name == "stdin") { inputs.second = values[i]; } } uint8_t last = 0; for (const auto& [rev, arg] : args) { assert(rev == last); inputs.first.push_back(arg); last++; } } for (const auto& rev : this->revisions) outputs[rev].second = run(rev, this->prog, inputs); this->tests[inputs] = outputs; std::map revOut; for (const auto& [rev, out] : outputs) { const auto& [syms, concrete] = out; for (const auto& [var, val] : syms) revOut[var][val].insert(rev); revOut[""][concrete].insert(rev); } for (const auto& [_, clusters] : revOut) for (const auto& p : clusters) for (const auto& q : clusters) { if (&p == &q) continue; for (std::uint64_t i : p.second) for (std::uint64_t j : q.second) if (i < j) this->done.emplace(std::make_pair(i, j), &this->tests[inputs]); } } void Differentiator::search(ExecutionState* latest) { if (!this->exits[latest->patchNo].insert(latest).second) return; // skip when seen before for (const auto& rev : this->exits) { if (rev.first == latest->patchNo) continue; if (rev.first < latest->patchNo) { if (this->done.find(std::make_pair(rev.first, latest->patchNo)) != this->done.end()) continue; } else { if (this->done.find(std::make_pair(latest->patchNo, rev.first)) != this->done.end()) continue; } for (const auto& state : rev.second) { ConstraintSet cmbnSet; { std::set> combination; for (const auto& constraint : state->constraints) combination.insert(this->visitorA.visit(constraint)); for (const auto& constraint : latest->constraints) combination.insert(this->visitorB.visit(constraint)); for (const auto& constraint : combination) cmbnSet.push_back(constraint); } std::vector objects; ref distinction; for (const auto& sym : state->symbolics) { if (isSymArg(sym.second->name)) { objects.push_back(sym.second); continue; } if (!isSymOut(sym.second->name)) continue; unsigned char i = sym.second->name.size() - 1; while (sym.second->name[i] != '!' && sym.second->name[i] != '_') --i; const auto& name = sym.second->name[i] == '_' ? sym.second->name.substr(0, i) : sym.second->name; objects.push_back(arrayCache.CreateArray(name + "!a", sym.second->size)); objects.push_back(arrayCache.CreateArray(name + "!b", sym.second->size)); // FIXME: impossible to use visitor hash for (const auto& a : this->visitorA.outVars[name]) { const auto& b = this->visitorB.outVars[name].find(a.first); if (b == this->visitorB.outVars[name].end()) continue; const auto& ne = NotExpr::create(EqExpr::create(a.second, b->second)); if (distinction.get() == nullptr) distinction = ne; else distinction = OrExpr::create(ne, distinction); } } if (distinction.get() == nullptr) continue; // no common symbolic cmbnSet.push_back(distinction); std::vector values; std::unique_ptr& solver = *this->solver; // do judge! solver->setTimeout(solverTimeout); bool success = solver->getInitialValues(cmbnSet, objects, values, state->queryMetaData); solver->setTimeout(time::Span()); if (!success) continue; this->extract(latest, state, objects, values); assert(!this->tests.empty()); break; // one diff test per termination for now } } } void Differentiator::log() { std::vector> clusterings; for (auto& t : this->tests) { Clusters clusters; for (const auto& rev : this->revisions) { Bytes& out = t.second[rev].second; if (out.empty()) // backfill stdout out = run(rev, this->prog, t.first); clusters[out].insert(rev); } if (clusters.size() > 1) clusterings.push_back({std::move(t.first), std::move(clusters)}); } for (const auto& [inputs, clusters] : clusterings) { logInputs(inputs); logClusters(clusters); llvm::errs() << '\n'; } size_t minLump = this->revisions.size(); std::map minDepth; for (size_t i = 0u; i < this->revisions.size(); ++i) minDepth[i] = this->revisions.size() - 1; std::vector indices; for (size_t i = 0u; i < clusterings.size(); ++i) indices.push_back(i); do { std::map> diffed; size_t depth = 0u; for (const auto& i : indices) { const auto& [args, clusters] = clusterings[i]; for (const auto& [_, lump] : clusters) for (const auto& key : lump) for (const auto& val : this->revisions) if (lump.find(val) == lump.end()) diffed[key].insert(val); size_t maxLump = 1u; for (const auto& [rev, others] : diffed) maxLump = std::max(this->revisions.size() - others.size(), maxLump); minDepth[maxLump] = std::min(++depth, minDepth[maxLump]); minLump = std::min(maxLump, minLump); if (maxLump == 1) break; } } while (std::next_permutation(indices.begin(), indices.end())); llvm::errs() << minDepth[minLump] << " decisions with " << minLump << " undiffed at most\n"; }