diff options
Diffstat (limited to 'stp/AST/ToCNF.cpp')
-rw-r--r-- | stp/AST/ToCNF.cpp | 506 |
1 files changed, 506 insertions, 0 deletions
diff --git a/stp/AST/ToCNF.cpp b/stp/AST/ToCNF.cpp new file mode 100644 index 00000000..2a18b3f5 --- /dev/null +++ b/stp/AST/ToCNF.cpp @@ -0,0 +1,506 @@ +/******************************************************************** + * AUTHORS: David L. Dill, Vijay Ganesh + * + * BEGIN DATE: November, 2005 + * + * LICENSE: Please view LICENSE file in the home dir of this Program + ********************************************************************/ +// -*- c++ -*- + +// THEORY: This code translates an arbitrary Boolean DAG, generated by +// the BitBlast.cpp, to an equi-satisfiable CNF formula. There are +// four kinds of variables in the CNF formula: (1) propositional +// variables from the original formula; (2) BVGETBIT formulas from the +// original formula (a precondition is that the BVGETBIT can only be +// applied to bitvector-valued variables, array reads, or +// uninterpreted functions); (3) TRUE; or (4) representative variables +// (see below). Each literal in the CNF formula is one of these or +// its negation. + +// It is convenient (though not perfectly efficient) to be able to add +// TRUE and FALSE constants to clauses, which is not allowed in CNF. +// So, there is a "dummy variable" representing TRUE, which is used in +// its place (so FALSE is represented by the negation of the dummy +// var). The CNF formula has a unit clause containing this dummy +// variable, so that any satisfying assignment must make the dummy var +// TRUE. + +// Every sub-formula of the input formula has a "representative +// literal." A truth assignment satisfies the CNF formula iff the +// restriction of that assignment to the original variables satisfies +// the original formula, AND the rep lits have the same truth values +// as the subformulas they represent in the original formula. In the +// trivial cases, the representative literal is the input variable, or +// dummy true var, or its negation. Representative literals may be +// negated variables -- essentially, only AND formulas are translated, +// and everything else is handled by rewriting or negating formulas. +// The representative for (NOT alpha) is the negation of the +// representative for alpha. + +// The translation is performed by ToCNF_int, which traverses the original +// formula. ToCNF adds clauses that constrain the representative variables +// to be equal to the truth values of the formulas they represent. +// ToCNF always returns a literal whose value must be equivalent to the formula +// it translated. In trivial cases, this literal is a literal from the original +// formula, or the dummy true/false literals. If the formula is of the form +// (not alpha), ToCNF_int negates the literal representing alpha (which may +// itself be a negative literal) and returns it. Otherwise, ToCNF_int assigns +// a new rep var, adds the clauses, and returns the new var. ToCNF_int is +// memoized so that it doesn't assign more than one variable to a subformula, +// and to prevent exponential numbers of redundant visits to shared subformulas. + +// In reality, only AND/OR/NOT formulas are translated directly. Everything +// else (XOR, IFF, IMPLIES) is rewritten on-the-fly into these forms. I +// could have done that in bit-blasting, but I thought that, in the future, +// we might be able to translate these operations differently in different +// contexts to optimize the CNF formula. + +// FIXME: Inspection of the clauses is kind of horrifying. In +// addition to true/false, there are duplicate literals and duplicate +// clauses all over the place. +#include "AST.h" +static bool CNF_trace = false; +namespace BEEV { +/** Statistics class. Total number of variables is best tracked in + ToSAT. Number of clauses is just cll.size() */ + +class CNFstats { +public: + int _num_new_rep_vars; + int _num_clauses; + + // constructor + CNFstats() : _num_new_rep_vars(0), _num_clauses(0) {} + + void printStats() { + if(stats) { + cout << "ToCNF statistics:" << endl; + cout << "Number of new representative variables: " + << _num_new_rep_vars << endl; + cout << "Number of new clauses: " + << _num_clauses << endl; + } + } + +}; + + +/** This class contains private data and function members for the + CNF conversion */ +class CNFMgr { + + friend class BeevMgr; + +public: + + // Needed in the body of BeevMgr::ToCNF. It's not visible outside + // this file, though. + ASTNode dummy_true_var; + + // CNF Pre-pass + ASTNodeMap ToCNFPrePassMemo; + + // CNF Memo Table. + ASTNodeMap CNFMemo; + + +private: + + // Pointer back to BeevMgr with the node tables, etc. + BeevMgr *bm; + + // For ToCNF conversion. This holds a dummy variable representing + // "True". It is added as a unit clause, so that it will be assigned + // to true and propagated immediately by any CNF solver. + + ASTNode dummy_false_var; // not of dummy_true_var + + CNFstats stats; + + // constructor + CNFMgr(BeevMgr *bmgr) + { + bm = bmgr; + + // Dummy variable so TRUE can be a literal. + dummy_true_var = bm->CreateSymbol("*TrueDummy*"); + dummy_false_var = bm->CreateSimpNot(dummy_true_var); + } + + // Returns true iff result has been memoized. + // if it returns true, result is returned in by-ref parameter "result" + // Consider just putitng this in-line. + bool CNFIsMemoized(ASTNode &form, ASTNode &result) + { + ASTNodeMap::iterator it = CNFMemo.find(form); + if (it != CNFMemo.end()) { + result = it->second; //already there. Just return it. + return true; + } + else { + return false; + } + } + + + // Convert a big XOR to a bunch of AND/ORs. Assumes subformulas have + // already been converted. + ASTNode convertXORs(ASTVec children) + { + ASTNode accum = children[0]; + ASTVec::iterator itend = children.end(); + for (ASTVec::iterator it = children.begin()+1; it < itend; it++) { + // a XOR b -> (a | b) & (!a | !b) + + // For each XOR node with k children, creates approximately + // 5*(k-1) nodes. AND + 2 OR + 2 NOT. + + ASTNode or1 = bm->CreateNode(OR, accum, *it); + ASTNode or2 = bm->CreateNode(OR, bm->CreateSimpNot(accum), bm->CreateSimpNot(*it)); + accum = bm->CreateNode(AND, or1, or2); + + } + + return accum; + } + + + // Do preliminary transformations on bitblasted formula to make + // CNF conversion easier. + // Converts XORs to AND/OR form. + ASTNode ToCNFPrePass(const ASTNode &form) + { + + // Check memo table + ASTNodeMap::iterator mem_it = ToCNFPrePassMemo.find(form); + if (mem_it != ToCNFPrePassMemo.end()) { + return mem_it->second; + } + + ASTNode result; + + ASTVec new_children; + ASTVec::const_iterator endit = form.end(); + for (ASTVec::const_iterator it = form.begin(); it != endit; it++) { + ASTNode ch = ToCNFPrePass(*it); + new_children.push_back(ch); + } + + Kind k = form.GetKind(); + + switch (k) { + case FALSE: + case TRUE: + case SYMBOL: + case BVGETBIT: { + result = form; + break; + } + case XOR: { + // convertXORs can only be called once per XOR node. + result = convertXORs(new_children); + + // cout << "convertXORs num args: " << new_children.size() << endl; + // temporary node for node count. + // ASTNode tmp = bm->CreateNode(XOR, new_children ); + // cout << "convertXORs size of [" << form.GetNodeNum() << "] " << bm->NodeSize(form, true) << endl; + // cout << "convertXORs before size: " << bm->NodeSize(tmp, true) << endl; + // cout << "convertXORs after size: " << bm->NodeSize(result, true) << endl; + break; + } + default: { + // Be cautious about using CreateSimpForm -- It makes big xors! + result = bm->CreateNode(k, new_children); + } + } + +// cout << "================" << endl +// << "ToCNFPrePass:" << form << endl +// << "----------------" << endl +// << "ToCNFPrePass Result:" << result << endl; + + return (ToCNFPrePassMemo[form] = result); + + } + + // Memoize and return formula value + ASTNode CNFMemoize(ASTNode& form, ASTNode result) + { + CNFMemo[form] = result; + return result; + } + + + // Create a representative variable for an original formula. + // The convention is that the variable will have the same truth + // value as the expression numbered "num." + ASTNode RepLit(const char *name, int exprnum) + { + // FIXME: can this be done more efficiently with string type? + ostringstream oss; + oss << name << "{" << exprnum << "}"; + ASTNode t = bm->CreateSymbol(oss.str().c_str()); + + // Track how many we're generating. + stats._num_new_rep_vars++; + + //ASTNode is of type BOOLEAN <==> ((indexwidth=0)&&(valuewidth=0)) + t.SetIndexWidth(0); + t.SetValueWidth(0); + return t; + } + + // Handle the cases where it's necessary to do n children. + // This code translates ANDs, and converts NAND, NOR, OR by negating + // the inputs or outputs of the AND. + ASTNode ToCNF_AndLike(Kind k, BeevMgr::ClauseList& cll, ASTNode form) + { + // Build vectors of positive and negative rep lits for children + ASTVec kidlits(0); + ASTVec negkidlits(0); + ASTVec::const_iterator kids_end = form.end(); + for (ASTVec::const_iterator it = form.begin(); it != kids_end; it++) { + ASTNode kidreplit = ToCNF_int(cll, *it); + kidlits.push_back(kidreplit); + negkidlits.push_back(bm->CreateSimpNot(kidreplit)); + } + + ASTNode replit; + // translate the AND, negating inputs as appropriate. + if (k == OR || k == NOR) { + replit = ToCNF_AND(cll, form.GetNodeNum(), negkidlits, kidlits); + } + else { + replit = ToCNF_AND(cll, form.GetNodeNum(), kidlits, negkidlits); + } + + // Reduce NAND/OR to AND by negating result. + if (k == NAND || k == OR) { + return CNFMemoize(form, bm->CreateSimpNot(replit)); + } + else { + return CNFMemoize(form, replit); + } + } + + ASTNode ToCNF_AND(BeevMgr::ClauseList& cll, int nodenum, ASTVec& kidlits, ASTVec& negkidlits) + { + // Translate an AND, given rep lits for children + // Build clauses for (replit <-> a AND b AND c) + + ASTNode replit = RepLit("cnf", nodenum); + ASTNode notreplit = bm->CreateSimpNot(replit); + + if (CNF_trace) { + cout << "Translating AND" << endl << "-----------------------" << endl + << "Rep lit =" << replit << endl + << "-----------------------"; + } + + // (a AND b AND c -> replit) == (~a OR ~b OR ~c OR replit) + BeevMgr::ClausePtr clp = new ASTVec(negkidlits); + clp->push_back(replit); + + if (CNF_trace) { + LispPrintVec(cout, *clp, 0); + cout << endl << "-----------------------" << endl; + } + + cll.push_back(clp); + + // (replit -> (a AND b AND c)) == + // (~replit OR a) AND (~replit OR b) AND (~replit OR c) + ASTVec::const_iterator kidlits_end = kidlits.end(); + for (ASTVec::iterator it = kidlits.begin(); it != kidlits_end; it++) { + clp = new ASTVec(); + clp->push_back(notreplit); + clp->push_back(*it); + + if (CNF_trace) { + LispPrintVec(cout, *clp, 0); + cout << endl << "-----------------------" << endl; + } + + cll.push_back(clp); + } + + return replit; + } + +public: + + /** Builds clauses globally and returns a literal. + The literal can be a leaf from the expression, or a rep var + made up to represent the subexpression. */ + ASTNode ToCNF_int(BeevMgr::BeevMgr::ClauseList& cll, ASTNode form) { + // FIXME: assert indexwidth= 0, valuewidth = 1 + + // FIXME: rewriting is top-down, which is not efficient. + // It rewrites the top node of the tree, then does the children. + // Either rewrite in a separate pass, or translate children + // before rewriting somehow (might require handling rep lits + // as though they were real lits, which is probably ok). + + // Return memoized value if seen before. + ASTNode result; + Kind k = form.GetKind(); + + if (CNFIsMemoized(form, result)) { + return result; + } + + switch (k) { + // handle the trivial cases here. If none apply, call the + // heavy-duty function. If a constant or literal, just return + // without creating a clause. + case FALSE: { + result = dummy_false_var; + break; + } + case TRUE: { + result = dummy_true_var; + break; + } + case SYMBOL: + case BVGETBIT: { + result = form; + break; + } + + case NOT: { + ASTNode replit = ToCNF_int(cll, form[0]); + result = bm->CreateSimpNot(replit); + break; + } + + // For these, I can't think of anything better than expanding into ANDs/ORs + case ITE: { + // (ite a b c) == (~a OR b) AND (a OR c) + ASTNode l = bm->CreateNode(OR, bm->CreateSimpNot(form[0]), form[1]); + ASTNode r = bm->CreateNode(OR, form[0], form[2]); + ASTNode andor = bm->CreateNode(AND, l, r); + if (CNF_trace) { + cout << "Rewriting " << form << endl + << "to" << andor << endl + << "-------------------" << endl; + } + result = ToCNF_int(cll, andor); + break; + } + case IMPLIES: { + // Just rewrite into (~a OR b) + ASTNode l = bm->CreateSimpNot(form[0]); + ASTNode andor = bm->CreateNode(OR, l, form[1]); + if (CNF_trace) { + cout << "Rewriting " << form << endl + << "to" << andor << endl + << "-------------------" << endl; + } + result = ToCNF_int(cll, andor); + break; + } + case XOR: { + FatalError("ToCNF_int: XORs should have been converted to AND/OR by this point."); + break; + } + + case IFF: { + FatalError("BitBlaster failed to eliminate all IFFs."); + break; + } + + case AND: + case OR: + case NOR: + case NAND: { + result = ToCNF_AndLike(k, cll, form); + break; + } + default: + cerr << "ToCNF: can't handle this kind: " << k << endl; + FatalError(""); + } + + if (CNF_trace) { + cout << "ToCNF_int: Literal " << result << " represents formula " << + form << endl << "---------------" << endl; + } + + return CNFMemoize(form, result); + } //end of ToCNF_int() + + +}; // end of CNFMgr class + + // These are the bodies of functions in the BeevMgr that are part of + // the public interface. + + // Debug printing function. + void BeevMgr::PrintClauseList(ostream& os, BeevMgr::ClauseList& cll) + { + int num_clauses = cll.size(); + os << "Clauses: " << endl << "=========================================" << endl; + for(int i=0; i < num_clauses; i++) { + os << "Clause " << i << endl + << "-------------------------------------------" << endl; + LispPrintVec(os, *cll[i], 0); + os << endl + << "-------------------------------------------" << endl; + } + } + + void BeevMgr::DeleteClauseList(BeevMgr::ClauseList *cllp) + { + BeevMgr::ClauseList::const_iterator iend = cllp->end(); + for (BeevMgr::ClauseList::const_iterator i = cllp->begin(); i < iend; i++) { + delete *i; + } + delete cllp; + } + + // Top level conversion function + BeevMgr::ClauseList *BeevMgr::ToCNF(const ASTNode& form) + { + + // FIXME: This is leaked as well. + CNFMgr *cm = new CNFMgr(this); + + // Prepass + ASTNode form1 = cm->ToCNFPrePass(form); + + // cout << "Number of nodes after ToCNFPrePass" << NodeSize(form1, true) << endl; + + // cout << "ToCNF: After ToCNFPrePass" << form1 << endl; + + // FIXME: Assert CNFMemo is empty. + + // The clause list we will be building up. + // FIXME: This is never freed. + ClauseList *cllp = new ClauseList(); + + BeevMgr::ClausePtr dummy_true_unit_clause = new ASTVec(); + dummy_true_unit_clause->push_back(cm->dummy_true_var); + cllp->push_back(dummy_true_unit_clause); + + // This is where the translation happens. + ASTNode toplit = cm->ToCNF_int(*cllp, form1); + + // Add the top literal as a unit clause, since it must + // be true when original formula is satsfied. + BeevMgr::ClausePtr clp = new ASTVec(0); + clp->push_back(toplit); + cllp->push_back(clp); + + cm->stats._num_clauses = cllp->size(); + cm->stats.printStats(); + + RepLitMap = cm->CNFMemo; // Save memo table for debugging (DD 1/13/07). + + cm->CNFMemo.clear(); // Important to do this so nodes get freed. + + delete cm; + + return cllp; + } + +} // end namespace |