/******************************************************************** * AUTHORS: David L. Dill, Vijay Ganesh * * BEGIN DATE: November, 2005 * * LICENSE: Please view LICENSE file in the home dir of this Program ********************************************************************/ // -*- c++ -*- // THEORY: This code translates an arbitrary Boolean DAG, generated by // the BitBlast.cpp, to an equi-satisfiable CNF formula. There are // four kinds of variables in the CNF formula: (1) propositional // variables from the original formula; (2) BVGETBIT formulas from the // original formula (a precondition is that the BVGETBIT can only be // applied to bitvector-valued variables, array reads, or // uninterpreted functions); (3) TRUE; or (4) representative variables // (see below). Each literal in the CNF formula is one of these or // its negation. // It is convenient (though not perfectly efficient) to be able to add // TRUE and FALSE constants to clauses, which is not allowed in CNF. // So, there is a "dummy variable" representing TRUE, which is used in // its place (so FALSE is represented by the negation of the dummy // var). The CNF formula has a unit clause containing this dummy // variable, so that any satisfying assignment must make the dummy var // TRUE. // Every sub-formula of the input formula has a "representative // literal." A truth assignment satisfies the CNF formula iff the // restriction of that assignment to the original variables satisfies // the original formula, AND the rep lits have the same truth values // as the subformulas they represent in the original formula. In the // trivial cases, the representative literal is the input variable, or // dummy true var, or its negation. Representative literals may be // negated variables -- essentially, only AND formulas are translated, // and everything else is handled by rewriting or negating formulas. // The representative for (NOT alpha) is the negation of the // representative for alpha. // The translation is performed by ToCNF_int, which traverses the original // formula. ToCNF adds clauses that constrain the representative variables // to be equal to the truth values of the formulas they represent. // ToCNF always returns a literal whose value must be equivalent to the formula // it translated. In trivial cases, this literal is a literal from the original // formula, or the dummy true/false literals. If the formula is of the form // (not alpha), ToCNF_int negates the literal representing alpha (which may // itself be a negative literal) and returns it. Otherwise, ToCNF_int assigns // a new rep var, adds the clauses, and returns the new var. ToCNF_int is // memoized so that it doesn't assign more than one variable to a subformula, // and to prevent exponential numbers of redundant visits to shared subformulas. // In reality, only AND/OR/NOT formulas are translated directly. Everything // else (XOR, IFF, IMPLIES) is rewritten on-the-fly into these forms. I // could have done that in bit-blasting, but I thought that, in the future, // we might be able to translate these operations differently in different // contexts to optimize the CNF formula. // FIXME: Inspection of the clauses is kind of horrifying. In // addition to true/false, there are duplicate literals and duplicate // clauses all over the place. #include "AST.h" static bool CNF_trace = false; namespace BEEV { /** Statistics class. Total number of variables is best tracked in ToSAT. Number of clauses is just cll.size() */ class CNFstats { public: int _num_new_rep_vars; int _num_clauses; // constructor CNFstats() : _num_new_rep_vars(0), _num_clauses(0) {} void printStats() { if(stats) { cout << "ToCNF statistics:" << endl; cout << "Number of new representative variables: " << _num_new_rep_vars << endl; cout << "Number of new clauses: " << _num_clauses << endl; } } }; /** This class contains private data and function members for the CNF conversion */ class CNFMgr { friend class BeevMgr; public: // Needed in the body of BeevMgr::ToCNF. It's not visible outside // this file, though. ASTNode dummy_true_var; // CNF Pre-pass ASTNodeMap ToCNFPrePassMemo; // CNF Memo Table. ASTNodeMap CNFMemo; private: // Pointer back to BeevMgr with the node tables, etc. BeevMgr *bm; // For ToCNF conversion. This holds a dummy variable representing // "True". It is added as a unit clause, so that it will be assigned // to true and propagated immediately by any CNF solver. ASTNode dummy_false_var; // not of dummy_true_var CNFstats stats; // constructor CNFMgr(BeevMgr *bmgr) { bm = bmgr; // Dummy variable so TRUE can be a literal. dummy_true_var = bm->CreateSymbol("*TrueDummy*"); dummy_false_var = bm->CreateSimpNot(dummy_true_var); } // Returns true iff result has been memoized. // if it returns true, result is returned in by-ref parameter "result" // Consider just putitng this in-line. bool CNFIsMemoized(ASTNode &form, ASTNode &result) { ASTNodeMap::iterator it = CNFMemo.find(form); if (it != CNFMemo.end()) { result = it->second; //already there. Just return it. return true; } else { return false; } } // Convert a big XOR to a bunch of AND/ORs. Assumes subformulas have // already been converted. ASTNode convertXORs(ASTVec children) { ASTNode accum = children[0]; ASTVec::iterator itend = children.end(); for (ASTVec::iterator it = children.begin()+1; it < itend; it++) { // a XOR b -> (a | b) & (!a | !b) // For each XOR node with k children, creates approximately // 5*(k-1) nodes. AND + 2 OR + 2 NOT. ASTNode or1 = bm->CreateNode(OR, accum, *it); ASTNode or2 = bm->CreateNode(OR, bm->CreateSimpNot(accum), bm->CreateSimpNot(*it)); accum = bm->CreateNode(AND, or1, or2); } return accum; } // Do preliminary transformations on bitblasted formula to make // CNF conversion easier. // Converts XORs to AND/OR form. ASTNode ToCNFPrePass(const ASTNode &form) { // Check memo table ASTNodeMap::iterator mem_it = ToCNFPrePassMemo.find(form); if (mem_it != ToCNFPrePassMemo.end()) { return mem_it->second; } ASTNode result; ASTVec new_children; ASTVec::const_iterator endit = form.end(); for (ASTVec::const_iterator it = form.begin(); it != endit; it++) { ASTNode ch = ToCNFPrePass(*it); new_children.push_back(ch); } Kind k = form.GetKind(); switch (k) { case FALSE: case TRUE: case SYMBOL: case BVGETBIT: { result = form; break; } case XOR: { // convertXORs can only be called once per XOR node. result = convertXORs(new_children); // cout << "convertXORs num args: " << new_children.size() << endl; // temporary node for node count. // ASTNode tmp = bm->CreateNode(XOR, new_children ); // cout << "convertXORs size of [" << form.GetNodeNum() << "] " << bm->NodeSize(form, true) << endl; // cout << "convertXORs before size: " << bm->NodeSize(tmp, true) << endl; // cout << "convertXORs after size: " << bm->NodeSize(result, true) << endl; break; } default: { // Be cautious about using CreateSimpForm -- It makes big xors! result = bm->CreateNode(k, new_children); } } // cout << "================" << endl // << "ToCNFPrePass:" << form << endl // << "----------------" << endl // << "ToCNFPrePass Result:" << result << endl; return (ToCNFPrePassMemo[form] = result); } // Memoize and return formula value ASTNode CNFMemoize(ASTNode& form, ASTNode result) { CNFMemo[form] = result; return result; } // Create a representative variable for an original formula. // The convention is that the variable will have the same truth // value as the expression numbered "num." ASTNode RepLit(const char *name, int exprnum) { // FIXME: can this be done more efficiently with string type? ostringstream oss; oss << name << "{" << exprnum << "}"; ASTNode t = bm->CreateSymbol(oss.str().c_str()); // Track how many we're generating. stats._num_new_rep_vars++; //ASTNode is of type BOOLEAN <==> ((indexwidth=0)&&(valuewidth=0)) t.SetIndexWidth(0); t.SetValueWidth(0); return t; } // Handle the cases where it's necessary to do n children. // This code translates ANDs, and converts NAND, NOR, OR by negating // the inputs or outputs of the AND. ASTNode ToCNF_AndLike(Kind k, BeevMgr::ClauseList& cll, ASTNode form) { // Build vectors of positive and negative rep lits for children ASTVec kidlits(0); ASTVec negkidlits(0); ASTVec::const_iterator kids_end = form.end(); for (ASTVec::const_iterator it = form.begin(); it != kids_end; it++) { ASTNode kidreplit = ToCNF_int(cll, *it); kidlits.push_back(kidreplit); negkidlits.push_back(bm->CreateSimpNot(kidreplit)); } ASTNode replit; // translate the AND, negating inputs as appropriate. if (k == OR || k == NOR) { replit = ToCNF_AND(cll, form.GetNodeNum(), negkidlits, kidlits); } else { replit = ToCNF_AND(cll, form.GetNodeNum(), kidlits, negkidlits); } // Reduce NAND/OR to AND by negating result. if (k == NAND || k == OR) { return CNFMemoize(form, bm->CreateSimpNot(replit)); } else { return CNFMemoize(form, replit); } } ASTNode ToCNF_AND(BeevMgr::ClauseList& cll, int nodenum, ASTVec& kidlits, ASTVec& negkidlits) { // Translate an AND, given rep lits for children // Build clauses for (replit <-> a AND b AND c) ASTNode replit = RepLit("cnf", nodenum); ASTNode notreplit = bm->CreateSimpNot(replit); if (CNF_trace) { cout << "Translating AND" << endl << "-----------------------" << endl << "Rep lit =" << replit << endl << "-----------------------"; } // (a AND b AND c -> replit) == (~a OR ~b OR ~c OR replit) BeevMgr::ClausePtr clp = new ASTVec(negkidlits); clp->push_back(replit); if (CNF_trace) { LispPrintVec(cout, *clp, 0); cout << endl << "-----------------------" << endl; } cll.push_back(clp); // (replit -> (a AND b AND c)) == // (~replit OR a) AND (~replit OR b) AND (~replit OR c) ASTVec::const_iterator kidlits_end = kidlits.end(); for (ASTVec::iterator it = kidlits.begin(); it != kidlits_end; it++) { clp = new ASTVec(); clp->push_back(notreplit); clp->push_back(*it); if (CNF_trace) { LispPrintVec(cout, *clp, 0); cout << endl << "-----------------------" << endl; } cll.push_back(clp); } return replit; } public: /** Builds clauses globally and returns a literal. The literal can be a leaf from the expression, or a rep var made up to represent the subexpression. */ ASTNode ToCNF_int(BeevMgr::BeevMgr::ClauseList& cll, ASTNode form) { // FIXME: assert indexwidth= 0, valuewidth = 1 // FIXME: rewriting is top-down, which is not efficient. // It rewrites the top node of the tree, then does the children. // Either rewrite in a separate pass, or translate children // before rewriting somehow (might require handling rep lits // as though they were real lits, which is probably ok). // Return memoized value if seen before. ASTNode result; Kind k = form.GetKind(); if (CNFIsMemoized(form, result)) { return result; } switch (k) { // handle the trivial cases here. If none apply, call the // heavy-duty function. If a constant or literal, just return // without creating a clause. case FALSE: { result = dummy_false_var; break; } case TRUE: { result = dummy_true_var; break; } case SYMBOL: case BVGETBIT: { result = form; break; } case NOT: { ASTNode replit = ToCNF_int(cll, form[0]); result = bm->CreateSimpNot(replit); break; } // For these, I can't think of anything better than expanding into ANDs/ORs case ITE: { // (ite a b c) == (~a OR b) AND (a OR c) ASTNode l = bm->CreateNode(OR, bm->CreateSimpNot(form[0]), form[1]); ASTNode r = bm->CreateNode(OR, form[0], form[2]); ASTNode andor = bm->CreateNode(AND, l, r); if (CNF_trace) { cout << "Rewriting " << form << endl << "to" << andor << endl << "-------------------" << endl; } result = ToCNF_int(cll, andor); break; } case IMPLIES: { // Just rewrite into (~a OR b) ASTNode l = bm->CreateSimpNot(form[0]); ASTNode andor = bm->CreateNode(OR, l, form[1]); if (CNF_trace) { cout << "Rewriting " << form << endl << "to" << andor << endl << "-------------------" << endl; } result = ToCNF_int(cll, andor); break; } case XOR: { FatalError("ToCNF_int: XORs should have been converted to AND/OR by this point."); break; } case IFF: { FatalError("BitBlaster failed to eliminate all IFFs."); break; } case AND: case OR: case NOR: case NAND: { result = ToCNF_AndLike(k, cll, form); break; } default: cerr << "ToCNF: can't handle this kind: " << k << endl; FatalError(""); } if (CNF_trace) { cout << "ToCNF_int: Literal " << result << " represents formula " << form << endl << "---------------" << endl; } return CNFMemoize(form, result); } //end of ToCNF_int() }; // end of CNFMgr class // These are the bodies of functions in the BeevMgr that are part of // the public interface. // Debug printing function. void BeevMgr::PrintClauseList(ostream& os, BeevMgr::ClauseList& cll) { int num_clauses = cll.size(); os << "Clauses: " << endl << "=========================================" << endl; for(int i=0; i < num_clauses; i++) { os << "Clause " << i << endl << "-------------------------------------------" << endl; LispPrintVec(os, *cll[i], 0); os << endl << "-------------------------------------------" << endl; } } void BeevMgr::DeleteClauseList(BeevMgr::ClauseList *cllp) { BeevMgr::ClauseList::const_iterator iend = cllp->end(); for (BeevMgr::ClauseList::const_iterator i = cllp->begin(); i < iend; i++) { delete *i; } delete cllp; } // Top level conversion function BeevMgr::ClauseList *BeevMgr::ToCNF(const ASTNode& form) { // FIXME: This is leaked as well. CNFMgr *cm = new CNFMgr(this); // Prepass ASTNode form1 = cm->ToCNFPrePass(form); // cout << "Number of nodes after ToCNFPrePass" << NodeSize(form1, true) << endl; // cout << "ToCNF: After ToCNFPrePass" << form1 << endl; // FIXME: Assert CNFMemo is empty. // The clause list we will be building up. // FIXME: This is never freed. ClauseList *cllp = new ClauseList(); BeevMgr::ClausePtr dummy_true_unit_clause = new ASTVec(); dummy_true_unit_clause->push_back(cm->dummy_true_var); cllp->push_back(dummy_true_unit_clause); // This is where the translation happens. ASTNode toplit = cm->ToCNF_int(*cllp, form1); // Add the top literal as a unit clause, since it must // be true when original formula is satsfied. BeevMgr::ClausePtr clp = new ASTVec(0); clp->push_back(toplit); cllp->push_back(clp); cm->stats._num_clauses = cllp->size(); cm->stats.printStats(); RepLitMap = cm->CNFMemo; // Save memo table for debugging (DD 1/13/07). cm->CNFMemo.clear(); // Important to do this so nodes get freed. delete cm; return cllp; } } // end namespace