about summary refs log tree commit diff homepage
path: root/stp/AST/ToCNF.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'stp/AST/ToCNF.cpp')
-rw-r--r--stp/AST/ToCNF.cpp506
1 files changed, 506 insertions, 0 deletions
diff --git a/stp/AST/ToCNF.cpp b/stp/AST/ToCNF.cpp
new file mode 100644
index 00000000..2a18b3f5
--- /dev/null
+++ b/stp/AST/ToCNF.cpp
@@ -0,0 +1,506 @@
+/********************************************************************
+ * AUTHORS: David L. Dill, Vijay Ganesh 
+ *
+ * BEGIN DATE: November, 2005
+ *
+ * LICENSE: Please view LICENSE file in the home dir of this Program
+ ********************************************************************/
+// -*- c++ -*-
+
+// THEORY: This code translates an arbitrary Boolean DAG, generated by
+// the BitBlast.cpp, to an equi-satisfiable CNF formula.  There are
+// four kinds of variables in the CNF formula: (1) propositional
+// variables from the original formula; (2) BVGETBIT formulas from the
+// original formula (a precondition is that the BVGETBIT can only be
+// applied to bitvector-valued variables, array reads, or
+// uninterpreted functions); (3) TRUE; or (4) representative variables
+// (see below).  Each literal in the CNF formula is one of these or
+// its negation.  
+
+// It is convenient (though not perfectly efficient) to be able to add
+// TRUE and FALSE constants to clauses, which is not allowed in CNF.
+// So, there is a "dummy variable" representing TRUE, which is used in
+// its place (so FALSE is represented by the negation of the dummy
+// var).  The CNF formula has a unit clause containing this dummy
+// variable, so that any satisfying assignment must make the dummy var
+// TRUE.
+
+// Every sub-formula of the input formula has a "representative
+// literal."  A truth assignment satisfies the CNF formula iff the
+// restriction of that assignment to the original variables satisfies
+// the original formula, AND the rep lits have the same truth values
+// as the subformulas they represent in the original formula.  In the
+// trivial cases, the representative literal is the input variable, or
+// dummy true var, or its negation.  Representative literals may be
+// negated variables -- essentially, only AND formulas are translated,
+// and everything else is handled by rewriting or negating formulas.
+// The representative for (NOT alpha) is the negation of the
+// representative for alpha.
+
+// The translation is performed by ToCNF_int, which traverses the original
+// formula.  ToCNF adds clauses that constrain the representative variables
+// to be equal to the truth values of the formulas they represent.
+// ToCNF always returns a literal whose value must be equivalent to the formula
+// it translated.  In trivial cases, this literal is a literal from the original
+// formula, or the dummy true/false literals.  If the formula is of the form
+// (not alpha), ToCNF_int negates the literal representing alpha (which may
+// itself be a negative literal) and returns it.  Otherwise, ToCNF_int assigns
+// a new rep var, adds the clauses, and returns the new var.  ToCNF_int is
+// memoized so that it doesn't assign more than one variable to a subformula,
+// and to prevent exponential numbers of redundant visits to shared subformulas.
+
+// In reality, only AND/OR/NOT formulas are translated directly.  Everything
+// else (XOR, IFF, IMPLIES) is rewritten on-the-fly into these forms.  I
+// could have done that in bit-blasting, but I thought that, in the future,
+// we might be able to translate these operations differently in different
+// contexts to optimize the CNF formula.
+
+// FIXME: Inspection of the clauses is kind of horrifying.  In
+// addition to true/false, there are duplicate literals and duplicate
+// clauses all over the place.
+#include "AST.h"
+static bool CNF_trace = false;
+namespace BEEV {
+/**  Statistics class.  Total number of variables is best tracked in
+     ToSAT.  Number of clauses is just cll.size() */
+
+class CNFstats {
+public:
+  int _num_new_rep_vars;
+  int _num_clauses;
+  
+  // constructor
+  CNFstats() : _num_new_rep_vars(0), _num_clauses(0) {}
+
+  void printStats() {
+    if(stats) {
+      cout << "ToCNF statistics:" << endl;
+      cout << "Number of new representative variables: "
+	   << _num_new_rep_vars << endl;
+      cout << "Number of new clauses: "
+	   << _num_clauses << endl;
+    }
+  }
+
+};
+
+
+/** This class contains private data and function members for the
+    CNF conversion */
+class CNFMgr {
+
+  friend class BeevMgr;
+
+public:    
+
+  // Needed in the body of BeevMgr::ToCNF.  It's not visible outside
+  // this file, though.
+  ASTNode dummy_true_var;
+
+  // CNF Pre-pass
+  ASTNodeMap ToCNFPrePassMemo;
+
+  // CNF Memo Table.
+  ASTNodeMap CNFMemo;
+
+
+private:
+
+  // Pointer back to BeevMgr with the node tables, etc.
+  BeevMgr *bm;
+  
+  // For ToCNF conversion.  This holds a dummy variable representing
+  // "True".  It is added as a unit clause, so that it will be assigned
+  // to true and propagated immediately by any CNF solver.
+
+  ASTNode dummy_false_var;	// not of dummy_true_var
+
+  CNFstats stats;
+
+  // constructor
+  CNFMgr(BeevMgr *bmgr)
+  {
+    bm = bmgr;
+
+    // Dummy variable so TRUE can be a literal.
+    dummy_true_var = bm->CreateSymbol("*TrueDummy*");
+    dummy_false_var = bm->CreateSimpNot(dummy_true_var);
+  }
+  
+  // Returns true iff result has been memoized.
+  // if it returns true, result is returned in by-ref parameter "result"
+  // Consider just putitng this in-line.
+  bool CNFIsMemoized(ASTNode &form, ASTNode &result)
+  {
+    ASTNodeMap::iterator it = CNFMemo.find(form);
+    if (it != CNFMemo.end()) {
+      result = it->second;  //already there. Just return it.
+      return true;
+    }
+    else {
+      return false;
+    }
+  }
+
+
+  // Convert a big XOR to a bunch of AND/ORs.  Assumes subformulas have
+  // already been converted.
+  ASTNode convertXORs(ASTVec children)
+  {
+    ASTNode accum = children[0];
+    ASTVec::iterator itend = children.end();
+    for (ASTVec::iterator it = children.begin()+1; it < itend; it++) {
+      // a XOR b -> (a | b) & (!a | !b)
+
+      // For each XOR node with k children, creates approximately
+      // 5*(k-1) nodes. AND + 2 OR + 2 NOT.
+
+      ASTNode or1 = bm->CreateNode(OR, accum, *it);
+      ASTNode or2 = bm->CreateNode(OR, bm->CreateSimpNot(accum), bm->CreateSimpNot(*it));
+      accum = bm->CreateNode(AND, or1, or2);
+      
+    }
+    
+    return accum;
+  }
+
+
+  // Do preliminary transformations on bitblasted formula to make 
+  // CNF conversion easier.
+  // Converts XORs to AND/OR form.
+  ASTNode ToCNFPrePass(const ASTNode &form)
+  {
+
+    // Check memo table
+    ASTNodeMap::iterator mem_it = ToCNFPrePassMemo.find(form);
+    if (mem_it != ToCNFPrePassMemo.end()) {
+      return mem_it->second;
+    }
+    
+    ASTNode result;
+
+    ASTVec new_children;
+    ASTVec::const_iterator endit = form.end();
+    for (ASTVec::const_iterator it = form.begin(); it != endit; it++) {
+      ASTNode ch = ToCNFPrePass(*it);
+      new_children.push_back(ch);
+    }
+
+    Kind k = form.GetKind();
+
+    switch (k) {
+    case FALSE: 
+    case TRUE:
+    case SYMBOL: 
+    case BVGETBIT: {
+      result = form;
+      break;
+    }
+    case XOR: {
+      // convertXORs can only be called once per XOR node.
+      result = convertXORs(new_children);
+
+      // cout << "convertXORs num args: "  << new_children.size() << endl;
+      // temporary node for node count.
+      // ASTNode tmp = bm->CreateNode(XOR, new_children );
+      // cout << "convertXORs size of [" << form.GetNodeNum() << "] " << bm->NodeSize(form, true) << endl;
+      // cout << "convertXORs before size: " << bm->NodeSize(tmp, true) << endl;
+      // cout << "convertXORs after size: " << bm->NodeSize(result, true) << endl;
+      break;
+    }
+    default: {
+      // Be cautious about using CreateSimpForm -- It makes big xors!
+      result = bm->CreateNode(k, new_children);
+    }
+    }
+
+//     cout << "================" << endl
+// 	 << "ToCNFPrePass:" << form << endl
+// 	 << "----------------" << endl
+// 	 << "ToCNFPrePass Result:" << result << endl;
+    
+    return (ToCNFPrePassMemo[form] = result);
+    
+  }
+
+  // Memoize and return formula value
+  ASTNode CNFMemoize(ASTNode& form, ASTNode result)
+  {
+    CNFMemo[form] = result;
+    return result;    
+  }
+
+ 
+  // Create a representative variable for an original formula.
+  // The convention is that the variable will have the same truth
+  // value as the expression numbered "num."  
+  ASTNode RepLit(const char *name, int exprnum)
+  {
+    // FIXME: can this be done more efficiently with string type?
+    ostringstream oss;
+    oss << name << "{" << exprnum << "}";
+    ASTNode t = bm->CreateSymbol(oss.str().c_str());
+
+    // Track how many we're generating.
+    stats._num_new_rep_vars++;
+    
+    //ASTNode is of type BOOLEAN <==> ((indexwidth=0)&&(valuewidth=0))
+    t.SetIndexWidth(0);
+    t.SetValueWidth(0);
+    return t;
+  }
+
+  // Handle the cases where it's necessary to do n children.
+  // This code translates ANDs, and converts NAND, NOR, OR by negating
+  // the inputs or outputs of the AND.
+  ASTNode ToCNF_AndLike(Kind k, BeevMgr::ClauseList& cll, ASTNode form)
+  {  
+    // Build vectors of positive and negative rep lits for children
+    ASTVec kidlits(0);
+    ASTVec negkidlits(0);
+    ASTVec::const_iterator kids_end = form.end();
+    for (ASTVec::const_iterator it = form.begin(); it != kids_end; it++) {
+      ASTNode kidreplit = ToCNF_int(cll, *it);
+      kidlits.push_back(kidreplit);
+      negkidlits.push_back(bm->CreateSimpNot(kidreplit));
+    }
+    
+    ASTNode replit;
+    // translate the AND, negating inputs as appropriate.
+    if (k == OR || k == NOR) {
+      replit = ToCNF_AND(cll, form.GetNodeNum(), negkidlits, kidlits);
+    }
+    else {
+      replit = ToCNF_AND(cll, form.GetNodeNum(), kidlits, negkidlits);
+    }
+    
+    // Reduce NAND/OR to AND by negating result.
+    if (k == NAND || k == OR) {
+      return CNFMemoize(form, bm->CreateSimpNot(replit));
+    }
+    else {
+      return CNFMemoize(form, replit);
+    }
+  }
+
+  ASTNode ToCNF_AND(BeevMgr::ClauseList& cll, int nodenum, ASTVec& kidlits, ASTVec& negkidlits)
+  {
+    // Translate an AND, given rep lits for children
+    // Build clauses for (replit <-> a AND b AND c)
+
+    ASTNode replit = RepLit("cnf", nodenum);
+    ASTNode notreplit = bm->CreateSimpNot(replit);
+
+    if (CNF_trace) {
+      cout << "Translating AND" << endl << "-----------------------" << endl
+	   << "Rep lit =" <<  replit << endl
+	   << "-----------------------";
+    }
+
+    // (a AND b AND c -> replit) ==   (~a OR ~b OR ~c OR replit)
+    BeevMgr::ClausePtr clp = new ASTVec(negkidlits);
+    clp->push_back(replit);
+
+    if (CNF_trace) {
+      LispPrintVec(cout, *clp, 0);
+      cout << endl << "-----------------------" << endl;
+    }
+
+    cll.push_back(clp);
+
+    // (replit -> (a AND b AND c)) == 
+    //     (~replit OR a) AND (~replit OR b) AND (~replit OR c)
+    ASTVec::const_iterator kidlits_end = kidlits.end();
+    for (ASTVec::iterator it = kidlits.begin(); it != kidlits_end; it++) {
+      clp = new ASTVec();
+      clp->push_back(notreplit);
+      clp->push_back(*it);
+
+      if (CNF_trace) {
+	LispPrintVec(cout, *clp, 0);
+	cout << endl << "-----------------------" << endl;
+      }
+
+      cll.push_back(clp);
+    }
+
+    return replit;
+  }
+
+public:
+   
+  /** Builds clauses globally and returns a literal.
+      The literal can be a leaf from the expression, or a rep var
+      made up to represent the subexpression. */
+  ASTNode ToCNF_int(BeevMgr::BeevMgr::ClauseList& cll, ASTNode form) {
+    // FIXME: assert indexwidth= 0, valuewidth = 1
+
+    // FIXME:  rewriting is top-down, which is not efficient.
+    // It rewrites the top node of the tree, then does the children.
+    // Either rewrite in a separate pass, or translate children
+    // before rewriting somehow (might require handling rep lits
+    // as though they were real lits, which is probably ok).
+    
+    // Return memoized value if seen before.
+    ASTNode result;
+    Kind k = form.GetKind();
+
+    if (CNFIsMemoized(form, result)) {
+      return result;
+    }
+
+    switch (k) {
+      // handle the trivial cases here.  If none apply, call the
+      // heavy-duty function.  If a constant or literal, just return
+      // without creating a clause.
+    case FALSE: {
+      result = dummy_false_var;
+      break;
+    }
+    case TRUE: {
+      result =  dummy_true_var;
+      break;
+    }
+    case SYMBOL: 
+    case BVGETBIT: {
+      result = form;
+      break;
+    }
+      
+    case NOT: {
+	ASTNode replit = ToCNF_int(cll, form[0]);
+	result = bm->CreateSimpNot(replit);
+      break;
+    }
+      
+      // For these, I can't think of anything better than expanding into ANDs/ORs
+    case ITE: {
+      // (ite a b c) == (~a OR b) AND (a OR c)
+      ASTNode l = bm->CreateNode(OR, bm->CreateSimpNot(form[0]), form[1]);
+      ASTNode r = bm->CreateNode(OR, form[0], form[2]);
+      ASTNode andor = bm->CreateNode(AND, l, r);
+      if (CNF_trace) {
+	cout << "Rewriting " << form << endl
+	     << "to" << andor << endl
+	     << "-------------------" << endl;
+      }
+      result = ToCNF_int(cll, andor);
+      break;
+    }
+    case IMPLIES: {
+      // Just rewrite into (~a OR b)
+      ASTNode l = bm->CreateSimpNot(form[0]);
+      ASTNode andor = bm->CreateNode(OR, l, form[1]);
+      if (CNF_trace) {
+	cout << "Rewriting " << form << endl
+	     << "to" << andor << endl
+	     << "-------------------" << endl;
+      }
+      result = ToCNF_int(cll, andor);
+      break;
+    }
+    case XOR: {
+      FatalError("ToCNF_int: XORs should have been converted to AND/OR by this point.");
+      break;
+    }
+
+    case IFF: {
+      FatalError("BitBlaster failed to eliminate all IFFs.");
+      break;
+    }
+    
+    case AND:
+    case OR:
+    case NOR:
+    case NAND: {
+      result = ToCNF_AndLike(k, cll, form);
+      break;
+    }
+    default:
+      cerr << "ToCNF: can't handle this kind: " << k << endl;
+      FatalError("");
+    }
+
+    if (CNF_trace) {
+      cout << "ToCNF_int: Literal " << result << " represents formula " <<
+	form << endl << "---------------" << endl;
+    }
+
+    return CNFMemoize(form, result);
+  } //end of ToCNF_int()
+
+
+}; // end of CNFMgr class
+
+  // These are the bodies of functions in the BeevMgr that are part of
+  // the public interface.
+
+  // Debug printing function.
+  void BeevMgr::PrintClauseList(ostream& os, BeevMgr::ClauseList& cll)
+  {
+    int num_clauses = cll.size();
+    os << "Clauses: " << endl << "=========================================" << endl;
+    for(int i=0; i < num_clauses; i++) {
+      os << "Clause " << i << endl
+	 << "-------------------------------------------" << endl;
+      LispPrintVec(os, *cll[i], 0);
+      os << endl
+	 << "-------------------------------------------" << endl;
+    }
+  }
+
+  void BeevMgr::DeleteClauseList(BeevMgr::ClauseList *cllp)
+  {
+    BeevMgr::ClauseList::const_iterator iend = cllp->end();
+    for (BeevMgr::ClauseList::const_iterator i = cllp->begin(); i < iend; i++) {
+      delete *i;
+    }
+    delete cllp;
+  }
+
+  // Top level conversion function
+  BeevMgr::ClauseList *BeevMgr::ToCNF(const ASTNode& form) 
+  {
+
+    // FIXME: This is leaked as well.
+    CNFMgr *cm = new CNFMgr(this);
+   
+    // Prepass
+    ASTNode form1 = cm->ToCNFPrePass(form);
+
+    // cout << "Number of nodes after ToCNFPrePass" << NodeSize(form1, true) << endl;
+
+    // cout << "ToCNF: After ToCNFPrePass" << form1 << endl;
+
+    // FIXME: Assert CNFMemo is empty.
+
+    // The clause list we will be building up.
+    // FIXME: This is never freed.
+    ClauseList *cllp = new ClauseList();
+
+    BeevMgr::ClausePtr dummy_true_unit_clause = new ASTVec();
+    dummy_true_unit_clause->push_back(cm->dummy_true_var);
+    cllp->push_back(dummy_true_unit_clause);
+
+    // This is where the translation happens.
+    ASTNode toplit = cm->ToCNF_int(*cllp, form1);
+
+    // Add the top literal as a unit clause, since it must
+    // be true when original formula is satsfied.
+    BeevMgr::ClausePtr clp = new ASTVec(0);
+    clp->push_back(toplit);
+    cllp->push_back(clp);
+
+    cm->stats._num_clauses = cllp->size();
+    cm->stats.printStats();
+
+    RepLitMap = cm->CNFMemo;	// Save memo table for debugging (DD 1/13/07).
+
+    cm->CNFMemo.clear();   // Important to do this so nodes get freed.
+
+    delete cm;
+
+    return cllp;
+  }
+
+} // end namespace