about summary refs log tree commit diff homepage
path: root/stp/AST/ToCNF.cpp
blob: 2a18b3f5aaeb78be03c30b681bf35e6d5a3aa3b5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
/********************************************************************
 * AUTHORS: David L. Dill, Vijay Ganesh 
 *
 * BEGIN DATE: November, 2005
 *
 * LICENSE: Please view LICENSE file in the home dir of this Program
 ********************************************************************/
// -*- c++ -*-

// THEORY: This code translates an arbitrary Boolean DAG, generated by
// the BitBlast.cpp, to an equi-satisfiable CNF formula.  There are
// four kinds of variables in the CNF formula: (1) propositional
// variables from the original formula; (2) BVGETBIT formulas from the
// original formula (a precondition is that the BVGETBIT can only be
// applied to bitvector-valued variables, array reads, or
// uninterpreted functions); (3) TRUE; or (4) representative variables
// (see below).  Each literal in the CNF formula is one of these or
// its negation.  

// It is convenient (though not perfectly efficient) to be able to add
// TRUE and FALSE constants to clauses, which is not allowed in CNF.
// So, there is a "dummy variable" representing TRUE, which is used in
// its place (so FALSE is represented by the negation of the dummy
// var).  The CNF formula has a unit clause containing this dummy
// variable, so that any satisfying assignment must make the dummy var
// TRUE.

// Every sub-formula of the input formula has a "representative
// literal."  A truth assignment satisfies the CNF formula iff the
// restriction of that assignment to the original variables satisfies
// the original formula, AND the rep lits have the same truth values
// as the subformulas they represent in the original formula.  In the
// trivial cases, the representative literal is the input variable, or
// dummy true var, or its negation.  Representative literals may be
// negated variables -- essentially, only AND formulas are translated,
// and everything else is handled by rewriting or negating formulas.
// The representative for (NOT alpha) is the negation of the
// representative for alpha.

// The translation is performed by ToCNF_int, which traverses the original
// formula.  ToCNF adds clauses that constrain the representative variables
// to be equal to the truth values of the formulas they represent.
// ToCNF always returns a literal whose value must be equivalent to the formula
// it translated.  In trivial cases, this literal is a literal from the original
// formula, or the dummy true/false literals.  If the formula is of the form
// (not alpha), ToCNF_int negates the literal representing alpha (which may
// itself be a negative literal) and returns it.  Otherwise, ToCNF_int assigns
// a new rep var, adds the clauses, and returns the new var.  ToCNF_int is
// memoized so that it doesn't assign more than one variable to a subformula,
// and to prevent exponential numbers of redundant visits to shared subformulas.

// In reality, only AND/OR/NOT formulas are translated directly.  Everything
// else (XOR, IFF, IMPLIES) is rewritten on-the-fly into these forms.  I
// could have done that in bit-blasting, but I thought that, in the future,
// we might be able to translate these operations differently in different
// contexts to optimize the CNF formula.

// FIXME: Inspection of the clauses is kind of horrifying.  In
// addition to true/false, there are duplicate literals and duplicate
// clauses all over the place.
#include "AST.h"
static bool CNF_trace = false;
namespace BEEV {
/**  Statistics class.  Total number of variables is best tracked in
     ToSAT.  Number of clauses is just cll.size() */

class CNFstats {
public:
  int _num_new_rep_vars;
  int _num_clauses;
  
  // constructor
  CNFstats() : _num_new_rep_vars(0), _num_clauses(0) {}

  void printStats() {
    if(stats) {
      cout << "ToCNF statistics:" << endl;
      cout << "Number of new representative variables: "
	   << _num_new_rep_vars << endl;
      cout << "Number of new clauses: "
	   << _num_clauses << endl;
    }
  }

};


/** This class contains private data and function members for the
    CNF conversion */
class CNFMgr {

  friend class BeevMgr;

public:    

  // Needed in the body of BeevMgr::ToCNF.  It's not visible outside
  // this file, though.
  ASTNode dummy_true_var;

  // CNF Pre-pass
  ASTNodeMap ToCNFPrePassMemo;

  // CNF Memo Table.
  ASTNodeMap CNFMemo;


private:

  // Pointer back to BeevMgr with the node tables, etc.
  BeevMgr *bm;
  
  // For ToCNF conversion.  This holds a dummy variable representing
  // "True".  It is added as a unit clause, so that it will be assigned
  // to true and propagated immediately by any CNF solver.

  ASTNode dummy_false_var;	// not of dummy_true_var

  CNFstats stats;

  // constructor
  CNFMgr(BeevMgr *bmgr)
  {
    bm = bmgr;

    // Dummy variable so TRUE can be a literal.
    dummy_true_var = bm->CreateSymbol("*TrueDummy*");
    dummy_false_var = bm->CreateSimpNot(dummy_true_var);
  }
  
  // Returns true iff result has been memoized.
  // if it returns true, result is returned in by-ref parameter "result"
  // Consider just putitng this in-line.
  bool CNFIsMemoized(ASTNode &form, ASTNode &result)
  {
    ASTNodeMap::iterator it = CNFMemo.find(form);
    if (it != CNFMemo.end()) {
      result = it->second;  //already there. Just return it.
      return true;
    }
    else {
      return false;
    }
  }


  // Convert a big XOR to a bunch of AND/ORs.  Assumes subformulas have
  // already been converted.
  ASTNode convertXORs(ASTVec children)
  {
    ASTNode accum = children[0];
    ASTVec::iterator itend = children.end();
    for (ASTVec::iterator it = children.begin()+1; it < itend; it++) {
      // a XOR b -> (a | b) & (!a | !b)

      // For each XOR node with k children, creates approximately
      // 5*(k-1) nodes. AND + 2 OR + 2 NOT.

      ASTNode or1 = bm->CreateNode(OR, accum, *it);
      ASTNode or2 = bm->CreateNode(OR, bm->CreateSimpNot(accum), bm->CreateSimpNot(*it));
      accum = bm->CreateNode(AND, or1, or2);
      
    }
    
    return accum;
  }


  // Do preliminary transformations on bitblasted formula to make 
  // CNF conversion easier.
  // Converts XORs to AND/OR form.
  ASTNode ToCNFPrePass(const ASTNode &form)
  {

    // Check memo table
    ASTNodeMap::iterator mem_it = ToCNFPrePassMemo.find(form);
    if (mem_it != ToCNFPrePassMemo.end()) {
      return mem_it->second;
    }
    
    ASTNode result;

    ASTVec new_children;
    ASTVec::const_iterator endit = form.end();
    for (ASTVec::const_iterator it = form.begin(); it != endit; it++) {
      ASTNode ch = ToCNFPrePass(*it);
      new_children.push_back(ch);
    }

    Kind k = form.GetKind();

    switch (k) {
    case FALSE: 
    case TRUE:
    case SYMBOL: 
    case BVGETBIT: {
      result = form;
      break;
    }
    case XOR: {
      // convertXORs can only be called once per XOR node.
      result = convertXORs(new_children);

      // cout << "convertXORs num args: "  << new_children.size() << endl;
      // temporary node for node count.
      // ASTNode tmp = bm->CreateNode(XOR, new_children );
      // cout << "convertXORs size of [" << form.GetNodeNum() << "] " << bm->NodeSize(form, true) << endl;
      // cout << "convertXORs before size: " << bm->NodeSize(tmp, true) << endl;
      // cout << "convertXORs after size: " << bm->NodeSize(result, true) << endl;
      break;
    }
    default: {
      // Be cautious about using CreateSimpForm -- It makes big xors!
      result = bm->CreateNode(k, new_children);
    }
    }

//     cout << "================" << endl
// 	 << "ToCNFPrePass:" << form << endl
// 	 << "----------------" << endl
// 	 << "ToCNFPrePass Result:" << result << endl;
    
    return (ToCNFPrePassMemo[form] = result);
    
  }

  // Memoize and return formula value
  ASTNode CNFMemoize(ASTNode& form, ASTNode result)
  {
    CNFMemo[form] = result;
    return result;    
  }

 
  // Create a representative variable for an original formula.
  // The convention is that the variable will have the same truth
  // value as the expression numbered "num."  
  ASTNode RepLit(const char *name, int exprnum)
  {
    // FIXME: can this be done more efficiently with string type?
    ostringstream oss;
    oss << name << "{" << exprnum << "}";
    ASTNode t = bm->CreateSymbol(oss.str().c_str());

    // Track how many we're generating.
    stats._num_new_rep_vars++;
    
    //ASTNode is of type BOOLEAN <==> ((indexwidth=0)&&(valuewidth=0))
    t.SetIndexWidth(0);
    t.SetValueWidth(0);
    return t;
  }

  // Handle the cases where it's necessary to do n children.
  // This code translates ANDs, and converts NAND, NOR, OR by negating
  // the inputs or outputs of the AND.
  ASTNode ToCNF_AndLike(Kind k, BeevMgr::ClauseList& cll, ASTNode form)
  {  
    // Build vectors of positive and negative rep lits for children
    ASTVec kidlits(0);
    ASTVec negkidlits(0);
    ASTVec::const_iterator kids_end = form.end();
    for (ASTVec::const_iterator it = form.begin(); it != kids_end; it++) {
      ASTNode kidreplit = ToCNF_int(cll, *it);
      kidlits.push_back(kidreplit);
      negkidlits.push_back(bm->CreateSimpNot(kidreplit));
    }
    
    ASTNode replit;
    // translate the AND, negating inputs as appropriate.
    if (k == OR || k == NOR) {
      replit = ToCNF_AND(cll, form.GetNodeNum(), negkidlits, kidlits);
    }
    else {
      replit = ToCNF_AND(cll, form.GetNodeNum(), kidlits, negkidlits);
    }
    
    // Reduce NAND/OR to AND by negating result.
    if (k == NAND || k == OR) {
      return CNFMemoize(form, bm->CreateSimpNot(replit));
    }
    else {
      return CNFMemoize(form, replit);
    }
  }

  ASTNode ToCNF_AND(BeevMgr::ClauseList& cll, int nodenum, ASTVec& kidlits, ASTVec& negkidlits)
  {
    // Translate an AND, given rep lits for children
    // Build clauses for (replit <-> a AND b AND c)

    ASTNode replit = RepLit("cnf", nodenum);
    ASTNode notreplit = bm->CreateSimpNot(replit);

    if (CNF_trace) {
      cout << "Translating AND" << endl << "-----------------------" << endl
	   << "Rep lit =" <<  replit << endl
	   << "-----------------------";
    }

    // (a AND b AND c -> replit) ==   (~a OR ~b OR ~c OR replit)
    BeevMgr::ClausePtr clp = new ASTVec(negkidlits);
    clp->push_back(replit);

    if (CNF_trace) {
      LispPrintVec(cout, *clp, 0);
      cout << endl << "-----------------------" << endl;
    }

    cll.push_back(clp);

    // (replit -> (a AND b AND c)) == 
    //     (~replit OR a) AND (~replit OR b) AND (~replit OR c)
    ASTVec::const_iterator kidlits_end = kidlits.end();
    for (ASTVec::iterator it = kidlits.begin(); it != kidlits_end; it++) {
      clp = new ASTVec();
      clp->push_back(notreplit);
      clp->push_back(*it);

      if (CNF_trace) {
	LispPrintVec(cout, *clp, 0);
	cout << endl << "-----------------------" << endl;
      }

      cll.push_back(clp);
    }

    return replit;
  }

public:
   
  /** Builds clauses globally and returns a literal.
      The literal can be a leaf from the expression, or a rep var
      made up to represent the subexpression. */
  ASTNode ToCNF_int(BeevMgr::BeevMgr::ClauseList& cll, ASTNode form) {
    // FIXME: assert indexwidth= 0, valuewidth = 1

    // FIXME:  rewriting is top-down, which is not efficient.
    // It rewrites the top node of the tree, then does the children.
    // Either rewrite in a separate pass, or translate children
    // before rewriting somehow (might require handling rep lits
    // as though they were real lits, which is probably ok).
    
    // Return memoized value if seen before.
    ASTNode result;
    Kind k = form.GetKind();

    if (CNFIsMemoized(form, result)) {
      return result;
    }

    switch (k) {
      // handle the trivial cases here.  If none apply, call the
      // heavy-duty function.  If a constant or literal, just return
      // without creating a clause.
    case FALSE: {
      result = dummy_false_var;
      break;
    }
    case TRUE: {
      result =  dummy_true_var;
      break;
    }
    case SYMBOL: 
    case BVGETBIT: {
      result = form;
      break;
    }
      
    case NOT: {
	ASTNode replit = ToCNF_int(cll, form[0]);
	result = bm->CreateSimpNot(replit);
      break;
    }
      
      // For these, I can't think of anything better than expanding into ANDs/ORs
    case ITE: {
      // (ite a b c) == (~a OR b) AND (a OR c)
      ASTNode l = bm->CreateNode(OR, bm->CreateSimpNot(form[0]), form[1]);
      ASTNode r = bm->CreateNode(OR, form[0], form[2]);
      ASTNode andor = bm->CreateNode(AND, l, r);
      if (CNF_trace) {
	cout << "Rewriting " << form << endl
	     << "to" << andor << endl
	     << "-------------------" << endl;
      }
      result = ToCNF_int(cll, andor);
      break;
    }
    case IMPLIES: {
      // Just rewrite into (~a OR b)
      ASTNode l = bm->CreateSimpNot(form[0]);
      ASTNode andor = bm->CreateNode(OR, l, form[1]);
      if (CNF_trace) {
	cout << "Rewriting " << form << endl
	     << "to" << andor << endl
	     << "-------------------" << endl;
      }
      result = ToCNF_int(cll, andor);
      break;
    }
    case XOR: {
      FatalError("ToCNF_int: XORs should have been converted to AND/OR by this point.");
      break;
    }

    case IFF: {
      FatalError("BitBlaster failed to eliminate all IFFs.");
      break;
    }
    
    case AND:
    case OR:
    case NOR:
    case NAND: {
      result = ToCNF_AndLike(k, cll, form);
      break;
    }
    default:
      cerr << "ToCNF: can't handle this kind: " << k << endl;
      FatalError("");
    }

    if (CNF_trace) {
      cout << "ToCNF_int: Literal " << result << " represents formula " <<
	form << endl << "---------------" << endl;
    }

    return CNFMemoize(form, result);
  } //end of ToCNF_int()


}; // end of CNFMgr class

  // These are the bodies of functions in the BeevMgr that are part of
  // the public interface.

  // Debug printing function.
  void BeevMgr::PrintClauseList(ostream& os, BeevMgr::ClauseList& cll)
  {
    int num_clauses = cll.size();
    os << "Clauses: " << endl << "=========================================" << endl;
    for(int i=0; i < num_clauses; i++) {
      os << "Clause " << i << endl
	 << "-------------------------------------------" << endl;
      LispPrintVec(os, *cll[i], 0);
      os << endl
	 << "-------------------------------------------" << endl;
    }
  }

  void BeevMgr::DeleteClauseList(BeevMgr::ClauseList *cllp)
  {
    BeevMgr::ClauseList::const_iterator iend = cllp->end();
    for (BeevMgr::ClauseList::const_iterator i = cllp->begin(); i < iend; i++) {
      delete *i;
    }
    delete cllp;
  }

  // Top level conversion function
  BeevMgr::ClauseList *BeevMgr::ToCNF(const ASTNode& form) 
  {

    // FIXME: This is leaked as well.
    CNFMgr *cm = new CNFMgr(this);
   
    // Prepass
    ASTNode form1 = cm->ToCNFPrePass(form);

    // cout << "Number of nodes after ToCNFPrePass" << NodeSize(form1, true) << endl;

    // cout << "ToCNF: After ToCNFPrePass" << form1 << endl;

    // FIXME: Assert CNFMemo is empty.

    // The clause list we will be building up.
    // FIXME: This is never freed.
    ClauseList *cllp = new ClauseList();

    BeevMgr::ClausePtr dummy_true_unit_clause = new ASTVec();
    dummy_true_unit_clause->push_back(cm->dummy_true_var);
    cllp->push_back(dummy_true_unit_clause);

    // This is where the translation happens.
    ASTNode toplit = cm->ToCNF_int(*cllp, form1);

    // Add the top literal as a unit clause, since it must
    // be true when original formula is satsfied.
    BeevMgr::ClausePtr clp = new ASTVec(0);
    clp->push_back(toplit);
    cllp->push_back(clp);

    cm->stats._num_clauses = cllp->size();
    cm->stats.printStats();

    RepLitMap = cm->CNFMemo;	// Save memo table for debugging (DD 1/13/07).

    cm->CNFMemo.clear();   // Important to do this so nodes get freed.

    delete cm;

    return cllp;
  }

} // end namespace