about summary refs log tree commit diff homepage
path: root/include
diff options
context:
space:
mode:
authorEric Rizzi <eric.rizzi@gmail.com>2015-02-16 13:16:12 -0500
committerEric Rizzi <eric.rizzi@gmail.com>2015-02-22 16:10:02 -0500
commitf049ff3bc04daead8c3bb9f06e89e71e2054c82a (patch)
tree8de123348bb44c2964de383b790062d0fb19cbaf /include
parentad4f23ac1b1faa561d199b27e041e1a1afa3adcb (diff)
downloadklee-f049ff3bc04daead8c3bb9f06e89e71e2054c82a.tar.gz
Added factory method for Arrays + hid constructors from outside calls
The way that Arrays were handled in the past led to the possibility of
aliasing issues.  This occured whenever a new branch discovered an array
for the first time.  Each branch would create a new instance of the same
array without seeing if it had been created before. Therefore, should a
new branch encounter the same state as some previous branch, the
previous branch's solution wouldn't satisfy the new state since they
didn't recognize they were referencing the same array.  By creating an
array factory that creates a single symbolic array, that problem is
handled.  Note: Concrete arrays should not be created by the factory
method since their values are never shared between branches.

The factory works by seeing if an array with a similar hash has been
created before (the hash is based on the name and size of array).  If
there has been it then searches through all of the arrays with the same
hash (stored in a vector) to see if there is one with an exact match.
If there is one, the address of this previously created equivalent
array is returned.  Otherwise, the newly created array is unique, it is
added to the map, and it's address is returned.

This aliasing issue can be seen by comparing the output of the
Dogfood/ImmutableSet.cpp test cases with and with out this commit.
Both act correctly, but the number of queries making it to the solver
in the previous version is much greater 244 vs 211.  This is because
the UBTree in the CexCachingSolver and the cache in the CachingSolver
do not recognize queries whose solutions were previously calculated
because it doesn't think the arrays in the two queries are the same.
While this does not cause an error, it does mean that extra calls are
made.
Diffstat (limited to 'include')
-rw-r--r--include/klee/Expr.h71
1 files changed, 54 insertions, 17 deletions
diff --git a/include/klee/Expr.h b/include/klee/Expr.h
index c78cd690..2ea7f40f 100644
--- a/include/klee/Expr.h
+++ b/include/klee/Expr.h
@@ -22,6 +22,7 @@
 #include <sstream>
 #include <set>
 #include <vector>
+#include <map>
 
 namespace llvm {
   class Type;
@@ -600,18 +601,41 @@ private:
 
 class Array {
 public:
+  //Name of the array
   const std::string name;
+
   // FIXME: Not 64-bit clean.
-  unsigned size;  
+  const unsigned size;
+
+  /// Domain is how many bits can be used to access the array [32 bits]
+  /// Range is the size (in bits) of the number stored there (array of bytes -> 8)
+  const Expr::Width domain, range;
 
   /// constantValues - The constant initial values for this array, or empty for
   /// a symbolic array. If non-empty, this size of this array is equivalent to
   /// the array size.
-  const std::vector< ref<ConstantExpr> > constantValues;
+  const std::vector<ref<ConstantExpr> > constantValues;
 
-  Expr::Width domain, range;
+private:
+  unsigned hashValue;
+
+
+  /// The symbolic array singleton map is necessary to prevent aliasing issues on arrays.
+  /// A lot of the Solvers use maps such as <const * Array, std::vector<unsigned>>.
+  /// This causes problems because stored answers can't be easily recovered.
+  /// Even worse, in read expressions, such as (read array 32), array is a pointer,
+  /// meaning actual solutions don't "work" because the two instances of array
+  /// aren't recognized as the same.
+  static std::map<unsigned, std::vector<const Array *> *> symbolicArraySingletonMap;
+
+  //This shouldn't be allowed since it's a singleton class
+  Array(const Array& array);
+
+  //This shouldn't be allowed since it's a singleton class
+  Array& operator =(const Array& array);
+
+  ~Array();
 
-public:
   /// Array - Construct a new array object.
   ///
   /// \param _name - The name for this array. Names should generally be unique
@@ -619,13 +643,12 @@ public:
   /// when printing expressions. When expressions are printed the output will
   /// not parse correctly since two arrays with the same name cannot be
   /// distinguished once printed.
-  Array(const std::string &_name, uint64_t _size, 
-        const ref<ConstantExpr> *constantValuesBegin = 0,
-        const ref<ConstantExpr> *constantValuesEnd = 0,
-        Expr::Width _domain = Expr::Int32, Expr::Width _range = Expr::Int8)
-    : name(_name), size(_size), 
-      constantValues(constantValuesBegin, constantValuesEnd),
-      domain(_domain), range(_range) {
+  Array(const std::string &_name, uint64_t _size,
+		const ref<ConstantExpr> *constantValuesBegin = 0,
+		const ref<ConstantExpr> *constantValuesEnd = 0,
+		Expr::Width _domain = Expr::Int32, Expr::Width _range = Expr::Int8)
+    : name(_name), size(_size), domain(_domain), range(_range),
+	  constantValues(constantValuesBegin, constantValuesEnd) {
     assert((isSymbolicArray() || constantValues.size() == size) &&
            "Invalid size for constant array!");
     computeHash();
@@ -634,21 +657,35 @@ public:
          it != constantValuesEnd; ++it)
       assert((*it)->getWidth() == getRange() &&
              "Invalid initial constant value!");
-#endif
+#endif //NDEBUG
   }
-  ~Array();
 
+public:
   bool isSymbolicArray() const { return constantValues.empty(); }
   bool isConstantArray() const { return !isSymbolicArray(); }
 
+  const std::string getName() const { return name; }
+  unsigned getSize() const { return size; }
   Expr::Width getDomain() const { return domain; }
   Expr::Width getRange() const { return range; }
-  
+
+  /// ComputeHash must take into account the name, the size, the domain, and the range
   unsigned computeHash();
   unsigned hash() const { return hashValue; }
-   
-private:
-  unsigned hashValue;
+
+  /*
+   * Fairly simple idea.  Since by defn (see Array), symbolic arrays
+   * have constant values of size 0.  Concrete Arrays have their respective
+   * values stored in each element.  Therefore, each incoming call is tested.
+   * An array is created and, if it's concrete, it's simply returned.  If
+   * instead it is symbolic, then a map is checked to see if it was created
+   * before, so there is only a single instance floating out there.
+   */
+  static const Array * CreateArray(const std::string &_name, uint64_t _size,
+	                               const ref<ConstantExpr> *constantValuesBegin = 0,
+			                       const ref<ConstantExpr> *constantValuesEnd = 0,
+			                       Expr::Width _domain = Expr::Int32,
+								   Expr::Width _range = Expr::Int8);
 };
 
 /// Class representing a complete list of updates into an array.