From f049ff3bc04daead8c3bb9f06e89e71e2054c82a Mon Sep 17 00:00:00 2001 From: Eric Rizzi Date: Mon, 16 Feb 2015 13:16:12 -0500 Subject: Added factory method for Arrays + hid constructors from outside calls The way that Arrays were handled in the past led to the possibility of aliasing issues. This occured whenever a new branch discovered an array for the first time. Each branch would create a new instance of the same array without seeing if it had been created before. Therefore, should a new branch encounter the same state as some previous branch, the previous branch's solution wouldn't satisfy the new state since they didn't recognize they were referencing the same array. By creating an array factory that creates a single symbolic array, that problem is handled. Note: Concrete arrays should not be created by the factory method since their values are never shared between branches. The factory works by seeing if an array with a similar hash has been created before (the hash is based on the name and size of array). If there has been it then searches through all of the arrays with the same hash (stored in a vector) to see if there is one with an exact match. If there is one, the address of this previously created equivalent array is returned. Otherwise, the newly created array is unique, it is added to the map, and it's address is returned. This aliasing issue can be seen by comparing the output of the Dogfood/ImmutableSet.cpp test cases with and with out this commit. Both act correctly, but the number of queries making it to the solver in the previous version is much greater 244 vs 211. This is because the UBTree in the CexCachingSolver and the cache in the CachingSolver do not recognize queries whose solutions were previously calculated because it doesn't think the arrays in the two queries are the same. While this does not cause an error, it does mean that extra calls are made. --- include/klee/Expr.h | 71 +++++++++++++++++++++++++++++++---------- lib/Core/Executor.cpp | 4 +-- lib/Core/Memory.cpp | 4 +-- lib/Expr/Expr.cpp | 36 +++++++++++++++++++++ lib/Expr/Parser.cpp | 8 ++--- lib/SMT/SMTParser.cpp | 2 +- unittests/Expr/ExprTest.cpp | 8 ++--- unittests/Solver/SolverTest.cpp | 2 +- 8 files changed, 104 insertions(+), 31 deletions(-) diff --git a/include/klee/Expr.h b/include/klee/Expr.h index c78cd690..2ea7f40f 100644 --- a/include/klee/Expr.h +++ b/include/klee/Expr.h @@ -22,6 +22,7 @@ #include #include #include +#include namespace llvm { class Type; @@ -600,18 +601,41 @@ private: class Array { public: + //Name of the array const std::string name; + // FIXME: Not 64-bit clean. - unsigned size; + const unsigned size; + + /// Domain is how many bits can be used to access the array [32 bits] + /// Range is the size (in bits) of the number stored there (array of bytes -> 8) + const Expr::Width domain, range; /// constantValues - The constant initial values for this array, or empty for /// a symbolic array. If non-empty, this size of this array is equivalent to /// the array size. - const std::vector< ref > constantValues; + const std::vector > constantValues; - Expr::Width domain, range; +private: + unsigned hashValue; + + + /// The symbolic array singleton map is necessary to prevent aliasing issues on arrays. + /// A lot of the Solvers use maps such as >. + /// This causes problems because stored answers can't be easily recovered. + /// Even worse, in read expressions, such as (read array 32), array is a pointer, + /// meaning actual solutions don't "work" because the two instances of array + /// aren't recognized as the same. + static std::map *> symbolicArraySingletonMap; + + //This shouldn't be allowed since it's a singleton class + Array(const Array& array); + + //This shouldn't be allowed since it's a singleton class + Array& operator =(const Array& array); + + ~Array(); -public: /// Array - Construct a new array object. /// /// \param _name - The name for this array. Names should generally be unique @@ -619,13 +643,12 @@ public: /// when printing expressions. When expressions are printed the output will /// not parse correctly since two arrays with the same name cannot be /// distinguished once printed. - Array(const std::string &_name, uint64_t _size, - const ref *constantValuesBegin = 0, - const ref *constantValuesEnd = 0, - Expr::Width _domain = Expr::Int32, Expr::Width _range = Expr::Int8) - : name(_name), size(_size), - constantValues(constantValuesBegin, constantValuesEnd), - domain(_domain), range(_range) { + Array(const std::string &_name, uint64_t _size, + const ref *constantValuesBegin = 0, + const ref *constantValuesEnd = 0, + Expr::Width _domain = Expr::Int32, Expr::Width _range = Expr::Int8) + : name(_name), size(_size), domain(_domain), range(_range), + constantValues(constantValuesBegin, constantValuesEnd) { assert((isSymbolicArray() || constantValues.size() == size) && "Invalid size for constant array!"); computeHash(); @@ -634,21 +657,35 @@ public: it != constantValuesEnd; ++it) assert((*it)->getWidth() == getRange() && "Invalid initial constant value!"); -#endif +#endif //NDEBUG } - ~Array(); +public: bool isSymbolicArray() const { return constantValues.empty(); } bool isConstantArray() const { return !isSymbolicArray(); } + const std::string getName() const { return name; } + unsigned getSize() const { return size; } Expr::Width getDomain() const { return domain; } Expr::Width getRange() const { return range; } - + + /// ComputeHash must take into account the name, the size, the domain, and the range unsigned computeHash(); unsigned hash() const { return hashValue; } - -private: - unsigned hashValue; + + /* + * Fairly simple idea. Since by defn (see Array), symbolic arrays + * have constant values of size 0. Concrete Arrays have their respective + * values stored in each element. Therefore, each incoming call is tested. + * An array is created and, if it's concrete, it's simply returned. If + * instead it is symbolic, then a map is checked to see if it was created + * before, so there is only a single instance floating out there. + */ + static const Array * CreateArray(const std::string &_name, uint64_t _size, + const ref *constantValuesBegin = 0, + const ref *constantValuesEnd = 0, + Expr::Width _domain = Expr::Int32, + Expr::Width _range = Expr::Int8); }; /// Class representing a complete list of updates into an array. diff --git a/lib/Core/Executor.cpp b/lib/Core/Executor.cpp index cdd6ba54..8631061f 100644 --- a/lib/Core/Executor.cpp +++ b/lib/Core/Executor.cpp @@ -2924,7 +2924,7 @@ ref Executor::replaceReadWithSymbolic(ExecutionState &state, // and return it. static unsigned id; - const Array *array = new Array("rrws_arr" + llvm::utostr(++id), + const Array *array = Array::CreateArray("rrws_arr" + llvm::utostr(++id), Expr::getMinBytesForWidth(e->getWidth())); ref res = Expr::createTempRead(array, e->getWidth()); ref eq = NotOptimizedExpr::create(EqExpr::create(e, res)); @@ -3263,7 +3263,7 @@ void Executor::executeMakeSymbolic(ExecutionState &state, while (!state.arrayNames.insert(uniqueName).second) { uniqueName = name + "_" + llvm::utostr(++id); } - const Array *array = new Array(uniqueName, mo->size); + const Array *array = Array::CreateArray(uniqueName, mo->size); bindObjectInState(state, mo, false, array); state.addSymbolic(mo, array); diff --git a/lib/Core/Memory.cpp b/lib/Core/Memory.cpp index b6f225d1..b9f6afd0 100644 --- a/lib/Core/Memory.cpp +++ b/lib/Core/Memory.cpp @@ -113,7 +113,7 @@ ObjectState::ObjectState(const MemoryObject *mo) if (!UseConstantArrays) { // FIXME: Leaked. static unsigned id = 0; - const Array *array = new Array("tmp_arr" + llvm::utostr(++id), size); + const Array *array = Array::CreateArray("tmp_arr" + llvm::utostr(++id), size); updates = UpdateList(array, 0); } memset(concreteStore, 0, size); @@ -222,7 +222,7 @@ const UpdateList &ObjectState::getUpdates() const { // Start a new update list. // FIXME: Leaked. static unsigned id = 0; - const Array *array = new Array("const_arr" + llvm::utostr(++id), size, + const Array *array = Array::CreateArray("const_arr" + llvm::utostr(++id), size, &Contents[0], &Contents[0] + Contents.size()); updates = UpdateList(array, 0); diff --git a/lib/Expr/Expr.cpp b/lib/Expr/Expr.cpp index d54b8f4d..46ea30fc 100644 --- a/lib/Expr/Expr.cpp +++ b/lib/Expr/Expr.cpp @@ -490,10 +490,46 @@ unsigned Array::computeHash() { unsigned res = 0; for (unsigned i = 0, e = name.size(); i != e; ++i) res = (res * Expr::MAGIC_HASH_CONSTANT) + name[i]; + res = (res * Expr::MAGIC_HASH_CONSTANT) + size; hashValue = res; return hashValue; } +std::map *> Array::symbolicArraySingletonMap; + +const Array * Array::CreateArray(const std::string &_name, uint64_t _size, + const ref *constantValuesBegin, + const ref *constantValuesEnd, + Expr::Width _domain, + Expr::Width _range){ + + const Array * array = new Array(_name, _size, constantValuesBegin, constantValuesEnd, _domain,_range); + if(array->constantValues.size() == 0){ + //means is a symbolic array and we should look up the values; + unsigned hash = array->hash(); + std::vector * bucket = Array::symbolicArraySingletonMap[hash]; + if(bucket){ + for(std::vector::const_iterator it = bucket->begin(); + it != bucket->end(); it ++){ + const Array* prospect = *it; + if(prospect->size == array->size && prospect->name == array->name){ + delete array; + return prospect; + } + } + bucket->push_back(array); + return array; + }else{ + bucket = new std::vector(); + bucket->push_back(array); + Array::symbolicArraySingletonMap[hash] = bucket; + return array; + } + }else{ + return array; + } +} + /***/ ref ReadExpr::create(const UpdateList &ul, ref index) { diff --git a/lib/Expr/Parser.cpp b/lib/Expr/Parser.cpp index aebce666..9df08903 100644 --- a/lib/Expr/Parser.cpp +++ b/lib/Expr/Parser.cpp @@ -519,12 +519,12 @@ DeclResult ParserImpl::ParseArrayDecl() { // FIXME: Array should take domain and range. const Identifier *Label = GetOrCreateIdentifier(Name); - Array *Root; + const Array *Root; if (!Values.empty()) - Root = new Array(Label->Name, Size.get(), + Root = Array::CreateArray(Label->Name, Size.get(), &Values[0], &Values[0] + Values.size()); else - Root = new Array(Label->Name, Size.get()); + Root = Array::CreateArray(Label->Name, Size.get()); ArrayDecl *AD = new ArrayDecl(Label, Size.get(), DomainType.get(), RangeType.get(), Root); @@ -1306,7 +1306,7 @@ VersionResult ParserImpl::ParseVersionSpecifier() { VersionResult Res = ParseVersion(); // Define update list to avoid use-of-undef errors. if (!Res.isValid()) { - Res = VersionResult(true, UpdateList(new Array("", 0), NULL)); + Res = VersionResult(true, UpdateList(Array::CreateArray("", 0), NULL)); } if (Label) diff --git a/lib/SMT/SMTParser.cpp b/lib/SMT/SMTParser.cpp index 03042fdd..eefe443a 100644 --- a/lib/SMT/SMTParser.cpp +++ b/lib/SMT/SMTParser.cpp @@ -165,7 +165,7 @@ void SMTParser::DeclareExpr(std::string name, Expr::Width w) { std::cout << "Declaring " << name << " of width " << w << "\n"; #endif - Array *arr = new Array(name, w / 8); + const Array *arr = Array::CreateArray(name, w / 8); ref *kids = new ref[w/8]; for (unsigned i=0; i < w/8; i++) diff --git a/unittests/Expr/ExprTest.cpp b/unittests/Expr/ExprTest.cpp index 18284f03..d05eb7ec 100644 --- a/unittests/Expr/ExprTest.cpp +++ b/unittests/Expr/ExprTest.cpp @@ -29,9 +29,9 @@ TEST(ExprTest, BasicConstruction) { } TEST(ExprTest, ConcatExtract) { - Array *array = new Array("arr0", 256); + const Array *array = Array::CreateArray("arr0", 256); ref read8 = Expr::createTempRead(array, 8); - Array *array2 = new Array("arr1", 256); + const Array *array2 = Array::CreateArray("arr1", 256); ref read8_2 = Expr::createTempRead(array2, 8); ref c100 = getConstant(100, 8); @@ -81,10 +81,10 @@ TEST(ExprTest, ConcatExtract) { } TEST(ExprTest, ExtractConcat) { - Array *array = new Array("arr2", 256); + const Array *array = Array::CreateArray("arr2", 256); ref read64 = Expr::createTempRead(array, 64); - Array *array2 = new Array("arr3", 256); + const Array *array2 = Array::CreateArray("arr3", 256); ref read8_2 = Expr::createTempRead(array2, 8); ref extract1 = ExtractExpr::create(read64, 36, 4); diff --git a/unittests/Solver/SolverTest.cpp b/unittests/Solver/SolverTest.cpp index 94529d56..d9aa9b56 100644 --- a/unittests/Solver/SolverTest.cpp +++ b/unittests/Solver/SolverTest.cpp @@ -46,7 +46,7 @@ void testOperation(Solver &solver, unsigned size = Expr::getMinBytesForWidth(operandWidth); static uint64_t id = 0; - Array *array = new Array("arr" + llvm::utostr(++id), size); + const Array *array = Array::CreateArray("arr" + llvm::utostr(++id), size); symbolicArgs.push_back(Expr::CreateArg(Expr::createTempRead(array, operandWidth))); } -- cgit 1.4.1