Diffstat (limited to 'lib')
85 files changed, 20933 insertions, 0 deletions
diff --git a/lib/Basic/BOut.cpp b/lib/Basic/BOut.cpp new file mode 100644 index 00000000..42d17e27 --- /dev/null +++ b/lib/Basic/BOut.cpp @@ -0,0 +1,236 @@ +//===-- BOut.c ------------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Internal/ADT/BOut.h" + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#define BOUT_MAGIC "BOUT\n" +#define BOUT_MAGIC_SIZE 5 +#define BOUT_VERSION 2 + +/***/ + +static int read_uint32(FILE *f, unsigned *value_out) { + unsigned char data[4]; + if (fread(data, 4, 1, f)!=1) + return 0; + *value_out = (((((data[0]<<8) + data[1])<<8) + data[2])<<8) + data[3]; + return 1; +} + +static int write_uint32(FILE *f, unsigned value) { + unsigned char data[4]; + data[0] = value>>24; + data[1] = value>>16; + data[2] = value>> 8; + data[3] = value>> 0; + return fwrite(data, 1, 4, f)==4; +} + +static int read_string(FILE *f, char **value_out) { + unsigned len; + if (!read_uint32(f, &len)) + return 0; + *value_out = (char*) malloc(len+1); + if (!*value_out) + return 0; + if (fread(*value_out, len, 1, f)!=1) + return 0; + (*value_out)[len] = 0; + return 1; +} + +static int write_string(FILE *f, const char *value) { + unsigned len = strlen(value); + if (!write_uint32(f, len)) + return 0; + if (fwrite(value, len, 1, f)!=1) + return 0; + return 1; +} + +/***/ + + +unsigned bOut_getCurrentVersion() { + return BOUT_VERSION; +} + + +static int bOut_checkHeader(FILE *f) { + char header[BOUT_MAGIC_SIZE]; + if (fread(header, BOUT_MAGIC_SIZE, 1, f)!=1) + return 0; + if (memcmp(header, BOUT_MAGIC, BOUT_MAGIC_SIZE)) + return 0; + return 1; +} + +int bOut_isBOutFile(const char *path) { + FILE *f = fopen(path, "rb"); + int res; + + if (!f) + return 0; + res = bOut_checkHeader(f); + fclose(f); + + return res; +} + +BOut *bOut_fromFile(const char *path) { + FILE *f = fopen(path, "rb"); + BOut *res = 0; + unsigned i, version; + + if (!f) + goto error; + if (!bOut_checkHeader(f)) + goto error; + + res = (BOut*) calloc(1, sizeof(*res)); + if (!res) + goto error; + + if (!read_uint32(f, &version)) + goto error; + + if (version > bOut_getCurrentVersion()) + goto error; + + res->version = version; + + if (!read_uint32(f, &res->numArgs)) + goto error; + res->args = (char**) calloc(res->numArgs, sizeof(*res->args)); + if (!res->args) + goto error; + + for (i=0; i<res->numArgs; i++) + if (!read_string(f, &res->args[i])) + goto error; + + if (version >= 2) { + if (!read_uint32(f, &res->symArgvs)) + goto error; + if (!read_uint32(f, &res->symArgvLen)) + goto error; + } + + if (!read_uint32(f, &res->numObjects)) + goto error; + res->objects = (BOutObject*) calloc(res->numObjects, sizeof(*res->objects)); + if (!res->objects) + goto error; + for (i=0; i<res->numObjects; i++) { + BOutObject *o = &res->objects[i]; + if (!read_string(f, &o->name)) + goto error; + if (!read_uint32(f, &o->numBytes)) + goto error; + o->bytes = (unsigned char*) malloc(o->numBytes); + if (fread(o->bytes, o->numBytes, 1, f)!=1) + goto error; + } + + fclose(f); + + return res; + error: + if (res) { + if (res->args) { + for (i=0; i<res->numArgs; i++) + if (res->args[i]) + free(res->args[i]); + free(res->args); + } + if (res->objects) { + for (i=0; i<res->numObjects; i++) { + BOutObject *bo = &res->objects[i]; + if (bo->name) + free(bo->name); + if 
(bo->bytes) + free(bo->bytes); + } + free(res->objects); + } + free(res); + } + + if (f) fclose(f); + + return 0; +} + +int bOut_toFile(BOut *bo, const char *path) { + FILE *f = fopen(path, "wb"); + unsigned i; + + if (!f) + goto error; + if (fwrite(BOUT_MAGIC, strlen(BOUT_MAGIC), 1, f)!=1) + goto error; + if (!write_uint32(f, BOUT_VERSION)) + goto error; + + if (!write_uint32(f, bo->numArgs)) + goto error; + for (i=0; i<bo->numArgs; i++) { + if (!write_string(f, bo->args[i])) + goto error; + } + + if (!write_uint32(f, bo->symArgvs)) + goto error; + if (!write_uint32(f, bo->symArgvLen)) + goto error; + + if (!write_uint32(f, bo->numObjects)) + goto error; + for (i=0; i<bo->numObjects; i++) { + BOutObject *o = &bo->objects[i]; + if (!write_string(f, o->name)) + goto error; + if (!write_uint32(f, o->numBytes)) + goto error; + if (fwrite(o->bytes, o->numBytes, 1, f)!=1) + goto error; + } + + fclose(f); + + return 1; + error: + if (f) fclose(f); + + return 0; +} + +unsigned bOut_numBytes(BOut *bo) { + unsigned i, res = 0; + for (i=0; i<bo->numObjects; i++) + res += bo->objects[i].numBytes; + return res; +} + +void bOut_free(BOut *bo) { + unsigned i; + for (i=0; i<bo->numArgs; i++) + free(bo->args[i]); + free(bo->args); + for (i=0; i<bo->numObjects; i++) { + free(bo->objects[i].name); + free(bo->objects[i].bytes); + } + free(bo->objects); + free(bo); +} diff --git a/lib/Basic/Makefile b/lib/Basic/Makefile new file mode 100644 index 00000000..d4481e7f --- /dev/null +++ b/lib/Basic/Makefile @@ -0,0 +1,16 @@ +#===-- lib/Basic/Makefile ----------------------------------*- Makefile -*--===# +# +# The KLEE Symbolic Virtual Machine +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +LEVEL=../.. + +LIBRARYNAME=kleeBasic +DONT_BUILD_RELINKED=1 +BUILD_ARCHIVE=1 + +include $(LEVEL)/Makefile.common diff --git a/lib/Basic/README.txt b/lib/Basic/README.txt new file mode 100644 index 00000000..b13df6bd --- /dev/null +++ b/lib/Basic/README.txt @@ -0,0 +1,3 @@ +This directory holds the most basic support facilities provided for +both the klee and kleaver libraries. The code in this directory should +have no dependencies on LLVM or any other klee libraries. diff --git a/lib/Basic/Statistics.cpp b/lib/Basic/Statistics.cpp new file mode 100644 index 00000000..9c95a891 --- /dev/null +++ b/lib/Basic/Statistics.cpp @@ -0,0 +1,84 @@ +//===-- Statistics.cpp ----------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
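Editor's note: the reader and writer above imply a simple binary layout for .bout files: the 5-byte magic "BOUT\n", then big-endian 32-bit integers and length-prefixed strings, in the order version, numArgs, args[], symArgvs, symArgvLen (version >= 2 only), numObjects, and per object its name, byte count, and raw bytes. Below is a minimal standalone sketch that emits that layout; it uses no KLEE headers and the helper names (emit_u32, emit_str) are invented for illustration.

#include <cstdio>
#include <string>
#include <vector>

// Big-endian 32-bit integer, matching write_uint32() above.
static void emit_u32(std::vector<unsigned char> &out, unsigned v) {
  out.push_back((unsigned char)(v >> 24));
  out.push_back((unsigned char)(v >> 16));
  out.push_back((unsigned char)(v >> 8));
  out.push_back((unsigned char)(v >> 0));
}

// Length-prefixed string with no terminator on disk; the reader appends the
// NUL itself, matching read_string()/write_string() above.
static void emit_str(std::vector<unsigned char> &out, const std::string &s) {
  emit_u32(out, (unsigned)s.size());
  out.insert(out.end(), s.begin(), s.end());
}

int main() {
  std::vector<unsigned char> out;
  const char magic[] = "BOUT\n";                 // BOUT_MAGIC, 5 bytes
  out.insert(out.end(), magic, magic + 5);
  emit_u32(out, 2);                              // version (BOUT_VERSION)
  emit_u32(out, 1);                              // numArgs
  emit_str(out, "./a.out");                      // args[0]
  emit_u32(out, 0);                              // symArgvs   (version >= 2)
  emit_u32(out, 0);                              // symArgvLen (version >= 2)
  emit_u32(out, 1);                              // numObjects
  emit_str(out, "arg0");                         // objects[0].name
  emit_u32(out, 4);                              // objects[0].numBytes
  unsigned char bytes[4] = {0, 0, 0, 0};         // objects[0].bytes
  out.insert(out.end(), bytes, bytes + 4);
  return std::fwrite(out.data(), 1, out.size(), stdout) == out.size() ? 0 : 1;
}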
+// +//===----------------------------------------------------------------------===// + +#include "klee/Statistics.h" + +#include <vector> + +using namespace klee; + +StatisticManager::StatisticManager() + : enabled(true), + globalStats(0), + indexedStats(0), + contextStats(0), + index(0) { +} + +StatisticManager::~StatisticManager() { + if (globalStats) delete[] globalStats; + if (indexedStats) delete[] indexedStats; +} + +void StatisticManager::useIndexedStats(unsigned totalIndices) { + if (indexedStats) delete[] indexedStats; + indexedStats = new uint64_t[totalIndices * stats.size()]; + memset(indexedStats, 0, sizeof(*indexedStats) * totalIndices * stats.size()); +} + +void StatisticManager::registerStatistic(Statistic &s) { + if (globalStats) delete[] globalStats; + s.id = stats.size(); + stats.push_back(&s); + globalStats = new uint64_t[stats.size()]; + memset(globalStats, 0, sizeof(*globalStats)*stats.size()); +} + +int StatisticManager::getStatisticID(const std::string &name) const { + for (unsigned i=0; i<stats.size(); i++) + if (stats[i]->getName() == name) + return i; + return -1; +} + +Statistic *StatisticManager::getStatisticByName(const std::string &name) const { + for (unsigned i=0; i<stats.size(); i++) + if (stats[i]->getName() == name) + return stats[i]; + return 0; +} + +StatisticManager *klee::theStatisticManager = 0; + +static StatisticManager &getStatisticManager() { + static StatisticManager sm; + theStatisticManager = &sm; + return sm; +} + +/* *** */ + +Statistic::Statistic(const std::string &_name, + const std::string &_shortName) + : name(_name), + shortName(_shortName) { + getStatisticManager().registerStatistic(*this); +} + +Statistic::~Statistic() { +} + +Statistic &Statistic::operator +=(const uint64_t addend) { + theStatisticManager->incrementStatistic(*this, addend); + return *this; +} + +uint64_t Statistic::getValue() const { + return theStatisticManager->getValue(*this); +} diff --git a/lib/Core/AddressSpace.cpp b/lib/Core/AddressSpace.cpp new file mode 100644 index 00000000..fb032fd5 --- /dev/null +++ b/lib/Core/AddressSpace.cpp @@ -0,0 +1,334 @@ +//===-- AddressSpace.cpp --------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AddressSpace.h" +#include "CoreStats.h" +#include "Memory.h" +#include "TimingSolver.h" + +#include "klee/Expr.h" +#include "klee/TimerStatIncrementer.h" + +using namespace klee; + +/// + +void AddressSpace::bindObject(const MemoryObject *mo, ObjectState *os) { + assert(os->copyOnWriteOwner==0 && "object already has owner"); + os->copyOnWriteOwner = cowKey; + objects = objects.replace(std::make_pair(mo, os)); +} + +void AddressSpace::unbindObject(const MemoryObject *mo) { + objects = objects.remove(mo); +} + +const ObjectState *AddressSpace::findObject(const MemoryObject *mo) const { + const MemoryMap::value_type *res = objects.lookup(mo); + + return res ? 
res->second : 0; +} + +ObjectState *AddressSpace::getWriteable(const MemoryObject *mo, + const ObjectState *os) { + assert(!os->readOnly); + + if (cowKey==os->copyOnWriteOwner) { + return const_cast<ObjectState*>(os); + } else { + ObjectState *n = new ObjectState(*os); + n->copyOnWriteOwner = cowKey; + objects = objects.replace(std::make_pair(mo, n)); + return n; + } +} + +/// + +bool AddressSpace::resolveOne(uint64_t addr64, ObjectPair &result) { + unsigned address = (unsigned) addr64; + MemoryObject hack(address); + + if (const MemoryMap::value_type *res = objects.lookup_previous(&hack)) { + const MemoryObject *mo = res->first; + if ((mo->size==0 && address==mo->address) || + (address - mo->address < mo->size)) { + result = *res; + return true; + } + } + + return false; +} + +bool AddressSpace::resolveOne(ExecutionState &state, + TimingSolver *solver, + ref<Expr> address, + ObjectPair &result, + bool &success) { + if (address.isConstant()) { + success = resolveOne(address.getConstantValue(), result); + return true; + } else { + TimerStatIncrementer timer(stats::resolveTime); + + // try cheap search, will succeed for any inbounds pointer + + ref<Expr> cex(0); + if (!solver->getValue(state, address, cex)) + return false; + unsigned example = (unsigned) cex.getConstantValue(); + MemoryObject hack(example); + const MemoryMap::value_type *res = objects.lookup_previous(&hack); + + if (res) { + const MemoryObject *mo = res->first; + if (example - mo->address < mo->size) { + result = *res; + success = true; + return true; + } + } + + // didn't work, now we have to search + + MemoryMap::iterator oi = objects.upper_bound(&hack); + MemoryMap::iterator begin = objects.begin(); + MemoryMap::iterator end = objects.end(); + + MemoryMap::iterator start = oi; + while (oi!=begin) { + --oi; + const MemoryObject *mo = oi->first; + + bool mayBeTrue; + if (!solver->mayBeTrue(state, + mo->getBoundsCheckPointer(address), mayBeTrue)) + return false; + if (mayBeTrue) { + result = *oi; + success = true; + return true; + } else { + bool mustBeTrue; + if (!solver->mustBeTrue(state, + UgeExpr::create(address, mo->getBaseExpr()), + mustBeTrue)) + return false; + if (mustBeTrue) + break; + } + } + + // search forwards + for (oi=start; oi!=end; ++oi) { + const MemoryObject *mo = oi->first; + + bool mustBeTrue; + if (!solver->mustBeTrue(state, + UltExpr::create(address, mo->getBaseExpr()), + mustBeTrue)) + return false; + if (mustBeTrue) { + break; + } else { + bool mayBeTrue; + + if (!solver->mayBeTrue(state, + mo->getBoundsCheckPointer(address), + mayBeTrue)) + return false; + if (mayBeTrue) { + result = *oi; + success = true; + return true; + } + } + } + + success = false; + return true; + } +} + +bool AddressSpace::resolve(ExecutionState &state, + TimingSolver *solver, + ref<Expr> p, + ResolutionList &rl, + unsigned maxResolutions, + double timeout) { + if (p.isConstant()) { + ObjectPair res; + if (resolveOne(p.getConstantValue(), res)) + rl.push_back(res); + return false; + } else { + TimerStatIncrementer timer(stats::resolveTime); + uint64_t timeout_us = (uint64_t) (timeout*1000000.); + + // XXX in general this isn't exactly what we want... for + // a multiple resolution case (or for example, a \in {b,c,0}) + // we want to find the first object, find a cex assuming + // not the first, find a cex assuming not the second... + // etc. + + // XXX how do we smartly amortize the cost of checking to + // see if we need to keep searching up/down, in bad cases? + // maybe we don't care? 
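Editor's note: the concrete resolveOne() above finds its candidate with lookup_previous (the object with the greatest base address not above the pointer) and then tests containment with a single unsigned subtraction, treating zero-sized objects as matching only their own address. A standalone sketch of that test follows, with a plain std::map standing in for KLEE's ImmutableMap and invented names.

#include <cstdint>
#include <map>

struct FakeObject { uint64_t address; unsigned size; };

// Return the object whose range [address, address + size) contains addr,
// or 0 if there is none. The map is keyed by base address.
const FakeObject *find(const std::map<uint64_t, FakeObject> &objects,
                       uint64_t addr) {
  // upper_bound then step back is "lookup_previous": greatest base <= addr.
  std::map<uint64_t, FakeObject>::const_iterator it = objects.upper_bound(addr);
  if (it == objects.begin())
    return 0;
  --it;
  const FakeObject &mo = it->second;
  // Unsigned wraparound makes "addr - base < size" a one-comparison range
  // check; a zero-sized object matches only its own address.
  if ((mo.size == 0 && addr == mo.address) || (addr - mo.address < mo.size))
    return &mo;
  return 0;
}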
+ + // XXX we really just need a smart place to start (although + // if its a known solution then the code below is guaranteed + // to hit the fast path with exactly 2 queries). we could also + // just get this by inspection of the expr. + + ref<Expr> cex(0); + if (!solver->getValue(state, p, cex)) + return true; + unsigned example = (unsigned) cex.getConstantValue(); + MemoryObject hack(example); + + MemoryMap::iterator oi = objects.upper_bound(&hack); + MemoryMap::iterator begin = objects.begin(); + MemoryMap::iterator end = objects.end(); + + MemoryMap::iterator start = oi; + + // XXX in the common case we can save one query if we ask + // mustBeTrue before mayBeTrue for the first result. easy + // to add I just want to have a nice symbolic test case first. + + // search backwards, start with one minus because this + // is the object that p *should* be within, which means we + // get write off the end with 4 queries (XXX can be better, + // no?) + while (oi!=begin) { + --oi; + const MemoryObject *mo = oi->first; + if (timeout_us && timeout_us < timer.check()) + return true; + + // XXX I think there is some query wasteage here? + ref<Expr> inBounds = mo->getBoundsCheckPointer(p); + bool mayBeTrue; + if (!solver->mayBeTrue(state, inBounds, mayBeTrue)) + return true; + if (mayBeTrue) { + rl.push_back(*oi); + + // fast path check + unsigned size = rl.size(); + if (size==1) { + bool mustBeTrue; + if (!solver->mustBeTrue(state, inBounds, mustBeTrue)) + return true; + if (mustBeTrue) + return false; + } else if (size==maxResolutions) { + return true; + } + } + + bool mustBeTrue; + if (!solver->mustBeTrue(state, + UgeExpr::create(p, mo->getBaseExpr()), + mustBeTrue)) + return true; + if (mustBeTrue) + break; + } + // search forwards + for (oi=start; oi!=end; ++oi) { + const MemoryObject *mo = oi->first; + if (timeout_us && timeout_us < timer.check()) + return true; + + bool mustBeTrue; + if (!solver->mustBeTrue(state, + UltExpr::create(p, mo->getBaseExpr()), + mustBeTrue)) + return true; + if (mustBeTrue) + break; + + // XXX I think there is some query wasteage here? + ref<Expr> inBounds = mo->getBoundsCheckPointer(p); + bool mayBeTrue; + if (!solver->mayBeTrue(state, inBounds, mayBeTrue)) + return true; + if (mayBeTrue) { + rl.push_back(*oi); + + // fast path check + unsigned size = rl.size(); + if (size==1) { + bool mustBeTrue; + if (!solver->mustBeTrue(state, inBounds, mustBeTrue)) + return true; + if (mustBeTrue) + return false; + } else if (size==maxResolutions) { + return true; + } + } + } + } + + return false; +} + +// These two are pretty big hack so we can sort of pass memory back +// and forth to externals. They work by abusing the concrete cache +// store inside of the object states, which allows them to +// transparently avoid screwing up symbolics (if the byte is symbolic +// then its concrete cache byte isn't being used) but is just a hack. 
+ +void AddressSpace::copyOutConcretes() { + for (MemoryMap::iterator it = objects.begin(), ie = objects.end(); + it != ie; ++it) { + const MemoryObject *mo = it->first; + + if (!mo->isUserSpecified) { + ObjectState *os = it->second; + uint8_t *address = (uint8_t*) (unsigned long) mo->address; + + if (!os->readOnly) + memcpy(address, os->concreteStore, mo->size); + } + } +} + +bool AddressSpace::copyInConcretes() { + for (MemoryMap::iterator it = objects.begin(), ie = objects.end(); + it != ie; ++it) { + const MemoryObject *mo = it->first; + + if (!mo->isUserSpecified) { + const ObjectState *os = it->second; + uint8_t *address = (uint8_t*) (unsigned long) mo->address; + + if (memcmp(address, os->concreteStore, mo->size)!=0) { + if (os->readOnly) { + return false; + } else { + ObjectState *wos = getWriteable(mo, os); + memcpy(wos->concreteStore, address, mo->size); + } + } + } + } + + return true; +} + +/***/ + +bool MemoryObjectLT::operator()(const MemoryObject *a, const MemoryObject *b) const { + return a->address < b->address; +} + diff --git a/lib/Core/AddressSpace.h b/lib/Core/AddressSpace.h new file mode 100644 index 00000000..a281714c --- /dev/null +++ b/lib/Core/AddressSpace.h @@ -0,0 +1,131 @@ +//===-- AddressSpace.h ------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_ADDRESSSPACE_H +#define KLEE_ADDRESSSPACE_H + +#include "ObjectHolder.h" + +#include "klee/Expr.h" +#include "klee/Internal/ADT/ImmutableMap.h" + +namespace klee { + class ExecutionState; + class MemoryObject; + class ObjectState; + class TimingSolver; + + template<class T> class ref; + + typedef std::pair<const MemoryObject*, const ObjectState*> ObjectPair; + typedef std::vector<ObjectPair> ResolutionList; + + /// Function object ordering MemoryObject's by address. + struct MemoryObjectLT { + bool operator()(const MemoryObject *a, const MemoryObject *b) const; + }; + + typedef ImmutableMap<const MemoryObject*, ObjectHolder, MemoryObjectLT> MemoryMap; + + class AddressSpace { + private: + /// Epoch counter used to control ownership of objects. + mutable unsigned cowKey; + + /// Unsupported, use copy constructor + AddressSpace &operator=(const AddressSpace&); + + public: + /// The MemoryObject -> ObjectState map that constitutes the + /// address space. + /// + /// The set of objects where o->copyOnWriteOwner == cowKey are the + /// objects that we own. + /// + /// \invariant forall o in objects, o->copyOnWriteOwner <= cowKey + MemoryMap objects; + + public: + AddressSpace() : cowKey(1) {} + AddressSpace(const AddressSpace &b) : cowKey(++b.cowKey), objects(b.objects) { } + ~AddressSpace() {} + + /// Resolve address to an ObjectPair in result. + /// \return true iff an object was found. + bool resolveOne(uint64_t address, + ObjectPair &result); + + /// Resolve address to an ObjectPair in result. + /// + /// \param state The state this address space is part of. + /// \param solver A solver used to determine possible + /// locations of the \a address. + /// \param address The address to search for. + /// \param[out] result An ObjectPair this address can resolve to + /// (when returning true). + /// \return true iff an object was found at \a address. 
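Editor's note: resolve() above asks the solver for one concrete value of the pointer, starts at the object that value falls near, and then scans neighbouring objects downwards and upwards, recording every object the pointer may lie in and stopping once it must lie on the far side of a base address. Below is a compressed standalone sketch of that control flow (no timeout, fast path, or maxResolutions); the Oracle models the pointer's feasible values as a plain interval and is only a stand-in for the may/must queries sent to KLEE's TimingSolver.

#include <map>
#include <vector>

struct Obj { unsigned long base; unsigned size; };

// Stand-in for the solver: the pointer's feasible values are the closed
// interval [lo, hi] (a real solver answers from the path constraints).
struct Oracle {
  unsigned long lo, hi;
  bool mayBeInBounds(const Obj &o) const {       // intervals overlap?
    return hi >= o.base && lo < o.base + o.size;
  }
  bool mustBeGE(unsigned long base) const { return lo >= base; }
  bool mustBeLT(unsigned long base) const { return hi < base; }
};

typedef std::map<unsigned long, Obj> ObjMap;     // keyed by base address

void resolveSketch(const ObjMap &objects, unsigned long example,
                   const Oracle &solver, std::vector<Obj> &out) {
  ObjMap::const_iterator start = objects.upper_bound(example);

  // Scan downwards from the object the concrete example points near.
  for (ObjMap::const_iterator it = start; it != objects.begin();) {
    --it;
    if (solver.mayBeInBounds(it->second))
      out.push_back(it->second);                 // possible target
    if (solver.mustBeGE(it->second.base))
      break;                                     // nothing lower can match
  }
  // Scan upwards from the same starting point.
  for (ObjMap::const_iterator it = start; it != objects.end(); ++it) {
    if (solver.mustBeLT(it->second.base))
      break;                                     // nothing higher can match
    if (solver.mayBeInBounds(it->second))
      out.push_back(it->second);
  }
}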
+ bool resolveOne(ExecutionState &state, + TimingSolver *solver, + ref<Expr> address, + ObjectPair &result, + bool &success); + + /// Resolve address to a list of ObjectPairs it can point to. If + /// maxResolutions is non-zero then no more than that many pairs + /// will be returned. + /// + /// \return true iff the resolution is incomplete (maxResolutions + /// is non-zero and the search terminated early, or a query timed out). + bool resolve(ExecutionState &state, + TimingSolver *solver, + ref<Expr> address, + ResolutionList &rl, + unsigned maxResolutions=0, + double timeout=0.); + + /***/ + + /// Add a binding to the address space. + void bindObject(const MemoryObject *mo, ObjectState *os); + + /// Remove a binding from the address space. + void unbindObject(const MemoryObject *mo); + + /// Lookup a binding from a MemoryObject. + const ObjectState *findObject(const MemoryObject *mo) const; + + /// \brief Obtain an ObjectState suitable for writing. + /// + /// This returns a writeable object state, creating a new copy of + /// the given ObjectState if necessary. If the address space owns + /// the ObjectState then this routine effectively just strips the + /// const qualifier it. + /// + /// \param mo The MemoryObject to get a writeable ObjectState for. + /// \param os The current binding of the MemoryObject. + /// \return A writeable ObjectState (\a os or a copy). + ObjectState *getWriteable(const MemoryObject *mo, const ObjectState *os); + + /// Copy the concrete values of all managed ObjectStates into the + /// actual system memory location they were allocated at. + void copyOutConcretes(); + + /// Copy the concrete values of all managed ObjectStates back from + /// the actual system memory location they were allocated + /// at. ObjectStates will only be written to (and thus, + /// potentially copied) if the memory values are different from + /// the current concrete values. + /// + /// \retval true The copy succeeded. + /// \retval false The copy failed because a read-only object was modified. + bool copyInConcretes(); + }; +} // End klee namespace + +#endif diff --git a/lib/Core/CallPathManager.cpp b/lib/Core/CallPathManager.cpp new file mode 100644 index 00000000..d0a61b31 --- /dev/null +++ b/lib/Core/CallPathManager.cpp @@ -0,0 +1,103 @@ +//===-- CallPathManager.cpp -----------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
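Editor's note: the getWriteable() contract documented above is the heart of KLEE's copy-on-write address space: objects carry the key of the space that last wrote them, copying a space moves to a fresh key so the shared objects are no longer owned, and the first write through either copy clones the ObjectState and stamps it with the writer's key. A simplified standalone sketch of that ownership-key idea follows, using shared_ptr and std::map in place of ObjectHolder/ImmutableMap; all names are invented, and fresh keys come from a global counter rather than KLEE's shared epoch bump.

#include <map>
#include <memory>

struct Object {
  unsigned owner;          // which Space's cowKey stamped this copy
  int bytes[16];
};

struct Space {
  static unsigned nextKey; // simplified source of fresh keys
  mutable unsigned cowKey;
  std::map<int, std::shared_ptr<Object> > objects;

  Space() : cowKey(++nextKey) {}
  // Copying shares every Object; both copies move to fresh keys, so neither
  // still owns the shared objects and the next write in either one clones.
  Space(const Space &b) : cowKey(++nextKey), objects(b.objects) {
    b.cowKey = ++nextKey;
  }

  void bind(int id, std::shared_ptr<Object> os) {
    os->owner = cowKey;
    objects[id] = os;
  }

  // cf. getWriteable(): clone only if this space no longer owns the object.
  // The id must have been bound in this space or an ancestor copy.
  Object *writable(int id) {
    std::shared_ptr<Object> &slot = objects[id];
    if (slot->owner != cowKey) {
      slot = std::make_shared<Object>(*slot);    // copy-on-write
      slot->owner = cowKey;
    }
    return slot.get();
  }
};
unsigned Space::nextKey = 0;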
+// +//===----------------------------------------------------------------------===// + +#include "CallPathManager.h" + +#include "klee/Statistics.h" + +#include <map> +#include <vector> +#include "llvm/Function.h" +#include "llvm/Support/Streams.h" + +using namespace llvm; +using namespace klee; + +/// + +CallPathNode::CallPathNode(CallPathNode *_parent, + Instruction *_callSite, + Function *_function) + : parent(_parent), + callSite(_callSite), + function(_function), + count(0) { +} + +void CallPathNode::print() { + llvm::cerr << " (Function: " << this->function->getName() << ", " + << "Callsite: " << callSite << ", " + << "Count: " << this->count << ")"; + if (parent && parent->callSite) { + llvm::cerr << ";\n"; + parent->print(); + } + else llvm::cerr << "\n"; +} + +/// + +CallPathManager::CallPathManager() : root(0, 0, 0) { +} + +CallPathManager::~CallPathManager() { + for (std::vector<CallPathNode*>::iterator it = paths.begin(), + ie = paths.end(); it != ie; ++it) + delete *it; +} + +void CallPathManager::getSummaryStatistics(CallSiteSummaryTable &results) { + results.clear(); + + for (std::vector<CallPathNode*>::iterator it = paths.begin(), + ie = paths.end(); it != ie; ++it) + (*it)->summaryStatistics = (*it)->statistics; + + // compute summary bottom up, while building result table + for (std::vector<CallPathNode*>::reverse_iterator it = paths.rbegin(), + ie = paths.rend(); it != ie; ++it) { + CallPathNode *cp = *it; + cp->parent->summaryStatistics += cp->summaryStatistics; + + CallSiteInfo &csi = results[cp->callSite][cp->function]; + csi.count += cp->count; + csi.statistics += cp->summaryStatistics; + } +} + + +CallPathNode *CallPathManager::computeCallPath(CallPathNode *parent, + Instruction *cs, + Function *f) { + for (CallPathNode *p=parent; p; p=p->parent) + if (cs==p->callSite && f==p->function) + return p; + + CallPathNode *cp = new CallPathNode(parent, cs, f); + paths.push_back(cp); + return cp; +} + +CallPathNode *CallPathManager::getCallPath(CallPathNode *parent, + Instruction *cs, + Function *f) { + std::pair<Instruction*,Function*> key(cs, f); + if (!parent) + parent = &root; + + CallPathNode::children_ty::iterator it = parent->children.find(key); + if (it==parent->children.end()) { + CallPathNode *cp = computeCallPath(parent, cs, f); + parent->children.insert(std::make_pair(key, cp)); + return cp; + } else { + return it->second; + } +} + diff --git a/lib/Core/CallPathManager.h b/lib/Core/CallPathManager.h new file mode 100644 index 00000000..2e16d72b --- /dev/null +++ b/lib/Core/CallPathManager.h @@ -0,0 +1,83 @@ +//===-- CallPathManager.h ---------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
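Editor's note: getCallPath()/computeCallPath() above intern call paths as a tree: each parent memoizes its children by (call site, callee), and a recursive call is folded onto the ancestor node already labelled with the same pair, so every distinct call path gets exactly one node to hang per-path statistics on. A small standalone sketch of that lookup follows, with strings in place of llvm::Instruction*/llvm::Function*; the names are invented.

#include <map>
#include <string>
#include <utility>
#include <vector>

typedef std::pair<std::string, std::string> Key;   // (call site, callee)

struct PathNode {
  PathNode *parent;
  Key key;
  std::map<Key, PathNode *> children;
  PathNode(PathNode *p, const Key &k) : parent(p), key(k) {}
};

struct PathManager {
  PathNode root;
  std::vector<PathNode *> owned;

  PathManager() : root(0, Key()) {}
  ~PathManager() {
    for (size_t i = 0; i < owned.size(); ++i) delete owned[i];
  }

  PathNode *enter(PathNode *parent, const std::string &site,
                  const std::string &callee) {
    if (!parent) parent = &root;
    Key key(site, callee);
    // Memoized: at most one child per (site, callee) under a given parent.
    std::map<Key, PathNode *>::iterator it = parent->children.find(key);
    if (it != parent->children.end())
      return it->second;
    // Fold recursion: reuse an ancestor already labelled with this pair.
    for (PathNode *p = parent; p != &root; p = p->parent)
      if (p->key == key)
        return parent->children[key] = p;
    PathNode *n = new PathNode(parent, key);
    owned.push_back(n);
    return parent->children[key] = n;
  }
};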
+// +//===----------------------------------------------------------------------===// + +#ifndef __UTIL_CALLPATHMANAGER_H__ +#define __UTIL_CALLPATHMANAGER_H__ + +#include "klee/Statistics.h" + +#include <map> +#include <vector> + +namespace llvm { + class Instruction; + class Function; +} + +namespace klee { + class StatisticRecord; + + struct CallSiteInfo { + unsigned count; + StatisticRecord statistics; + + public: + CallSiteInfo() : count(0) {} + }; + + typedef std::map<llvm::Instruction*, + std::map<llvm::Function*, CallSiteInfo> > CallSiteSummaryTable; + + class CallPathNode { + friend class CallPathManager; + + public: + typedef std::map<std::pair<llvm::Instruction*, + llvm::Function*>, CallPathNode*> children_ty; + + // form list of (callSite,function) path + CallPathNode *parent; + llvm::Instruction *callSite; + llvm::Function *function; + children_ty children; + + StatisticRecord statistics; + StatisticRecord summaryStatistics; + unsigned count; + + public: + CallPathNode(CallPathNode *parent, + llvm::Instruction *callSite, + llvm::Function *function); + + void print(); + }; + + class CallPathManager { + CallPathNode root; + std::vector<CallPathNode*> paths; + + private: + CallPathNode *computeCallPath(CallPathNode *parent, + llvm::Instruction *callSite, + llvm::Function *f); + + public: + CallPathManager(); + ~CallPathManager(); + + void getSummaryStatistics(CallSiteSummaryTable &result); + + CallPathNode *getCallPath(CallPathNode *parent, + llvm::Instruction *callSite, + llvm::Function *f); + }; +} + +#endif diff --git a/lib/Core/Common.cpp b/lib/Core/Common.cpp new file mode 100644 index 00000000..479c4465 --- /dev/null +++ b/lib/Core/Common.cpp @@ -0,0 +1,110 @@ +//===-- Common.cpp --------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Common.h" + +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <assert.h> +#include <string.h> + +#include <set> + +using namespace klee; + +FILE* klee::klee_warning_file = NULL; +FILE* klee::klee_message_file = NULL; + + +/* Prints a message/warning. + + If pfx is NULL, this is a regular message, and it's sent to + klee_message_file (messages.txt). Otherwise, it is sent to + klee_warning_file (warnings.txt). + + Iff onlyToFile is false, the message is also printed on stderr. +*/ +static void klee_vmessage(const char *pfx, bool onlyToFile, const char *msg, va_list ap) { + FILE *f = stderr; + if (!onlyToFile) { + fprintf(f, "KLEE: "); + if (pfx) fprintf(f, "%s: ", pfx); + vfprintf(f, msg, ap); + fprintf(f, "\n"); + fflush(f); + } + + if (pfx == NULL) + f = klee_message_file; + else f = klee_warning_file; + + if (f) { + fprintf(f, "KLEE: "); + if (pfx) fprintf(f, "%s: ", pfx); + vfprintf(f, msg, ap); + fprintf(f, "\n"); + fflush(f); + } +} + + +void klee::klee_message(const char *msg, ...) { + va_list ap; + va_start(ap, msg); + klee_vmessage(NULL, false, msg, ap); + va_end(ap); +} + +/* Message to be written only to file */ +void klee::klee_message_to_file(const char *msg, ...) { + va_list ap; + va_start(ap, msg); + klee_vmessage(NULL, true, msg, ap); + va_end(ap); +} + +void klee::klee_error(const char *msg, ...) 
{ + va_list ap; + va_start(ap, msg); + klee_vmessage("ERROR", false, msg, ap); + va_end(ap); + exit(1); +} + +void klee::klee_warning(const char *msg, ...) { + va_list ap; + va_start(ap, msg); + klee_vmessage("WARNING", false, msg, ap); + va_end(ap); +} + + +/* Prints a warning once per message. */ +void klee::klee_warning_once(const void *id, const char *msg, ...) { + static std::set< std::pair<const void*, const char*> > keys; + std::pair<const void*, const char*> key; + + + /* "calling external" messages contain the actual arguments with + which we called the external function, so we need to ignore them + when computing the key. */ + if (strncmp(msg, "calling external", strlen("calling external")) != 0) + key = std::make_pair(id, msg); + else key = std::make_pair(id, "calling external"); + + if (!keys.count(key)) { + keys.insert(key); + + va_list ap; + va_start(ap, msg); + klee_vmessage("WARNING", false, msg, ap); + va_end(ap); + } +} diff --git a/lib/Core/Common.h b/lib/Core/Common.h new file mode 100644 index 00000000..ce05b536 --- /dev/null +++ b/lib/Core/Common.h @@ -0,0 +1,56 @@ +//===-- Common.h ------------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef __KLEE_COMMON_H__ +#define __KLEE_COMMON_H__ + +#ifdef __CYGWIN__ +#ifndef WINDOWS +#define WINDOWS +#endif +#endif + +#include <stdio.h> + +// XXX ugh +namespace klee { + class Solver; + + extern FILE* klee_warning_file; + extern FILE* klee_message_file; + + /// Print "KLEE: ERROR" followed by the msg in printf format and a + /// newline on stderr and to warnings.txt, then exit with an error. + void klee_error(const char *msg, ...) + __attribute__ ((format (printf, 1, 2), noreturn)); + + /// Print "KLEE: " followed by the msg in printf format and a + /// newline on stderr and to messages.txt. + void klee_message(const char *msg, ...) + __attribute__ ((format (printf, 1, 2))); + + /// Print "KLEE: " followed by the msg in printf format and a + /// newline to messages.txt. + void klee_message_to_file(const char *msg, ...) + __attribute__ ((format (printf, 1, 2))); + + /// Print "KLEE: WARNING" followed by the msg in printf format and a + /// newline on stderr and to warnings.txt. + void klee_warning(const char *msg, ...) + __attribute__ ((format (printf, 1, 2))); + + /// Print "KLEE: WARNING" followed by the msg in printf format and a + /// newline on stderr and to warnings.txt. However, the warning is only + /// printed once for each unique (id, msg) pair (as pointers). + void klee_warning_once(const void *id, + const char *msg, ...) + __attribute__ ((format (printf, 2, 3))); +} + +#endif /* __KLEE_COMMON_H__ */ diff --git a/lib/Core/CoreStats.cpp b/lib/Core/CoreStats.cpp new file mode 100644 index 00000000..ca2ef1c9 --- /dev/null +++ b/lib/Core/CoreStats.cpp @@ -0,0 +1,29 @@ +//===-- CoreStats.cpp -----------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
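Editor's note: klee_warning_once() above deduplicates by remembering (id, format-string pointer) pairs in a function-local static set, and collapses every "calling external ..." warning to a single key so the argument text baked into those messages does not defeat the suppression. Here is a standalone sketch of that scheme without the varargs plumbing; as in the original, keying on the pointer only deduplicates literal format strings, which is how callers use it.

#include <cstdio>
#include <cstring>
#include <set>
#include <utility>

// Print a warning at most once per (id, msg) pair.
static void warn_once(const void *id, const char *msg) {
  static std::set<std::pair<const void *, const char *> > seen;
  std::pair<const void *, const char *> key(id, msg);
  // "calling external ..." messages embed call arguments, so they all share
  // one key; otherwise each distinct format string gets its own key.
  if (strncmp(msg, "calling external", strlen("calling external")) == 0)
    key = std::make_pair(id, "calling external");
  if (seen.insert(key).second)                   // true only on first insert
    std::fprintf(stderr, "KLEE: WARNING: %s\n", msg);
}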
+// +//===----------------------------------------------------------------------===// + +#include "CoreStats.h" + +using namespace klee; + +Statistic stats::allocations("Allocations", "Alloc"); +Statistic stats::coveredInstructions("CoveredInstructions", "Icov"); +Statistic stats::falseBranches("FalseBranches", "Bf"); +Statistic stats::forkTime("ForkTime", "Ftime"); +Statistic stats::forks("Forks", "Forks"); +Statistic stats::instructionRealTime("InstructionRealTimes", "Ireal"); +Statistic stats::instructionTime("InstructionTimes", "Itime"); +Statistic stats::instructions("Instructions", "I"); +Statistic stats::minDistToReturn("MinDistToReturn", "Rdist"); +Statistic stats::minDistToUncovered("MinDistToUncovered", "UCdist"); +Statistic stats::reachableUncovered("ReachableUncovered", "IuncovReach"); +Statistic stats::resolveTime("ResolveTime", "Rtime"); +Statistic stats::solverTime("SolverTime", "Stime"); +Statistic stats::states("States", "States"); +Statistic stats::trueBranches("TrueBranches", "Bt"); +Statistic stats::uncoveredInstructions("UncoveredInstructions", "Iuncov"); diff --git a/lib/Core/CoreStats.h b/lib/Core/CoreStats.h new file mode 100644 index 00000000..09845a89 --- /dev/null +++ b/lib/Core/CoreStats.h @@ -0,0 +1,53 @@ +//===-- CoreStats.h ---------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_CORESTATS_H +#define KLEE_CORESTATS_H + +#include "klee/Statistic.h" + +namespace klee { +namespace stats { + + extern Statistic allocations; + extern Statistic resolveTime; + extern Statistic instructions; + extern Statistic instructionTime; + extern Statistic instructionRealTime; + extern Statistic coveredInstructions; + extern Statistic uncoveredInstructions; + extern Statistic trueBranches; + extern Statistic falseBranches; + extern Statistic forkTime; + extern Statistic solverTime; + + /// The number of process forks. + extern Statistic forks; + + /// Number of states, this is a "fake" statistic used by istats, it + /// isn't normally up-to-date. + extern Statistic states; + + /// Instruction level statistic for tracking number of reachable + /// uncovered instructions. + extern Statistic reachableUncovered; + + /// Instruction level statistic tracking the minimum intraprocedural + /// distance to an uncovered instruction; this is only periodically + /// updated. + extern Statistic minDistToUncovered; + + /// Instruction level statistic tracking the minimum intraprocedural + /// distance to a function return. + extern Statistic minDistToReturn; + +} +} + +#endif diff --git a/lib/Core/ExecutionState.cpp b/lib/Core/ExecutionState.cpp new file mode 100644 index 00000000..dd6d4647 --- /dev/null +++ b/lib/Core/ExecutionState.cpp @@ -0,0 +1,417 @@ +//===-- ExecutionState.cpp ------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
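Editor's note: the Statistic globals above rely on the registration scheme from Statistics.cpp earlier in this diff: each global's constructor registers it with a manager obtained through a function-local static, so the manager exists before the first statistic that needs it regardless of translation-unit initialization order. A minimal standalone sketch of that self-registration pattern follows; the names are invented and a vector stands in for KLEE's raw uint64_t arrays.

#include <cstdint>
#include <string>
#include <vector>

class Counter;

class Registry {
  std::vector<uint64_t> values;
  std::vector<const Counter *> counters;
public:
  unsigned add(const Counter *c) {       // called from Counter's constructor
    counters.push_back(c);
    values.push_back(0);
    return (unsigned)values.size() - 1;
  }
  void bump(unsigned id, uint64_t n) { values[id] += n; }
  uint64_t get(unsigned id) const { return values[id]; }
};

// Function-local static: constructed on first use, so it is ready before any
// global Counter finishes constructing, whatever the link order.
static Registry &registry() {
  static Registry r;
  return r;
}

class Counter {
  unsigned id;
  std::string name;
public:
  explicit Counter(const std::string &n) : id(registry().add(this)), name(n) {}
  Counter &operator+=(uint64_t n) { registry().bump(id, n); return *this; }
  uint64_t value() const { return registry().get(id); }
};

// Usage mirrors CoreStats.cpp: plain globals, incremented from anywhere.
static Counter forks("Forks");
static Counter instructions("Instructions");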
+// +//===----------------------------------------------------------------------===// + +#include "klee/ExecutionState.h" + +#include "klee/Internal/Module/Cell.h" +#include "klee/Internal/Module/InstructionInfoTable.h" +#include "klee/Internal/Module/KInstruction.h" +#include "klee/Internal/Module/KModule.h" + +#include "klee/Expr.h" + +#include "Memory.h" + +#include "llvm/Function.h" +#include "llvm/Support/CommandLine.h" + +#include <iostream> +#include <cassert> +#include <map> +#include <set> +#include <stdarg.h> + +using namespace llvm; +using namespace klee; + +namespace { + cl::opt<bool> + DebugLogStateMerge("debug-log-state-merge"); +} + +/***/ + +StackFrame::StackFrame(KInstIterator _caller, KFunction *_kf) + : caller(_caller), kf(_kf), callPathNode(0), + minDistToUncoveredOnReturn(0), varargs(0) { + locals = new Cell[kf->numRegisters]; +} + +StackFrame::StackFrame(const StackFrame &s) + : caller(s.caller), + kf(s.kf), + callPathNode(s.callPathNode), + allocas(s.allocas), + minDistToUncoveredOnReturn(s.minDistToUncoveredOnReturn), + varargs(s.varargs) { + locals = new Cell[s.kf->numRegisters]; + for (unsigned i=0; i<s.kf->numRegisters; i++) + locals[i] = s.locals[i]; +} + +StackFrame::~StackFrame() { + delete[] locals; +} + +/***/ + +ExecutionState::ExecutionState(KFunction *kf) + : fakeState(false), + underConstrained(false), + depth(0), + pc(kf->instructions), + prevPC(pc), + queryCost(0.), + weight(1), + instsSinceCovNew(0), + coveredNew(false), + forkDisabled(false), + ptreeNode(0) { + pushFrame(0, kf); +} + +ExecutionState::ExecutionState(const std::vector<ref<Expr> > &assumptions) + : fakeState(true), + underConstrained(false), + constraints(assumptions), + queryCost(0.), + ptreeNode(0) { +} + +ExecutionState::~ExecutionState() { + while (!stack.empty()) popFrame(); +} + +ExecutionState *ExecutionState::branch() { + depth++; + + ExecutionState *falseState = new ExecutionState(*this); + falseState->coveredNew = false; + falseState->coveredLines.clear(); + + weight *= .5; + falseState->weight -= weight; + + return falseState; +} + +void ExecutionState::pushFrame(KInstIterator caller, KFunction *kf) { + stack.push_back(StackFrame(caller,kf)); +} + +void ExecutionState::popFrame() { + StackFrame &sf = stack.back(); + for (std::vector<const MemoryObject*>::iterator it = sf.allocas.begin(), + ie = sf.allocas.end(); it != ie; ++it) + addressSpace.unbindObject(*it); + stack.pop_back(); +} + +/// + +std::string ExecutionState::getFnAlias(std::string fn) { + std::map < std::string, std::string >::iterator it = fnAliases.find(fn); + if (it != fnAliases.end()) + return it->second; + else return ""; +} + +void ExecutionState::addFnAlias(std::string old_fn, std::string new_fn) { + fnAliases[old_fn] = new_fn; +} + +void ExecutionState::removeFnAlias(std::string fn) { + fnAliases.erase(fn); +} + +/**/ + +std::ostream &klee::operator<<(std::ostream &os, const MemoryMap &mm) { + os << "{"; + MemoryMap::iterator it = mm.begin(); + MemoryMap::iterator ie = mm.end(); + if (it!=ie) { + os << "MO" << it->first->id << ":" << it->second; + for (++it; it!=ie; ++it) + os << ", MO" << it->first->id << ":" << it->second; + } + os << "}"; + return os; +} + +bool ExecutionState::merge(const ExecutionState &b) { + if (DebugLogStateMerge) + llvm::cerr << "-- attempting merge of A:" + << this << " with B:" << &b << "--\n"; + if (pc != b.pc) + return false; + + // XXX is it even possible for these to differ? does it matter? probably + // implies difference in object states? 
+ if (symbolics!=b.symbolics) + return false; + + { + std::vector<StackFrame>::const_iterator itA = stack.begin(); + std::vector<StackFrame>::const_iterator itB = b.stack.begin(); + while (itA!=stack.end() && itB!=b.stack.end()) { + // XXX vaargs? + if (itA->caller!=itB->caller || itA->kf!=itB->kf) + return false; + ++itA; + ++itB; + } + if (itA!=stack.end() || itB!=b.stack.end()) + return false; + } + + std::set< ref<Expr> > aConstraints(constraints.begin(), constraints.end()); + std::set< ref<Expr> > bConstraints(b.constraints.begin(), + b.constraints.end()); + std::set< ref<Expr> > commonConstraints, aSuffix, bSuffix; + std::set_intersection(aConstraints.begin(), aConstraints.end(), + bConstraints.begin(), bConstraints.end(), + std::inserter(commonConstraints, commonConstraints.begin())); + std::set_difference(aConstraints.begin(), aConstraints.end(), + commonConstraints.begin(), commonConstraints.end(), + std::inserter(aSuffix, aSuffix.end())); + std::set_difference(bConstraints.begin(), bConstraints.end(), + commonConstraints.begin(), commonConstraints.end(), + std::inserter(bSuffix, bSuffix.end())); + if (DebugLogStateMerge) { + llvm::cerr << "\tconstraint prefix: ["; + for (std::set< ref<Expr> >::iterator it = commonConstraints.begin(), + ie = commonConstraints.end(); it != ie; ++it) + llvm::cerr << *it << ", "; + llvm::cerr << "]\n"; + llvm::cerr << "\tA suffix: ["; + for (std::set< ref<Expr> >::iterator it = aSuffix.begin(), + ie = aSuffix.end(); it != ie; ++it) + llvm::cerr << *it << ", "; + llvm::cerr << "]\n"; + llvm::cerr << "\tB suffix: ["; + for (std::set< ref<Expr> >::iterator it = bSuffix.begin(), + ie = bSuffix.end(); it != ie; ++it) + llvm::cerr << *it << ", "; + llvm::cerr << "]\n"; + } + + // We cannot merge if addresses would resolve differently in the + // states. This means: + // + // 1. Any objects created since the branch in either object must + // have been free'd. + // + // 2. We cannot have free'd any pre-existing object in one state + // and not the other + + if (DebugLogStateMerge) { + llvm::cerr << "\tchecking object states\n"; + llvm::cerr << "A: " << addressSpace.objects << "\n"; + llvm::cerr << "B: " << b.addressSpace.objects << "\n"; + } + + std::set<const MemoryObject*> mutated; + MemoryMap::iterator ai = addressSpace.objects.begin(); + MemoryMap::iterator bi = b.addressSpace.objects.begin(); + MemoryMap::iterator ae = addressSpace.objects.end(); + MemoryMap::iterator be = b.addressSpace.objects.end(); + for (; ai!=ae && bi!=be; ++ai, ++bi) { + if (ai->first != bi->first) { + if (DebugLogStateMerge) { + if (ai->first < bi->first) { + llvm::cerr << "\t\tB misses binding for: " << ai->first->id << "\n"; + } else { + llvm::cerr << "\t\tA misses binding for: " << bi->first->id << "\n"; + } + } + return false; + } + if (ai->second != bi->second) { + if (DebugLogStateMerge) + llvm::cerr << "\t\tmutated: " << ai->first->id << "\n"; + mutated.insert(ai->first); + } + } + if (ai!=ae || bi!=be) { + if (DebugLogStateMerge) + llvm::cerr << "\t\tmappings differ\n"; + return false; + } + + // merge stack + + ref<Expr> inA(1, Expr::Bool), inB(1, Expr::Bool); + for (std::set< ref<Expr> >::iterator it = aSuffix.begin(), + ie = aSuffix.end(); it != ie; ++it) + inA = AndExpr::create(inA, *it); + for (std::set< ref<Expr> >::iterator it = bSuffix.begin(), + ie = bSuffix.end(); it != ie; ++it) + inB = AndExpr::create(inB, *it); + + // XXX should we have a preference as to which predicate to use? 
+ // it seems like it can make a difference, even though logically + // they must contradict each other and so inA => !inB + + std::vector<StackFrame>::iterator itA = stack.begin(); + std::vector<StackFrame>::const_iterator itB = b.stack.begin(); + for (; itA!=stack.end(); ++itA, ++itB) { + StackFrame &af = *itA; + const StackFrame &bf = *itB; + for (unsigned i=0; i<af.kf->numRegisters; i++) { + ref<Expr> &av = af.locals[i].value; + const ref<Expr> &bv = bf.locals[i].value; + if (av.isNull() || bv.isNull()) { + // if one is null then by implication (we are at same pc) + // we cannot reuse this local, so just ignore + } else { + av = SelectExpr::create(inA, av, bv); + } + } + } + + for (std::set<const MemoryObject*>::iterator it = mutated.begin(), + ie = mutated.end(); it != ie; ++it) { + const MemoryObject *mo = *it; + const ObjectState *os = addressSpace.findObject(mo); + const ObjectState *otherOS = b.addressSpace.findObject(mo); + assert(os && !os->readOnly && + "objects mutated but not writable in merging state"); + assert(otherOS); + + ObjectState *wos = addressSpace.getWriteable(mo, os); + for (unsigned i=0; i<mo->size; i++) { + ref<Expr> av = wos->read8(i); + ref<Expr> bv = otherOS->read8(i); + wos->write(i, SelectExpr::create(inA, av, bv)); + } + } + + constraints = ConstraintManager(); + for (std::set< ref<Expr> >::iterator it = commonConstraints.begin(), + ie = commonConstraints.end(); it != ie; ++it) + constraints.addConstraint(*it); + constraints.addConstraint(OrExpr::create(inA, inB)); + + return true; +} + +/**/ + +/* + Used for tainting: create a clone of os that we can revirt to with + the behavior that all constraints are preserved, but writes are + discarded. When we revirt it will be at the same address. + */ +ObjectState *ExecutionState::cloneObject(ObjectState *os, + MemoryObject *mo) { + MemoryMap::iterator it = shadowObjects.find(mo); + if (it != shadowObjects.end()) + assert(0 && "Cannot exist already!"); + + llvm::cerr << "DRE: Inserting a cloned object: " << mo << "\n"; + shadowObjects = shadowObjects.replace(std::make_pair(mo, os)); + os = new ObjectState(*os); + addressSpace.bindObject(mo, os); + return os; +} + +/***/ + + +ExecutionTraceEvent::ExecutionTraceEvent(ExecutionState& state, + KInstruction* ki) + : consecutiveCount(1) +{ + file = ki->info->file; + line = ki->info->line; + funcName = state.stack.back().kf->function->getName(); + stackDepth = state.stack.size(); +} + +bool ExecutionTraceEvent::ignoreMe() const { + // ignore all events occurring in certain pesky uclibc files: + if (file.find("libc/stdio/") != std::string::npos) { + return true; + } + + return false; +} + +void ExecutionTraceEvent::print(std::ostream &os) const { + os.width(stackDepth); + os << ' '; + printDetails(os); + os << ' ' << file << ':' << line << ':' << funcName; + if (consecutiveCount > 1) + os << " (" << consecutiveCount << "x)\n"; + else + os << '\n'; +} + + +bool ExecutionTraceEventEquals(ExecutionTraceEvent* e1, ExecutionTraceEvent* e2) { + // first see if their base class members are identical: + if (!((e1->file == e2->file) && + (e1->line == e2->line) && + (e1->funcName == e2->funcName))) + return false; + + // fairly ugly, but i'm no OOP master, so this is the way i'm + // doing it for now ... 
lemme know if there's a cleaner way: + BranchTraceEvent* be1 = dynamic_cast<BranchTraceEvent*>(e1); + BranchTraceEvent* be2 = dynamic_cast<BranchTraceEvent*>(e2); + if (be1 && be2) { + return ((be1->trueTaken == be2->trueTaken) && + (be1->canForkGoBothWays == be2->canForkGoBothWays)); + } + + // don't tolerate duplicates in anything else: + return false; +} + + +void BranchTraceEvent::printDetails(std::ostream &os) const { + os << "BRANCH " << (trueTaken ? "T" : "F") << ' ' << + (canForkGoBothWays ? "2-way" : "1-way"); +} + +void ExecutionTraceManager::addEvent(ExecutionTraceEvent* evt) { + // don't trace anything before __user_main, except for global events + if (!hasSeenUserMain) { + if (evt->funcName == "__user_main") { + hasSeenUserMain = true; + } + else if (evt->funcName != "global_def") { + return; + } + } + + // custom ignore events: + if (evt->ignoreMe()) + return; + + if (events.size() > 0) { + // compress consecutive duplicates: + ExecutionTraceEvent* last = events.back(); + if (ExecutionTraceEventEquals(last, evt)) { + last->consecutiveCount++; + return; + } + } + + events.push_back(evt); +} + +void ExecutionTraceManager::printAllEvents(std::ostream &os) const { + for (unsigned i = 0; i != events.size(); ++i) + events[i]->print(os); +} + +/***/ diff --git a/lib/Core/Executor.cpp b/lib/Core/Executor.cpp new file mode 100644 index 00000000..d3409908 --- /dev/null +++ b/lib/Core/Executor.cpp @@ -0,0 +1,3260 @@ +//===-- Executor.cpp ------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Common.h" + +#include "Executor.h" + +#include "CoreStats.h" +#include "ExternalDispatcher.h" +#include "ImpliedValue.h" +#include "Memory.h" +#include "MemoryManager.h" +#include "PTree.h" +#include "Searcher.h" +#include "SeedInfo.h" +#include "SpecialFunctionHandler.h" +#include "StatsTracker.h" +#include "TimingSolver.h" +#include "UserSearcher.h" +#include "../Solver/SolverStats.h" + +#include "klee/ExecutionState.h" +#include "klee/Expr.h" +#include "klee/Interpreter.h" +#include "klee/Machine.h" +#include "klee/TimerStatIncrementer.h" +#include "klee/util/Assignment.h" +#include "klee/util/ExprPPrinter.h" +#include "klee/util/ExprUtil.h" +#include "klee/Config/config.h" +#include "klee/Internal/ADT/BOut.h" +#include "klee/Internal/ADT/RNG.h" +#include "klee/Internal/Module/Cell.h" +#include "klee/Internal/Module/InstructionInfoTable.h" +#include "klee/Internal/Module/KInstruction.h" +#include "klee/Internal/Module/KModule.h" +#include "klee/Internal/Support/FloatEvaluation.h" +#include "klee/Internal/System/Time.h" + +#include "llvm/Attributes.h" +#include "llvm/BasicBlock.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/System/Process.h" +#include "llvm/Target/TargetData.h" + +#include <cassert> +#include <algorithm> +#include <iostream> +#include <iomanip> +#include <fstream> +#include <sstream> +#include <vector> +#include <string> + +#include <sys/mman.h> + +#include <errno.h> +#include <cxxabi.h> + +using namespace llvm; +using namespace klee; + +// omg really hard to share cl opts 
across files ... +bool WriteTraces = false; + +namespace { + cl::opt<bool> + DumpStatesOnHalt("dump-states-on-halt", + cl::init(true)); + + cl::opt<bool> + NoPreferCex("no-prefer-cex", + cl::init(false)); + + cl::opt<bool> + UseAsmAddresses("use-asm-addresses", + cl::init(false)); + + cl::opt<bool> + RandomizeFork("randomize-fork", + cl::init(false)); + + cl::opt<bool> + AllowExternalSymCalls("allow-external-sym-calls", + cl::init(false)); + + cl::opt<bool> + DebugPrintInstructions("debug-print-instructions", + cl::desc("Print instructions during execution.")); + + cl::opt<bool> + DebugCheckForImpliedValues("debug-check-for-implied-values"); + + + cl::opt<bool> + SimplifySymIndices("simplify-sym-indices", + cl::init(false)); + + cl::opt<unsigned> + MaxSymArraySize("max-sym-array-size", + cl::init(0)); + + cl::opt<bool> + DebugValidateSolver("debug-validate-solver", + cl::init(false)); + + cl::opt<bool> + SuppressExternalWarnings("suppress-external-warnings"); + + cl::opt<bool> + AllExternalWarnings("all-external-warnings"); + + cl::opt<bool> + OnlyOutputStatesCoveringNew("only-output-states-covering-new", + cl::init(false)); + + cl::opt<bool> + AlwaysOutputSeeds("always-output-seeds", + cl::init(true)); + + cl::opt<bool> + UseFastCexSolver("use-fast-cex-solver", + cl::init(false)); + + cl::opt<bool> + UseIndependentSolver("use-independent-solver", + cl::init(true), + cl::desc("Use constraint independence")); + + cl::opt<bool> + EmitAllErrors("emit-all-errors", + cl::init(false), + cl::desc("Generate tests cases for all errors " + "(default=one per (error,instruction) pair)")); + + cl::opt<bool> + UseCexCache("use-cex-cache", + cl::init(true), + cl::desc("Use counterexample caching")); + + cl::opt<bool> + UseQueryLog("use-query-log", + cl::init(false)); + + cl::opt<bool> + UseQueryPCLog("use-query-pc-log", + cl::init(false)); + + cl::opt<bool> + UseSTPQueryPCLog("use-stp-query-pc-log", + cl::init(false)); + + cl::opt<bool> + NoExternals("no-externals", + cl::desc("Do not allow external functin calls")); + + cl::opt<bool> + UseCache("use-cache", + cl::init(true), + cl::desc("Use validity caching")); + + cl::opt<bool> + OnlyReplaySeeds("only-replay-seeds", + cl::desc("Discard states that do not have a seed.")); + + cl::opt<bool> + OnlySeed("only-seed", + cl::desc("Stop execution after seeding is done without doing regular search.")); + + cl::opt<bool> + AllowSeedExtension("allow-seed-extension", + cl::desc("Allow extra (unbound) values to become symbolic during seeding.")); + + cl::opt<bool> + ZeroSeedExtension("zero-seed-extension"); + + cl::opt<bool> + AllowSeedTruncation("allow-seed-truncation", + cl::desc("Allow smaller buffers than in seeds.")); + + cl::opt<bool> + NamedSeedMatching("named-seed-matching", + cl::desc("Use names to match symbolic objects to inputs.")); + + cl::opt<double> + MaxStaticForkPct("max-static-fork-pct", cl::init(1.)); + cl::opt<double> + MaxStaticSolvePct("max-static-solve-pct", cl::init(1.)); + cl::opt<double> + MaxStaticCPForkPct("max-static-cpfork-pct", cl::init(1.)); + cl::opt<double> + MaxStaticCPSolvePct("max-static-cpsolve-pct", cl::init(1.)); + + cl::opt<double> + MaxInstructionTime("max-instruction-time", + cl::desc("Only allow a single instruction to take this much time (default=0 (off))"), + cl::init(0)); + + cl::opt<double> + SeedTime("seed-time", + cl::desc("Amount of time to dedicate to seeds, before normal search (default=0 (off))"), + cl::init(0)); + + cl::opt<double> + MaxSTPTime("max-stp-time", + cl::desc("Maximum amount of time for a single 
query (default=120s)"), + cl::init(120.0)); + + cl::opt<unsigned int> + StopAfterNInstructions("stop-after-n-instructions", + cl::desc("Stop execution after specified number of instructions (0=off)"), + cl::init(0)); + + cl::opt<unsigned> + MaxForks("max-forks", + cl::desc("Only fork this many times (-1=off)"), + cl::init(~0u)); + + cl::opt<unsigned> + MaxDepth("max-depth", + cl::desc("Only allow this many symbolic branches (0=off)"), + cl::init(0)); + + cl::opt<unsigned> + MaxMemory("max-memory", + cl::desc("Refuse to fork when more above this about of memory (in MB, 0=off)"), + cl::init(0)); + + cl::opt<bool> + MaxMemoryInhibit("max-memory-inhibit", + cl::desc("Inhibit forking at memory cap (vs. random terminat)"), + cl::init(true)); + + // use 'external storage' because also needed by tools/klee/main.cpp + cl::opt<bool, true> + WriteTracesProxy("write-traces", + cl::desc("Write .trace file for each terminated state"), + cl::location(WriteTraces), + cl::init(false)); + + cl::opt<bool> + UseForkedSTP("use-forked-stp", + cl::desc("Run STP in forked process")); +} + + +static void *theMMap = 0; +static unsigned theMMapSize = 0; + +namespace klee { + RNG theRNG; +} + +Solver *constructSolverChain(STPSolver *stpSolver, + std::string queryLogPath, + std::string stpQueryLogPath, + std::string queryPCLogPath, + std::string stpQueryPCLogPath) { + Solver *solver = stpSolver; + + if (UseSTPQueryPCLog) + solver = createPCLoggingSolver(solver, + stpQueryLogPath); + + if (UseFastCexSolver) + solver = createFastCexSolver(solver); + + if (UseCexCache) + solver = createCexCachingSolver(solver); + + if (UseCache) + solver = createCachingSolver(solver); + + if (UseIndependentSolver) + solver = createIndependentSolver(solver); + + if (DebugValidateSolver) + solver = createValidatingSolver(solver, stpSolver); + + if (UseQueryPCLog) + solver = createPCLoggingSolver(solver, + queryPCLogPath); + + return solver; +} + +Executor::Executor(const InterpreterOptions &opts, + InterpreterHandler *ih) + : Interpreter(opts), + kmodule(0), + interpreterHandler(ih), + searcher(0), + externalDispatcher(new ExternalDispatcher()), + statsTracker(0), + pathWriter(0), + symPathWriter(0), + specialFunctionHandler(0), + processTree(0), + replayOut(0), + replayPath(0), + usingSeeds(0), + atMemoryLimit(false), + inhibitForking(false), + haltExecution(false), + ivcEnabled(false), + stpTimeout(std::min(MaxSTPTime,MaxInstructionTime)) { + STPSolver *stpSolver = new STPSolver(UseForkedSTP); + Solver *solver = + constructSolverChain(stpSolver, + interpreterHandler->getOutputFilename("queries.qlog"), + interpreterHandler->getOutputFilename("stp-queries.qlog"), + interpreterHandler->getOutputFilename("queries.pc"), + interpreterHandler->getOutputFilename("stp-queries.pc")); + + this->solver = new TimingSolver(solver, stpSolver); + + memory = new MemoryManager(); +} + + +const Module *Executor::setModule(llvm::Module *module, + const ModuleOptions &opts) { + assert(!kmodule && module && "can only register one module"); // XXX gross + + kmodule = new KModule(module); + + specialFunctionHandler = new SpecialFunctionHandler(*this); + + specialFunctionHandler->prepare(); + kmodule->prepare(opts, interpreterHandler); + specialFunctionHandler->bind(); + + if (StatsTracker::useStatistics()) { + statsTracker = + new StatsTracker(*this, + interpreterHandler->getOutputFilename("assembly.ll"), + userSearcherRequiresMD2U()); + } + + return module; +} + +Executor::~Executor() { + delete memory; + delete externalDispatcher; + if (processTree) + delete 
processTree; + if (specialFunctionHandler) + delete specialFunctionHandler; + if (statsTracker) + delete statsTracker; + delete solver; + delete kmodule; +} + +/***/ + +void Executor::initializeGlobalObject(ExecutionState &state, ObjectState *os, + Constant *c, + unsigned offset) { + TargetData *targetData = kmodule->targetData; + if (ConstantVector *cp = dyn_cast<ConstantVector>(c)) { + unsigned elementSize = + targetData->getTypeStoreSize(cp->getType()->getElementType()); + for (unsigned i=0, e=cp->getNumOperands(); i != e; ++i) + initializeGlobalObject(state, os, cp->getOperand(i), + offset + i*elementSize); + } else if (isa<ConstantAggregateZero>(c)) { + unsigned i, size = targetData->getTypeStoreSize(c->getType()); + for (i=0; i<size; i++) + os->write8(offset+i, (uint8_t) 0); + } else if (ConstantArray *ca = dyn_cast<ConstantArray>(c)) { + unsigned elementSize = + targetData->getTypeStoreSize(ca->getType()->getElementType()); + for (unsigned i=0, e=ca->getNumOperands(); i != e; ++i) + initializeGlobalObject(state, os, ca->getOperand(i), + offset + i*elementSize); + } else if (ConstantStruct *cs = dyn_cast<ConstantStruct>(c)) { + const StructLayout *sl = + targetData->getStructLayout(cast<StructType>(cs->getType())); + for (unsigned i=0, e=cs->getNumOperands(); i != e; ++i) + initializeGlobalObject(state, os, cs->getOperand(i), + offset + sl->getElementOffset(i)); + } else { + os->write(offset, evalConstant(c)); + } +} + +MemoryObject * Executor::addExternalObject(ExecutionState &state, + void *addr, unsigned size, + bool isReadOnly) { + MemoryObject *mo = memory->allocateFixed((uint64_t) (unsigned long) addr, + size, 0); + ObjectState *os = bindObjectInState(state, mo, false); + for(unsigned i = 0; i < size; i++) + os->write8(i, ((uint8_t*)addr)[i]); + if(isReadOnly) + os->setReadOnly(true); + return mo; +} + +void Executor::initializeGlobals(ExecutionState &state) { + Module *m = kmodule->module; + + if (m->getModuleInlineAsm() != "") + klee_warning("executable has module level assembly (ignoring)"); + + assert(m->lib_begin() == m->lib_end() && + "XXX do not support dependent libraries"); + + // represent function globals using the address of the actual llvm function + // object. given that we use malloc to allocate memory in states this also + // ensures that we won't conflict. we don't need to allocate a memory object + // since reading/writing via a function pointer is unsupported anyway. + for (Module::iterator i = m->begin(), ie = m->end(); i != ie; ++i) { + Function *f = i; + ref<Expr> addr(0); + + // If the symbol has external weak linkage then it is implicitly + // not defined in this module; if it isn't resolvable then it + // should be null. + if (f->hasExternalWeakLinkage() && + !externalDispatcher->resolveSymbol(f->getName())) { + addr = Expr::createPointer(0); + } else { + addr = Expr::createPointer((unsigned long) (void*) f); + legalFunctions.insert(f); + } + + globalAddresses.insert(std::make_pair(f, addr)); + } + + // Disabled, we don't want to promote use of live externals. +#ifdef HAVE_CTYPE_EXTERNALS +#ifndef WINDOWS +#ifndef DARWIN + /* From /usr/include/errno.h: it [errno] is a per-thread variable. */ + int *errno_addr = __errno_location(); + addExternalObject(state, (void *)errno_addr, sizeof *errno_addr, false); + + /* from /usr/include/ctype.h: + These point into arrays of 384, so they can be indexed by any `unsigned + char' value [0,255]; by EOF (-1); or by any `signed char' value + [-128,-1). 
ISO C requires that the ctype functions work for `unsigned */ + const uint16_t **addr = __ctype_b_loc(); + addExternalObject(state, (void *)(*addr-128), + 384 * sizeof **addr, true); + addExternalObject(state, addr, 4, true); + + const int32_t **lower_addr = __ctype_tolower_loc(); + addExternalObject(state, (void *)(*lower_addr-128), + 384 * sizeof **lower_addr, true); + addExternalObject(state, lower_addr, 4, true); + + const int32_t **upper_addr = __ctype_toupper_loc(); + addExternalObject(state, (void *)(*upper_addr-128), + 384 * sizeof **upper_addr, true); + addExternalObject(state, upper_addr, 4, true); +#endif +#endif +#endif + + // allocate and initialize globals, done in two passes since we may + // need address of a global in order to initialize some other one. + + // allocate memory objects for all globals + for (Module::const_global_iterator i = m->global_begin(), + e = m->global_end(); + i != e; ++i) { + if (i->isDeclaration()) { + // FIXME: We have no general way of handling unknown external + // symbols. If we really cared about making external stuff work + // better we could support user definition, or use the EXE style + // hack where we check the object file information. + + const Type *ty = i->getType()->getElementType(); + const std::string &name = i->getName(); + uint64_t size = kmodule->targetData->getTypeStoreSize(ty); + + // XXX - DWD - hardcode some things until we decide how to fix. +#ifndef WINDOWS + if (name == "_ZTVN10__cxxabiv117__class_type_infoE") { + size = 0x2C; + } else if (name == "_ZTVN10__cxxabiv120__si_class_type_infoE") { + size = 0x2C; + } else if (name == "_ZTVN10__cxxabiv121__vmi_class_type_infoE") { + size = 0x2C; + } +#endif + + if (size == 0) { + llvm::cerr << "Unable to find size for global variable: " << i->getName() + << " (use will result in out of bounds access)\n"; + } + + MemoryObject *mo = memory->allocate(size, false, true, i); + ObjectState *os = bindObjectInState(state, mo, false); + globalObjects.insert(std::make_pair(i, mo)); + globalAddresses.insert(std::make_pair(i, mo->getBaseExpr())); + + // Program already running = object already initialized. Read + // concrete value and write it to our copy. + if (size) { + void *addr; + if (name=="__dso_handle") { + extern void *__dso_handle __attribute__ ((__weak__)); + addr = &__dso_handle; // wtf ? 
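+          // (Added note, an assumption about this special case: __dso_handle is
+          // normally a hidden, per-object symbol, so it is unlikely to be
+          // resolvable through the external dispatcher; taking the weak
+          // symbol's address directly is presumably why this case exists.)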
+ } else { + addr = externalDispatcher->resolveSymbol(name); + } + if (!addr) + klee_error("unable to load symbol(%s) while initializing globals.", + name.c_str()); + + for (unsigned offset=0; offset<mo->size; offset++) + os->write8(offset, ((unsigned char*)addr)[offset]); + } + } else { + const std::string &name = i->getName(); + const Type *ty = i->getType()->getElementType(); + uint64_t size = kmodule->targetData->getTypeStoreSize(ty); + MemoryObject *mo = 0; + + if (UseAsmAddresses && name[0]=='\01') { + char *end; + uint64_t address = ::strtoll(name.c_str()+1, &end, 0); + + if (end && *end == '\0') { + klee_message("NOTE: allocated global at asm specified address: %#08llx" + " (%llu bytes)", + address, size); + mo = memory->allocateFixed(address, size, &*i); + mo->isUserSpecified = true; // XXX hack; + } + } + + if (!mo) + mo = memory->allocate(size, false, true, &*i); + assert(mo && "out of memory"); + ObjectState *os = bindObjectInState(state, mo, false); + globalObjects.insert(std::make_pair(i, mo)); + globalAddresses.insert(std::make_pair(i, mo->getBaseExpr())); + + if (!i->hasInitializer()) + os->initializeToRandom(); + } + } + + // link aliases to their definitions (if bound) + for (Module::alias_iterator i = m->alias_begin(), ie = m->alias_end(); + i != ie; ++i) { + // Map the alias to its aliasee's address. This works because we have + // addresses for everything, even undefined functions. + globalAddresses.insert(std::make_pair(i, evalConstant(i->getAliasee()))); + } + + // once all objects are allocated, do the actual initialization + for (Module::const_global_iterator i = m->global_begin(), + e = m->global_end(); + i != e; ++i) { + if (i->hasInitializer()) { + MemoryObject *mo = globalObjects.find(i)->second; + const ObjectState *os = state.addressSpace.findObject(mo); + assert(os); + ObjectState *wos = state.addressSpace.getWriteable(mo, os); + + initializeGlobalObject(state, wos, i->getInitializer(), 0); + // if(i->isConstant()) os->setReadOnly(true); + } + } +} + +void Executor::branch(ExecutionState &state, + const std::vector< ref<Expr> > &conditions, + std::vector<ExecutionState*> &result) { + TimerStatIncrementer timer(stats::forkTime); + unsigned N = conditions.size(); + assert(N); + + stats::forks += N-1; + + // XXX do proper balance or keep random? + result.push_back(&state); + for (unsigned i=1; i<N; ++i) { + ExecutionState *es = result[theRNG.getInt32() % i]; + ExecutionState *ns = es->branch(); + addedStates.insert(ns); + result.push_back(ns); + es->ptreeNode->data = 0; + std::pair<PTree::Node*,PTree::Node*> res = + processTree->split(es->ptreeNode, ns, es); + ns->ptreeNode = res.first; + es->ptreeNode = res.second; + } + + // If necessary redistribute seeds to match conditions, killing + // states if necessary due to OnlyReplaySeeds (inefficient but + // simple). + + std::map< ExecutionState*, std::vector<SeedInfo> >::iterator it = + seedMap.find(&state); + if (it != seedMap.end()) { + std::vector<SeedInfo> seeds = it->second; + seedMap.erase(it); + + // Assume each seed only satisfies one condition (necessarily true + // when conditions are mutually exclusive and their conjunction is + // a tautology). 
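+  // For each seed, evaluate every branch condition under the seed's
+  // assignment and attach the seed to the first state whose condition it
+  // satisfies; if none matches, attach it to a random state (that seed is
+  // patched later when the chosen constraint is added).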
+ for (std::vector<SeedInfo>::iterator siit = seeds.begin(), + siie = seeds.end(); siit != siie; ++siit) { + unsigned i; + for (i=0; i<N; ++i) { + ref<Expr> res; + bool success = + solver->getValue(state, siit->assignment.evaluate(conditions[i]), + res); + assert(success && "FIXME: Unhandled solver failure"); + if (res.getConstantValue()) + break; + } + + // If we didn't find a satisfying condition randomly pick one + // (the seed will be patched). + if (i==N) + i = theRNG.getInt32() % N; + + seedMap[result[i]].push_back(*siit); + } + + if (OnlyReplaySeeds) { + for (unsigned i=0; i<N; ++i) { + if (!seedMap.count(result[i])) { + terminateState(*result[i]); + result[i] = NULL; + } + } + } + } + + for (unsigned i=0; i<N; ++i) + if (result[i]) + addConstraint(*result[i], conditions[i]); +} + +Executor::StatePair +Executor::fork(ExecutionState ¤t, ref<Expr> condition, bool isInternal) { + Solver::Validity res; + std::map< ExecutionState*, std::vector<SeedInfo> >::iterator it = + seedMap.find(¤t); + bool isSeeding = it != seedMap.end(); + + if (!isSeeding && + !condition.isConstant() && + (MaxStaticForkPct!=1. || MaxStaticSolvePct != 1. || + MaxStaticCPForkPct!=1. || MaxStaticCPSolvePct != 1.) && + statsTracker->elapsed() > 60.) { + StatisticManager &sm = *theStatisticManager; + CallPathNode *cpn = current.stack.back().callPathNode; + if ((MaxStaticForkPct<1. && + sm.getIndexedValue(stats::forks, sm.getIndex()) > + stats::forks*MaxStaticForkPct) || + (MaxStaticCPForkPct<1. && + cpn && (cpn->statistics.getValue(stats::forks) > + stats::forks*MaxStaticCPForkPct)) || + (MaxStaticSolvePct<1 && + sm.getIndexedValue(stats::solverTime, sm.getIndex()) > + stats::solverTime*MaxStaticSolvePct) || + (MaxStaticCPForkPct<1. && + cpn && (cpn->statistics.getValue(stats::solverTime) > + stats::solverTime*MaxStaticCPSolvePct))) { + ref<Expr> value; + bool success = solver->getValue(current, condition, value); + assert(success && "FIXME: Unhandled solver failure"); + addConstraint(current, EqExpr::create(value, condition)); + condition = value; + } + } + + double timeout = stpTimeout; + if (isSeeding) + timeout *= it->second.size(); + solver->setTimeout(timeout); + bool success = solver->evaluate(current, condition, res); + solver->setTimeout(0); + if (!success) { + current.pc = current.prevPC; + terminateStateEarly(current, "query timed out"); + return StatePair(0, 0); + } + + if (!isSeeding) { + if (replayPath && !isInternal) { + assert(replayPosition<replayPath->size() && + "ran out of branches in replay path mode"); + bool branch = (*replayPath)[replayPosition++]; + + if (res==Solver::True) { + assert(branch && "hit invalid branch in replay path mode"); + } else if (res==Solver::False) { + assert(!branch && "hit invalid branch in replay path mode"); + } else { + // add constraints + if(branch) { + res = Solver::True; + addConstraint(current, condition); + } else { + res = Solver::False; + addConstraint(current, Expr::createNot(condition)); + } + } + } else if (res==Solver::Unknown) { + assert(!replayOut && "in replay mode, only one branch can be true."); + + if ((MaxMemoryInhibit && atMemoryLimit) || + current.forkDisabled || + inhibitForking || + (MaxForks!=~0u && stats::forks >= MaxForks)) { + TimerStatIncrementer timer(stats::forkTime); + if (theRNG.getBool()) { + addConstraint(current, condition); + res = Solver::True; + } else { + addConstraint(current, Expr::createNot(condition)); + res = Solver::False; + } + } + } + } + + // Fix branch in only-replay-seed mode, if we don't have both true + // and false 
seeds. + if (isSeeding && + (current.forkDisabled || OnlyReplaySeeds) && + res == Solver::Unknown) { + bool trueSeed=false, falseSeed=false; + // Is seed extension still ok here? + for (std::vector<SeedInfo>::iterator siit = it->second.begin(), + siie = it->second.end(); siit != siie; ++siit) { + ref<Expr> res; + bool success = + solver->getValue(current, siit->assignment.evaluate(condition), res); + assert(success && "FIXME: Unhandled solver failure"); + if (res.isConstant()) { + if (res.getConstantValue()) { + trueSeed = true; + } else { + falseSeed = true; + } + if (trueSeed && falseSeed) + break; + } + } + if (!(trueSeed && falseSeed)) { + assert(trueSeed || falseSeed); + + res = trueSeed ? Solver::True : Solver::False; + addConstraint(current, trueSeed ? condition : Expr::createNot(condition)); + } + } + + + // XXX - even if the constraint is provable one way or the other we + // can probably benefit by adding this constraint and allowing it to + // reduce the other constraints. For example, if we do a binary + // search on a particular value, and then see a comparison against + // the value it has been fixed at, we should take this as a nice + // hint to just use the single constraint instead of all the binary + // search ones. If that makes sense. + if (res==Solver::True) { + if (!isInternal) { + if (pathWriter) { + current.pathOS << "1"; + } + } + + return StatePair(¤t, 0); + } else if (res==Solver::False) { + if (!isInternal) { + if (pathWriter) { + current.pathOS << "0"; + } + } + + return StatePair(0, ¤t); + } else { + TimerStatIncrementer timer(stats::forkTime); + ExecutionState *falseState, *trueState = ¤t; + + ++stats::forks; + + falseState = trueState->branch(); + addedStates.insert(falseState); + + if (RandomizeFork && theRNG.getBool()) + std::swap(trueState, falseState); + + if (it != seedMap.end()) { + std::vector<SeedInfo> seeds = it->second; + it->second.clear(); + std::vector<SeedInfo> &trueSeeds = seedMap[trueState]; + std::vector<SeedInfo> &falseSeeds = seedMap[falseState]; + for (std::vector<SeedInfo>::iterator siit = seeds.begin(), + siie = seeds.end(); siit != siie; ++siit) { + ref<Expr> res; + bool success = + solver->getValue(current, siit->assignment.evaluate(condition), res); + assert(success && "FIXME: Unhandled solver failure"); + if (res.getConstantValue()) { + trueSeeds.push_back(*siit); + } else { + falseSeeds.push_back(*siit); + } + } + + bool swapInfo = false; + if (trueSeeds.empty()) { + if (¤t == trueState) swapInfo = true; + seedMap.erase(trueState); + } + if (falseSeeds.empty()) { + if (¤t == falseState) swapInfo = true; + seedMap.erase(falseState); + } + if (swapInfo) { + std::swap(trueState->coveredNew, falseState->coveredNew); + std::swap(trueState->coveredLines, falseState->coveredLines); + } + } + + current.ptreeNode->data = 0; + std::pair<PTree::Node*, PTree::Node*> res = + processTree->split(current.ptreeNode, falseState, trueState); + falseState->ptreeNode = res.first; + trueState->ptreeNode = res.second; + + if (!isInternal) { + if (pathWriter) { + falseState->pathOS = pathWriter->open(current.pathOS); + trueState->pathOS << "1"; + falseState->pathOS << "0"; + } + if (symPathWriter) { + falseState->symPathOS = symPathWriter->open(current.symPathOS); + trueState->symPathOS << "1"; + falseState->symPathOS << "0"; + } + } + + addConstraint(*trueState, condition); + addConstraint(*falseState, Expr::createNot(condition)); + + // Kinda gross, do we even really still want this option? 
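+  // If either child state has exceeded the --max-depth limit on symbolic
+  // branches, terminate both sides early instead of returning them.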
+ if (MaxDepth && MaxDepth<=trueState->depth) { + terminateStateEarly(*trueState, "max-depth exceeded"); + terminateStateEarly(*falseState, "max-depth exceeded"); + return StatePair(0, 0); + } + + return StatePair(trueState, falseState); + } +} + +void Executor::addConstraint(ExecutionState &state, ref<Expr> condition) { + if (condition.isConstant()) { + assert(condition.getConstantValue() && + "attempt to add invalid constraint"); + return; + } + + // Check to see if this constraint violates seeds. + std::map< ExecutionState*, std::vector<SeedInfo> >::iterator it = + seedMap.find(&state); + if (it != seedMap.end()) { + bool warn = false; + for (std::vector<SeedInfo>::iterator siit = it->second.begin(), + siie = it->second.end(); siit != siie; ++siit) { + bool res; + bool success = + solver->mustBeFalse(state, siit->assignment.evaluate(condition), res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) { + siit->patchSeed(state, condition, solver); + warn = true; + } + } + if (warn) + klee_warning("seeds patched for violating constraint"); + } + + state.addConstraint(condition); + if (ivcEnabled) + doImpliedValueConcretization(state, condition, ref<Expr>(1, Expr::Bool)); +} + +ref<Expr> Executor::evalConstant(Constant *c) { + if (llvm::ConstantExpr *ce = dyn_cast<llvm::ConstantExpr>(c)) { + return evalConstantExpr(ce); + } else { + if (const ConstantInt *ci = dyn_cast<ConstantInt>(c)) { + switch(ci->getBitWidth()) { + case 1: return ConstantExpr::create(ci->getZExtValue(), Expr::Bool); + case 8: return ConstantExpr::create(ci->getZExtValue(), Expr::Int8); + case 16: return ConstantExpr::create(ci->getZExtValue(), Expr::Int16); + case 32: return ConstantExpr::create(ci->getZExtValue(), Expr::Int32); + case 64: return ConstantExpr::create(ci->getZExtValue(), Expr::Int64); + default: + assert(0 && "XXX arbitrary bit width constants unhandled"); + } + } else if (const ConstantFP *cf = dyn_cast<ConstantFP>(c)) { + switch(cf->getType()->getTypeID()) { + case Type::FloatTyID: { + float f = cf->getValueAPF().convertToFloat(); + return ConstantExpr::create(floats::FloatAsUInt64(f), Expr::Int32); + } + case Type::DoubleTyID: { + double d = cf->getValueAPF().convertToDouble(); + return ConstantExpr::create(floats::DoubleAsUInt64(d), Expr::Int64); + } + case Type::X86_FP80TyID: { + // FIXME: This is really broken, but for now we just convert + // to a double. This isn't going to work at all in general, + // but we need support for wide constants. 
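+      // Round the 80-bit x87 value to IEEE double and keep the 64-bit
+      // encoding; any extra precision in the original constant is dropped.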
+ APFloat apf = cf->getValueAPF(); + bool ignored; + APFloat::opStatus r = apf.convert(APFloat::IEEEdouble, + APFloat::rmNearestTiesToAway, + &ignored); + (void) r; + //assert(!(r & APFloat::opOverflow) && !(r & APFloat::opUnderflow) && + // "Overflow/underflow while converting from FP80 (x87) to 64-bit double"); + double d = apf.convertToDouble(); + return ConstantExpr::create(floats::DoubleAsUInt64(d), Expr::Int64); + } + default: + llvm::cerr << "Constant of type " << cf->getType()->getDescription() + << " not supported\n"; + llvm::cerr << "Constant used at "; + KConstant *kc = kmodule->getKConstant((Constant*) cf); + if (kc && kc->ki && kc->ki->info) + llvm::cerr << kc->ki->info->file << ":" << kc->ki->info->line << "\n"; + else llvm::cerr << "<unknown>\n"; + + assert(0 && "Arbitrary bit width floating point constants unsupported"); + } + } else if (const GlobalValue *gv = dyn_cast<GlobalValue>(c)) { + return globalAddresses.find(gv)->second; + } else if (isa<ConstantPointerNull>(c)) { + return Expr::createPointer(0); + } else if (isa<UndefValue>(c)) { + return ConstantExpr::create(0, Expr::getWidthForLLVMType(c->getType())); + } else { + // Constant{AggregateZero,Array,Struct,Vector} + assert(0 && "invalid argument to evalConstant()"); + } + } +} + +ref<Expr> Executor::eval(KInstruction *ki, + unsigned index, + ExecutionState &state) { + assert(index < ki->inst->getNumOperands()); + int vnumber = ki->operands[index]; + + // Determine if this is a constant or not. + if (vnumber < 0) { + unsigned index = -vnumber - 2; + Cell &c = kmodule->constantTable[index]; + return c.value; + } else { + unsigned index = vnumber; + StackFrame &sf = state.stack.back(); + Cell &c = sf.locals[index]; + return c.value; + } +} + +void Executor::bindLocal(KInstruction *target, ExecutionState &state, + ref<Expr> value) { + StackFrame &sf = state.stack.back(); + unsigned reg = target->dest; + Cell &c = sf.locals[reg]; + c.value = value; +} + +void Executor::bindArgument(KFunction *kf, unsigned index, + ExecutionState &state, ref<Expr> value) { + StackFrame &sf = state.stack.back(); + unsigned reg = kf->getArgRegister(index); + Cell &c = sf.locals[reg]; + c.value = value; +} + +ref<Expr> Executor::toUnique(const ExecutionState &state, + ref<Expr> &e) { + ref<Expr> result = e; + + if (!e.isConstant()) { + ref<Expr> value(0); + bool isTrue = false; + + solver->setTimeout(stpTimeout); + if (solver->getValue(state, e, value) && + solver->mustBeTrue(state, EqExpr::create(e, value), isTrue) && + isTrue) + result = value; + solver->setTimeout(0); + } + + return result; +} + + +/* Concretize the given expression, and return a possible constant value. + 'reason' is just a documentation string stating the reason for concretization. 
*/ +ref<Expr> Executor::toConstant(ExecutionState &state, + ref<Expr> e, + const char *reason) { + e = state.constraints.simplifyExpr(e); + if (!e.isConstant()) { + ref<Expr> value; + bool success = solver->getValue(state, e, value); + assert(success && "FIXME: Unhandled solver failure"); + + std::ostringstream os; + os << "silently concretizing (reason: " << reason << ") expression " << e + << " to value " << value + << " (" << (*(state.pc)).info->file << ":" << (*(state.pc)).info->line << ")"; + + if (AllExternalWarnings) + klee_warning(reason, os.str().c_str()); + else + klee_warning_once(reason, "%s", os.str().c_str()); + + addConstraint(state, EqExpr::create(e, value)); + + return value; + } else { + return e; + } +} + +void Executor::executeGetValue(ExecutionState &state, + ref<Expr> e, + KInstruction *target) { + e = state.constraints.simplifyExpr(e); + std::map< ExecutionState*, std::vector<SeedInfo> >::iterator it = + seedMap.find(&state); + if (it==seedMap.end() || e.isConstant()) { + ref<Expr> value; + bool success = solver->getValue(state, e, value); + assert(success && "FIXME: Unhandled solver failure"); + bindLocal(target, state, value); + } else { + std::set< ref<Expr> > values; + for (std::vector<SeedInfo>::iterator siit = it->second.begin(), + siie = it->second.end(); siit != siie; ++siit) { + ref<Expr> value; + bool success = + solver->getValue(state, siit->assignment.evaluate(e), value); + assert(success && "FIXME: Unhandled solver failure"); + values.insert(value); + } + + std::vector< ref<Expr> > conditions; + for (std::set< ref<Expr> >::iterator vit = values.begin(), + vie = values.end(); vit != vie; ++vit) + conditions.push_back(EqExpr::create(e, *vit)); + + std::vector<ExecutionState*> branches; + branch(state, conditions, branches); + + std::vector<ExecutionState*>::iterator bit = branches.begin(); + for (std::set< ref<Expr> >::iterator vit = values.begin(), + vie = values.end(); vit != vie; ++vit) { + ExecutionState *es = *bit; + if (es) + bindLocal(target, *es, *vit); + ++bit; + } + } +} + +void Executor::stepInstruction(ExecutionState &state) { + if (DebugPrintInstructions) { + printFileLine(state, state.pc); + llvm::cerr << std::setw(10) << stats::instructions << " " << *state.pc->inst; + } + + if (statsTracker) + statsTracker->stepInstruction(state); + + ++stats::instructions; + state.prevPC = state.pc; + ++state.pc; + + if (stats::instructions==StopAfterNInstructions) + haltExecution = true; +} + +void Executor::executeCall(ExecutionState &state, + KInstruction *ki, + Function *f, + std::vector< ref<Expr> > &arguments) { + if (WriteTraces) { + // don't print out special debug stop point 'function' calls + if (f->getIntrinsicID() != Intrinsic::dbg_stoppoint) { + const std::string& calleeFuncName = f->getName(); + state.exeTraceMgr.addEvent(new FunctionCallTraceEvent(state, ki, calleeFuncName)); + } + } + + Instruction *i = ki->inst; + if (f && f->isDeclaration()) { + if (f!=kmodule->dbgStopPointFn) { // special case speed hack + switch(f->getIntrinsicID()) { + case Intrinsic::dbg_stoppoint: + case Intrinsic::dbg_region_start: + case Intrinsic::dbg_region_end: + case Intrinsic::dbg_func_start: + case Intrinsic::dbg_declare: + case Intrinsic::not_intrinsic: + // state may be destroyed by this call, cannot touch + callExternalFunction(state, ki, f, arguments); + break; + + // vararg is handled by caller and intrinsic lowering, + // see comment for ExecutionState::varargs + case Intrinsic::vastart: { + StackFrame &sf = state.stack.back(); + assert(sf.varargs && + 
"vastart called in function with no vararg object"); + executeMemoryOperation(state, true, arguments[0], + sf.varargs->getBaseExpr(), 0); + break; + } + case Intrinsic::vaend: // va_end is a noop for the interpreter + break; + + case Intrinsic::vacopy: // should be lowered + default: + klee_error("unknown intrinsic: %s", f->getName().c_str()); + } + } + + if (InvokeInst *ii = dyn_cast<InvokeInst>(i)) { + transferToBasicBlock(ii->getNormalDest(), i->getParent(), state); + } + } else { + // XXX not really happy about this reliance on prevPC but is ok I + // guess. This just done to avoid having to pass KInstIterator + // everywhere instead of the actual instruction, since we can't + // make a KInstIterator from just an instruction (unlike LLVM). + KFunction *kf = kmodule->functionMap[f]; + state.pushFrame(state.prevPC, kf); + state.pc = kf->instructions; + + if (statsTracker) + statsTracker->framePushed(state, &state.stack[state.stack.size()-2]); + + unsigned callingArgs = arguments.size(); + unsigned funcArgs = f->arg_size(); + if (!f->isVarArg()) { + if (callingArgs > funcArgs) { + klee_warning_once(f, "calling %s with extra arguments.", + f->getName().c_str()); + } else if (callingArgs < funcArgs) { + terminateStateOnError(state, "calling function with too few arguments", + "user.err"); + return; + } + } else { + if (callingArgs < funcArgs) { + terminateStateOnError(state, "calling function with too few arguments", + "user.err"); + return; + } + + StackFrame &sf = state.stack.back(); + unsigned size = 0; + for (unsigned i = funcArgs; i < callingArgs; i++) + size += Expr::getMinBytesForWidth(arguments[i].getWidth()); + + MemoryObject *mo = sf.varargs = memory->allocate(size, true, false, + state.prevPC->inst); + if (!mo) { + terminateStateOnExecError(state, "out of memory (varargs)"); + return; + } + ObjectState *os = bindObjectInState(state, mo, true); + unsigned offset = 0; + for (unsigned i = funcArgs; i < callingArgs; i++) { + // XXX: DRE: i think we bind memory objects here? + os->write(offset, arguments[i]); + offset += Expr::getMinBytesForWidth(arguments[i].getWidth()); + } + } + + unsigned numFormals = f->arg_size(); + for (unsigned i=0; i<numFormals; ++i) + bindArgument(kf, i, state, arguments[i]); + } +} + +void Executor::transferToBasicBlock(BasicBlock *dst, BasicBlock *src, + ExecutionState &state) { + // Note that in general phi nodes can reuse phi values from the same + // block but the incoming value is the eval() result *before* the + // execution of any phi nodes. this is pathological and doesn't + // really seem to occur, but just in case we run the PhiCleanerPass + // which makes sure this cannot happen and so it is safe to just + // eval things in order. The PhiCleanerPass also makes sure that all + // incoming blocks have the same order for each PHINode so we only + // have to compute the index once. + // + // With that done we simply set an index in the state so that PHI + // instructions know which argument to eval, set the pc, and continue. + + // XXX this lookup has to go ? 
+ KFunction *kf = state.stack.back().kf; + unsigned entry = kf->basicBlockEntry[dst]; + state.pc = &kf->instructions[entry]; + if (state.pc->inst->getOpcode() == Instruction::PHI) { + PHINode *first = static_cast<PHINode*>(state.pc->inst); + state.incomingBBIndex = first->getBasicBlockIndex(src); + } +} + +void Executor::printFileLine(ExecutionState &state, KInstruction *ki) { + const InstructionInfo &ii = *ki->info; + if (ii.file != "") + llvm::cerr << " " << ii.file << ":" << ii.line << ":"; + else + llvm::cerr << " [no debug info]:"; +} + + +Function* Executor::getCalledFunction(CallSite &cs, ExecutionState &state) { + Function *f = cs.getCalledFunction(); + + if (f) { + std::string alias = state.getFnAlias(f->getName()); + if (alias != "") { + //llvm::cerr << f->getName() << "() is aliased with " << alias << "()\n"; + llvm::Module* currModule = kmodule->module; + Function* old_f = f; + f = currModule->getFunction(alias); + if (!f) { + llvm::cerr << "Function " << alias << "(), alias for " << old_f->getName() << " not found!\n"; + assert(f && "function alias not found"); + } + } + } + + return f; +} + + +void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) { + Instruction *i = ki->inst; + switch (i->getOpcode()) { + // Control flow + case Instruction::Ret: { + ReturnInst *ri = cast<ReturnInst>(i); + KInstIterator kcaller = state.stack.back().caller; + Instruction *caller = kcaller ? kcaller->inst : 0; + bool isVoidReturn = (ri->getNumOperands() == 0); + ref<Expr> result(0,Expr::Bool); + + if (WriteTraces) { + state.exeTraceMgr.addEvent(new FunctionReturnTraceEvent(state, ki)); + } + + if (!isVoidReturn) { + result = eval(ki, 0, state); + } + + if (state.stack.size() <= 1) { + assert(!caller && "caller set on initial stack frame"); + terminateStateOnExit(state); + } else { + state.popFrame(); + + if (statsTracker) + statsTracker->framePopped(state); + + if (InvokeInst *ii = dyn_cast<InvokeInst>(caller)) { + transferToBasicBlock(ii->getNormalDest(), caller->getParent(), state); + } else { + state.pc = kcaller; + ++state.pc; + } + + if (!isVoidReturn) { + const Type *t = caller->getType(); + if (t != Type::VoidTy) { + // may need to do coercion due to bitcasts + Expr::Width from = result.getWidth(); + Expr::Width to = Expr::getWidthForLLVMType(t); + + if (from != to) { + CallSite cs = (isa<InvokeInst>(caller) ? CallSite(cast<InvokeInst>(caller)) : + CallSite(cast<CallInst>(caller))); + + // XXX need to check other param attrs ? + if (cs.paramHasAttr(0, llvm::Attribute::SExt)) { + result = SExtExpr::create(result, to); + } else { + result = ZExtExpr::create(result, to); + } + } + + bindLocal(kcaller, state, result); + } + } else { + // We check that the return value has no users instead of + // checking the type, since C defaults to returning int for + // undeclared functions. 
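+        // A void return feeding a caller that ignores the value is
+        // harmless; only report an error if the (nonexistent) result is
+        // actually used.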
+ if (!caller->use_empty()) { + terminateStateOnExecError(state, "return void when caller expected a result"); + } + } + } + break; + } + case Instruction::Unwind: { + for (;;) { + KInstruction *kcaller = state.stack.back().caller; + state.popFrame(); + + if (statsTracker) + statsTracker->framePopped(state); + + if (state.stack.empty()) { + terminateStateOnExecError(state, "unwind from initial stack frame"); + break; + } else { + Instruction *caller = kcaller->inst; + if (InvokeInst *ii = dyn_cast<InvokeInst>(caller)) { + transferToBasicBlock(ii->getUnwindDest(), caller->getParent(), state); + break; + } + } + } + break; + } + case Instruction::Br: { + BranchInst *bi = cast<BranchInst>(i); + if (bi->isUnconditional()) { + transferToBasicBlock(bi->getSuccessor(0), bi->getParent(), state); + } else { + // FIXME: Find a way that we don't have this hidden dependency. + assert(bi->getCondition() == bi->getOperand(0) && + "Wrong operand index!"); + ref<Expr> cond = eval(ki, 0, state); + Executor::StatePair branches = fork(state, cond, false); + + if (WriteTraces) { + bool isTwoWay = (branches.first && branches.second); + + if (branches.first) { + branches.first->exeTraceMgr.addEvent( + new BranchTraceEvent(state, ki, true, isTwoWay)); + } + + if (branches.second) { + branches.second->exeTraceMgr.addEvent( + new BranchTraceEvent(state, ki, false, isTwoWay)); + } + } + + // NOTE: There is a hidden dependency here, markBranchVisited + // requires that we still be in the context of the branch + // instruction (it reuses its statistic id). Should be cleaned + // up with convenient instruction specific data. + if (statsTracker && state.stack.back().kf->trackCoverage) + statsTracker->markBranchVisited(branches.first, branches.second); + + if (branches.first) + transferToBasicBlock(bi->getSuccessor(0), bi->getParent(), *branches.first); + if (branches.second) + transferToBasicBlock(bi->getSuccessor(1), bi->getParent(), *branches.second); + } + break; + } + case Instruction::Switch: { + SwitchInst *si = cast<SwitchInst>(i); + ref<Expr> cond = eval(ki, 0, state); + unsigned cases = si->getNumCases(); + BasicBlock *bb = si->getParent(); + + cond = toUnique(state, cond); + if (cond.isConstant()) { + // Somewhat gross to create these all the time, but fine till we + // switch to an internal rep. 
+ ConstantInt *ci = ConstantInt::get(si->getCondition()->getType(), + cond.getConstantValue()); + unsigned index = si->findCaseValue(ci); + transferToBasicBlock(si->getSuccessor(index), si->getParent(), state); + } else { + std::map<BasicBlock*, ref<Expr> > targets; + ref<Expr> isDefault(1,Expr::Bool); + for (unsigned i=1; i<cases; ++i) { + ref<Expr> value = evalConstant(si->getCaseValue(i)); + ref<Expr> match = EqExpr::create(cond, value); + isDefault = AndExpr::create(isDefault, Expr::createNot(match)); + bool result; + bool success = solver->mayBeTrue(state, match, result); + assert(success && "FIXME: Unhandled solver failure"); + if (result) { + std::map<BasicBlock*, ref<Expr> >::iterator it = + targets.insert(std::make_pair(si->getSuccessor(i), + ref<Expr>(0,Expr::Bool))).first; + it->second = OrExpr::create(match, it->second); + } + } + bool res; + bool success = solver->mayBeTrue(state, isDefault, res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) + targets.insert(std::make_pair(si->getSuccessor(0), isDefault)); + + std::vector< ref<Expr> > conditions; + for (std::map<BasicBlock*, ref<Expr> >::iterator it = + targets.begin(), ie = targets.end(); + it != ie; ++it) + conditions.push_back(it->second); + + std::vector<ExecutionState*> branches; + branch(state, conditions, branches); + + std::vector<ExecutionState*>::iterator bit = branches.begin(); + for (std::map<BasicBlock*, ref<Expr> >::iterator it = + targets.begin(), ie = targets.end(); + it != ie; ++it) { + ExecutionState *es = *bit; + if (es) + transferToBasicBlock(it->first, bb, *es); + ++bit; + } + } + break; + } + case Instruction::Unreachable: + // Note that this is not necessarily an internal bug, llvm will + // generate unreachable instructions in cases where it knows the + // program will crash. So it is effectively a SEGV or internal + // error. + terminateStateOnExecError(state, "reached \"unreachable\" instruction"); + break; + + case Instruction::Invoke: + case Instruction::Call: { + CallSite cs; + unsigned argStart; + if (i->getOpcode()==Instruction::Call) { + cs = CallSite(cast<CallInst>(i)); + argStart = 1; + } else { + cs = CallSite(cast<InvokeInst>(i)); + argStart = 3; + } + + unsigned numArgs = cs.arg_size(); + Function *f = getCalledFunction(cs, state); + + // evaluate arguments + std::vector< ref<Expr> > arguments; + arguments.reserve(numArgs); + + for (unsigned j=0; j<numArgs; ++j) + arguments.push_back(eval(ki, argStart+j, state)); + + if (!f) { + // special case the call with a bitcast case + Value *fp = cs.getCalledValue(); + llvm::ConstantExpr *ce = dyn_cast<llvm::ConstantExpr>(fp); + + if (ce && ce->getOpcode()==Instruction::BitCast) { + f = dyn_cast<Function>(ce->getOperand(0)); + assert(f && "XXX unrecognized constant expression in call"); + const FunctionType *fType = + dyn_cast<FunctionType>(cast<PointerType>(f->getType())->getElementType()); + const FunctionType *ceType = + dyn_cast<FunctionType>(cast<PointerType>(ce->getType())->getElementType()); + assert(fType && ceType && "unable to get function type"); + + // XXX check result coercion + + // XXX this really needs thought and validation + unsigned i=0; + for (std::vector< ref<Expr> >::iterator + ai = arguments.begin(), ie = arguments.end(); + ai != ie; ++ai) { + Expr::Width to, from = (*ai).getWidth(); + + if (i<fType->getNumParams()) { + to = Expr::getWidthForLLVMType(fType->getParamType(i)); + + if (from != to) { + // XXX need to check other param attrs ? 
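+              // Coerce each actual argument to the width of the
+              // corresponding formal: sign-extend when the parameter is
+              // marked signext, zero-extend otherwise (e.g. an i8 actual
+              // passed to an i32 formal becomes SExt/ZExt to 32 bits).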
+ if (cs.paramHasAttr(i+1, llvm::Attribute::SExt)) { + arguments[i] = SExtExpr::create(arguments[i], to); + } else { + arguments[i] = ZExtExpr::create(arguments[i], to); + } + } + } + + i++; + } + } else if (isa<InlineAsm>(fp)) { + terminateStateOnExecError(state, "inline assembly is unsupported"); + break; + } + } + + if (f) { + executeCall(state, ki, f, arguments); + } else { + ref<Expr> v = eval(ki, 0, state); + + ExecutionState *free = &state; + bool hasInvalid = false, first = true; + + /* XXX This is wasteful, no need to do a full evaluate since we + have already got a value. But in the end the caches should + handle it for us, albeit with some overhead. */ + do { + ref<Expr> value; + bool success = solver->getValue(*free, v, value); + assert(success && "FIXME: Unhandled solver failure"); + StatePair res = fork(*free, EqExpr::create(v, value), true); + if (res.first) { + void *addr = (void*) (unsigned long) value.getConstantValue(); + std::set<void*>::iterator it = legalFunctions.find(addr); + if (it != legalFunctions.end()) { + f = (Function*) addr; + + // Don't give warning on unique resolution + if (res.second || !first) + klee_warning_once(addr, + "resolved symbolic function pointer to: %s", + f->getName().c_str()); + + executeCall(*res.first, ki, f, arguments); + } else { + if (!hasInvalid) { + terminateStateOnExecError(state, "invalid function pointer"); + hasInvalid = true; + } + } + } + + first = false; + free = res.second; + } while (free); + } + break; + } + case Instruction::PHI: { + ref<Expr> result = eval(ki, state.incomingBBIndex * 2, state); + bindLocal(ki, state, result); + break; + } + + // Special instructions + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(ki->inst); + assert(SI->getCondition() == SI->getOperand(0) && + "Wrong operand index!"); + ref<Expr> cond = eval(ki, 0, state); + ref<Expr> tExpr = eval(ki, 1, state); + ref<Expr> fExpr = eval(ki, 2, state); + ref<Expr> result = SelectExpr::create(cond, tExpr, fExpr); + bindLocal(ki, state, result); + break; + } + + case Instruction::VAArg: + terminateStateOnExecError(state, "unexpected VAArg instruction"); + break; + + // Arithmetic / logical +#define FP_CONSTANT_BINOP(op, type, l, r, target, state) \ + bindLocal(target, state, \ + ref<Expr>(op(toConstant(state, l, "floating point").getConstantValue(), \ + toConstant(state, r, "floating point").getConstantValue(), \ + type), type)) + case Instruction::Add: { + BinaryOperator *bi = cast<BinaryOperator>(i); + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + + if( bi->getType()->getTypeID() == llvm::Type::IntegerTyID ) { + bindLocal(ki, state, AddExpr::create(left, right)); + } else { + Expr::Width type = Expr::getWidthForLLVMType(bi->getType()); + FP_CONSTANT_BINOP(floats::add, type, left, right, ki, state); + } + + break; + } + + case Instruction::Sub: { + BinaryOperator *bi = cast<BinaryOperator>(i); + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + + if( bi->getType()->getTypeID() == llvm::Type::IntegerTyID ) { + bindLocal(ki, state, SubExpr::create(left, right)); + } else { + Expr::Width type = Expr::getWidthForLLVMType(bi->getType()); + FP_CONSTANT_BINOP(floats::sub, type, left, right, ki, state); + } + + break; + } + + case Instruction::Mul: { + BinaryOperator *bi = cast<BinaryOperator>(i); + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + + if( bi->getType()->getTypeID() == llvm::Type::IntegerTyID ) { + bindLocal(ki, state, MulExpr::create(left, 
right)); + } else { + Expr::Width type = Expr::getWidthForLLVMType(bi->getType()); + FP_CONSTANT_BINOP(floats::mul, type, left, right, ki, state); + } + + break; + } + + case Instruction::UDiv: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = UDivExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case Instruction::SDiv: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = SDivExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case Instruction::URem: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = URemExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case Instruction::SRem: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = SRemExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case Instruction::And: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = AndExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case Instruction::Or: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = OrExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case Instruction::Xor: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = XorExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case Instruction::Shl: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = ShlExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case Instruction::LShr: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = LShrExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case Instruction::AShr: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = AShrExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + // Compare + + case Instruction::ICmp: { + CmpInst *ci = cast<CmpInst>(i); + ICmpInst *ii = cast<ICmpInst>(ci); + + switch(ii->getPredicate()) { + case ICmpInst::ICMP_EQ: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = EqExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case ICmpInst::ICMP_NE: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = NeExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case ICmpInst::ICMP_UGT: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = UgtExpr::create(left, right); + bindLocal(ki, state,result); + break; + } + + case ICmpInst::ICMP_UGE: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = UgeExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case ICmpInst::ICMP_ULT: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = UltExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case ICmpInst::ICMP_ULE: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = UleExpr::create(left, right); + 
bindLocal(ki, state, result); + break; + } + + case ICmpInst::ICMP_SGT: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = SgtExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case ICmpInst::ICMP_SGE: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = SgeExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case ICmpInst::ICMP_SLT: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = SltExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + case ICmpInst::ICMP_SLE: { + ref<Expr> left = eval(ki, 0, state); + ref<Expr> right = eval(ki, 1, state); + ref<Expr> result = SleExpr::create(left, right); + bindLocal(ki, state, result); + break; + } + + default: + terminateStateOnExecError(state, "invalid ICmp predicate"); + } + break; + } + + // Memory instructions... + case Instruction::Alloca: + case Instruction::Malloc: { + AllocationInst *ai = cast<AllocationInst>(i); + unsigned elementSize = + kmodule->targetData->getTypeStoreSize(ai->getAllocatedType()); + ref<Expr> size = Expr::createPointer(elementSize); + if (ai->isArrayAllocation()) { + // XXX coerce? + ref<Expr> count = eval(ki, 0, state); + size = MulExpr::create(count, size); + } + bool isLocal = i->getOpcode()==Instruction::Alloca; + executeAlloc(state, size, isLocal, ki); + break; + } + case Instruction::Free: { + executeFree(state, eval(ki, 0, state)); + break; + } + + case Instruction::Load: { + ref<Expr> base = eval(ki, 0, state); + executeMemoryOperation(state, false, base, 0, ki); + break; + } + case Instruction::Store: { + ref<Expr> base = eval(ki, 1, state); + ref<Expr> value = eval(ki, 0, state); + executeMemoryOperation(state, true, base, value, 0); + break; + } + + case Instruction::GetElementPtr: { + KGEPInstruction *kgepi = static_cast<KGEPInstruction*>(ki); + ref<Expr> base = eval(ki, 0, state); + + for (std::vector< std::pair<unsigned, unsigned> >::iterator + it = kgepi->indices.begin(), ie = kgepi->indices.end(); + it != ie; ++it) { + unsigned elementSize = it->second; + ref<Expr> index = eval(ki, it->first, state); + base = AddExpr::create(base, + MulExpr::create(Expr::createCoerceToPointerType(index), + Expr::createPointer(elementSize))); + } + if (kgepi->offset) + base = AddExpr::create(base, + Expr::createPointer(kgepi->offset)); + bindLocal(ki, state, base); + break; + } + + // Conversion + case Instruction::Trunc: { + CastInst *ci = cast<CastInst>(i); + ref<Expr> result = ExtractExpr::createByteOff(eval(ki, 0, state), + 0, + Expr::getWidthForLLVMType(ci->getType())); + bindLocal(ki, state, result); + break; + } + case Instruction::ZExt: { + CastInst *ci = cast<CastInst>(i); + ref<Expr> result = ZExtExpr::create(eval(ki, 0, state), + Expr::getWidthForLLVMType(ci->getType())); + bindLocal(ki, state, result); + break; + } + case Instruction::SExt: { + CastInst *ci = cast<CastInst>(i); + ref<Expr> result = SExtExpr::create(eval(ki, 0, state), + Expr::getWidthForLLVMType(ci->getType())); + bindLocal(ki, state, result); + break; + } + + case Instruction::IntToPtr: { + CastInst *ci = cast<CastInst>(i); + Expr::Width pType = Expr::getWidthForLLVMType(ci->getType()); + ref<Expr> arg = eval(ki, 0, state); + bindLocal(ki, state, ZExtExpr::create(arg, pType)); + break; + } + case Instruction::PtrToInt: { + CastInst *ci = cast<CastInst>(i); + Expr::Width iType = Expr::getWidthForLLVMType(ci->getType()); + 
ref<Expr> arg = eval(ki, 0, state); + bindLocal(ki, state, ZExtExpr::create(arg, iType)); + break; + } + + case Instruction::BitCast: { + ref<Expr> result = eval(ki, 0, state); + bindLocal(ki, state, result); + break; + } + + // Floating Point specific instructions + case Instruction::FPTrunc: { + FPTruncInst *fi = cast<FPTruncInst>(i); + Expr::Width resultType = Expr::getWidthForLLVMType(fi->getType()); + ref<Expr> arg = toConstant(state, eval(ki, 0, state), + "floating point"); + uint64_t value = floats::trunc(arg.getConstantValue(), + resultType, + arg.getWidth()); + ref<Expr> result(value, resultType); + bindLocal(ki, state, result); + break; + } + + case Instruction::FPExt: { + FPExtInst *fi = cast<FPExtInst>(i); + Expr::Width resultType = Expr::getWidthForLLVMType(fi->getType()); + ref<Expr> arg = toConstant(state, eval(ki, 0, state), + "floating point"); + uint64_t value = floats::ext(arg.getConstantValue(), + resultType, + arg.getWidth()); + ref<Expr> result(value, resultType); + bindLocal(ki, state, result); + break; + } + + case Instruction::FPToUI: { + FPToUIInst *fi = cast<FPToUIInst>(i); + Expr::Width resultType = Expr::getWidthForLLVMType(fi->getType()); + ref<Expr> arg = toConstant(state, eval(ki, 0, state), + "floating point"); + uint64_t value = floats::toUnsignedInt(arg.getConstantValue(), + resultType, + arg.getWidth()); + ref<Expr> result(value, resultType); + bindLocal(ki, state, result); + break; + } + + case Instruction::FPToSI: { + FPToSIInst *fi = cast<FPToSIInst>(i); + Expr::Width resultType = Expr::getWidthForLLVMType(fi->getType()); + ref<Expr> arg = toConstant(state, eval(ki, 0, state), + "floating point"); + uint64_t value = floats::toSignedInt(arg.getConstantValue(), + resultType, + arg.getWidth()); + ref<Expr> result(value, resultType); + bindLocal(ki, state, result); + break; + } + + case Instruction::UIToFP: { + UIToFPInst *fi = cast<UIToFPInst>(i); + Expr::Width resultType = Expr::getWidthForLLVMType(fi->getType()); + ref<Expr> arg = toConstant(state, eval(ki, 0, state), + "floating point"); + uint64_t value = floats::UnsignedIntToFP(arg.getConstantValue(), + resultType); + ref<Expr> result(value, resultType); + bindLocal(ki, state, result); + break; + } + + case Instruction::SIToFP: { + SIToFPInst *fi = cast<SIToFPInst>(i); + Expr::Width resultType = Expr::getWidthForLLVMType(fi->getType()); + ref<Expr> arg = toConstant(state, eval(ki, 0, state), + "floating point"); + uint64_t value = floats::SignedIntToFP(arg.getConstantValue(), + resultType, + arg.getWidth()); + ref<Expr> result(value, resultType); + bindLocal(ki, state, result); + break; + } + + case Instruction::FCmp: { + FCmpInst *fi = cast<FCmpInst>(i); + Expr::Width resultType = Expr::getWidthForLLVMType(fi->getType()); + ref<Expr> left = toConstant(state, eval(ki, 0, state), + "floating point"); + ref<Expr> right = toConstant(state, eval(ki, 1, state), + "floating point"); + uint64_t leftVal = left.getConstantValue(); + uint64_t rightVal = right.getConstantValue(); + + //determine whether the operands are NANs + unsigned inWidth = left.getWidth(); + bool leftIsNaN = floats::isNaN( leftVal, inWidth ); + bool rightIsNaN = floats::isNaN( rightVal, inWidth ); + + //handle NAN based on whether the predicate is "ordered" or "unordered" + uint64_t ret = (uint64_t)-1; + bool done = false; + switch( fi->getPredicate() ) { + //predicates which only care about whether or not the operands are NaNs + case FCmpInst::FCMP_ORD: + done = true; + ret = !leftIsNaN && !rightIsNaN; + break; + + case 
FCmpInst::FCMP_UNO: + done = true; + ret = leftIsNaN || rightIsNaN; + break; + + //ordered comparisons return false if either operand is NaN + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_OGT: + case FCmpInst::FCMP_OGE: + case FCmpInst::FCMP_OLT: + case FCmpInst::FCMP_OLE: + case FCmpInst::FCMP_ONE: + if( !leftIsNaN && !rightIsNaN) //only fall through and return false if there are NaN(s) + break; + + case FCmpInst::FCMP_FALSE: { //always return false for this predicate + done = true; + ret = false; + break; + } + + //unordered comparisons return true if either operand is NaN + case FCmpInst::FCMP_UEQ: + case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_UGE: + case FCmpInst::FCMP_ULT: + case FCmpInst::FCMP_ULE: + case FCmpInst::FCMP_UNE: + if( !leftIsNaN && !rightIsNaN) //only fall through and return true if there are NaN(s) + break; + + case FCmpInst::FCMP_TRUE: //always return true for this predicate + done = true; + ret = true; + + default: + case FCmpInst::BAD_FCMP_PREDICATE: /* will fall through and trigger fatal in the next switch */ + break; + } + + //if not done, then we need to actually do a comparison to get the result + if( !done ) { + switch( fi->getPredicate() ) { + //ordered comparisons return false if either operand is NaN + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_UEQ: + ret = floats::eq( leftVal, rightVal, inWidth ); + break; + + case FCmpInst::FCMP_OGT: + case FCmpInst::FCMP_UGT: + ret = floats::gt( leftVal, rightVal, inWidth ); + break; + + case FCmpInst::FCMP_OGE: + case FCmpInst::FCMP_UGE: + ret = floats::ge( leftVal, rightVal, inWidth ); + break; + + case FCmpInst::FCMP_OLT: + case FCmpInst::FCMP_ULT: + ret = floats::lt( leftVal, rightVal, inWidth ); + break; + + case FCmpInst::FCMP_OLE: + case FCmpInst::FCMP_ULE: + ret = floats::le( leftVal, rightVal, inWidth ); + break; + + case FCmpInst::FCMP_ONE: + case FCmpInst::FCMP_UNE: + ret = floats::ne( leftVal, rightVal, inWidth ); + break; + + default: + terminateStateOnExecError(state, "invalid FCmp predicate"); + } + } + + ref<Expr> result(ret, resultType); + bindLocal(ki, state, result); + break; + } + + case Instruction::FDiv: { + BinaryOperator *bi = cast<BinaryOperator>(i); + + ref<Expr> dividend = eval(ki, 0, state); + ref<Expr> divisor = eval(ki, 1, state); + Expr::Width type = Expr::getWidthForLLVMType(bi->getType()); + FP_CONSTANT_BINOP(floats::div, type, dividend, divisor, ki, state); + break; + } + + case Instruction::FRem: { + BinaryOperator *bi = cast<BinaryOperator>(i); + + ref<Expr> dividend = eval(ki, 0, state); + ref<Expr> divisor = eval(ki, 1, state); + Expr::Width type = Expr::getWidthForLLVMType(bi->getType()); + FP_CONSTANT_BINOP(floats::mod, type, dividend, divisor, ki, state); + break; + } + + + // Other instructions... 
+ // Unhandled + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + terminateStateOnError(state, "XXX vector instructions unhandled", + "xxx.err"); + break; + + default: + terminateStateOnExecError(state, "invalid instruction"); + break; + } +} + +void Executor::updateStates(ExecutionState *current) { + if (searcher) { + searcher->update(current, addedStates, removedStates); + } + + states.insert(addedStates.begin(), addedStates.end()); + addedStates.clear(); + + for (std::set<ExecutionState*>::iterator + it = removedStates.begin(), ie = removedStates.end(); + it != ie; ++it) { + ExecutionState *es = *it; + std::set<ExecutionState*>::iterator it2 = states.find(es); + assert(it2!=states.end()); + states.erase(it2); + std::map<ExecutionState*, std::vector<SeedInfo> >::iterator it3 = + seedMap.find(es); + if (it3 != seedMap.end()) + seedMap.erase(it3); + processTree->remove(es->ptreeNode); + delete es; + } + removedStates.clear(); +} + +void Executor::bindInstructionConstants(KInstruction *KI) { + GetElementPtrInst *gepi = dyn_cast<GetElementPtrInst>(KI->inst); + if (!gepi) + return; + + KGEPInstruction *kgepi = static_cast<KGEPInstruction*>(KI); + ref<Expr> constantOffset = Expr::createPointer(0); + unsigned index = 1; + for (gep_type_iterator ii = gep_type_begin(gepi), ie = gep_type_end(gepi); + ii != ie; ++ii) { + if (const StructType *st = dyn_cast<StructType>(*ii)) { + const StructLayout *sl = + kmodule->targetData->getStructLayout(st); + const ConstantInt *ci = cast<ConstantInt>(ii.getOperand()); + ref<Expr> addend = Expr::createPointer(sl->getElementOffset((unsigned) + ci->getZExtValue())); + constantOffset = AddExpr::create(constantOffset, addend); + } else { + const SequentialType *st = cast<SequentialType>(*ii); + unsigned elementSize = + kmodule->targetData->getTypeStoreSize(st->getElementType()); + Value *operand = ii.getOperand(); + if (Constant *c = dyn_cast<Constant>(operand)) { + ref<Expr> index = evalConstant(c); + ref<Expr> addend = MulExpr::create(Expr::createCoerceToPointerType(index), + Expr::createPointer(elementSize)); + constantOffset = AddExpr::create(constantOffset, addend); + } else { + kgepi->indices.push_back(std::make_pair(index, elementSize)); + } + } + index++; + } + assert(constantOffset.isConstant()); + kgepi->offset = constantOffset.getConstantValue(); +} + +void Executor::bindModuleConstants() { + for (std::vector<KFunction*>::iterator it = kmodule->functions.begin(), + ie = kmodule->functions.end(); it != ie; ++it) { + KFunction *kf = *it; + for (unsigned i=0; i<kf->numInstructions; ++i) + bindInstructionConstants(kf->instructions[i]); + } + + kmodule->constantTable = new Cell[kmodule->constants.size()]; + for (unsigned i=0; i<kmodule->constants.size(); ++i) { + Cell &c = kmodule->constantTable[i]; + c.value = evalConstant(kmodule->constants[i]); + } +} + +void Executor::run(ExecutionState &initialState) { + bindModuleConstants(); + + // Delay init till now so that ticks don't accrue during + // optimization and such. 
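+  // (Timers are checked periodically from the interpreter loop below via
+  // processTimers(); starting them only now keeps module preparation time
+  // out of that accounting.)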
+ initTimers(); + + states.insert(&initialState); + + if (usingSeeds) { + std::vector<SeedInfo> &v = seedMap[&initialState]; + + for (std::vector<BOut*>::const_iterator it = usingSeeds->begin(), + ie = usingSeeds->end(); it != ie; ++it) + v.push_back(SeedInfo(*it)); + + int lastNumSeeds = usingSeeds->size()+10; + double lastTime, startTime = lastTime = util::getWallTime(); + ExecutionState *lastState = 0; + while (!seedMap.empty()) { + if (haltExecution) goto dump; + + std::map<ExecutionState*, std::vector<SeedInfo> >::iterator it = + seedMap.upper_bound(lastState); + if (it == seedMap.end()) + it = seedMap.begin(); + lastState = it->first; + unsigned numSeeds = it->second.size(); + ExecutionState &state = *lastState; + KInstruction *ki = state.pc; + stepInstruction(state); + + executeInstruction(state, ki); + processTimers(&state, MaxInstructionTime * numSeeds); + updateStates(&state); + + if ((stats::instructions % 1000) == 0) { + int numSeeds = 0, numStates = 0; + for (std::map<ExecutionState*, std::vector<SeedInfo> >::iterator + it = seedMap.begin(), ie = seedMap.end(); + it != ie; ++it) { + numSeeds += it->second.size(); + numStates++; + } + double time = util::getWallTime(); + if (SeedTime>0. && time > startTime + SeedTime) { + klee_warning("seed time expired, %d seeds remain over %d states", + numSeeds, numStates); + break; + } else if (numSeeds<=lastNumSeeds-10 || + time >= lastTime+10) { + lastTime = time; + lastNumSeeds = numSeeds; + klee_message("%d seeds remaining over: %d states", + numSeeds, numStates); + } + } + } + + klee_message("seeding done (%d states remain)", (int) states.size()); + + // XXX total hack, just because I like non uniform better but want + // seed results to be equally weighted. + for (std::set<ExecutionState*>::iterator + it = states.begin(), ie = states.end(); + it != ie; ++it) { + (*it)->weight = 1.; + } + + if (OnlySeed) + goto dump; + } + + searcher = constructUserSearcher(*this); + + searcher->update(0, states, std::set<ExecutionState*>()); + + while (!states.empty() && !haltExecution) { + ExecutionState &state = searcher->selectState(); + KInstruction *ki = state.pc; + stepInstruction(state); + + executeInstruction(state, ki); + processTimers(&state, MaxInstructionTime); + + if (MaxMemory) { + if ((stats::instructions & 0xFFFF) == 0) { + // We need to avoid calling GetMallocUsage() often because it + // is O(elts on freelist). This is really bad since we start + // to pummel the freelist once we hit the memory cap. + unsigned mbs = sys::Process::GetTotalMemoryUsage() >> 20; + + if (mbs > MaxMemory) { + if (mbs > MaxMemory + 100) { + // just guess at how many to kill + unsigned numStates = states.size(); + unsigned toKill = std::max(1U, numStates - numStates*MaxMemory/mbs); + + if (MaxMemoryInhibit) + klee_warning("killing %d states (over memory cap)", + toKill); + + std::vector<ExecutionState*> arr(states.begin(), states.end()); + for (unsigned i=0,N=arr.size(); N && i<toKill; ++i,--N) { + unsigned idx = rand() % N; + + // Make two pulls to try and not hit a state that + // covered new code. 
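+            // (i.e. if the first randomly chosen victim covered new code,
+            // re-roll once; the chosen state is then swapped to the end of
+            // the array and terminated, shrinking the candidate pool by one
+            // each iteration.)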
+ if (arr[idx]->coveredNew) + idx = rand() % N; + + std::swap(arr[idx], arr[N-1]); + terminateStateEarly(*arr[N-1], "memory limit"); + } + } + atMemoryLimit = true; + } else { + atMemoryLimit = false; + } + } + } + + updateStates(&state); + } + + delete searcher; + searcher = 0; + + dump: + if (DumpStatesOnHalt && !states.empty()) { + llvm::cerr << "KLEE: halting execution, dumping remaining states\n"; + for (std::set<ExecutionState*>::iterator + it = states.begin(), ie = states.end(); + it != ie; ++it) { + ExecutionState &state = **it; + stepInstruction(state); // keep stats rolling + terminateStateEarly(state, "execution halting"); + } + updateStates(0); + } +} + +std::string Executor::getAddressInfo(ExecutionState &state, + ref<Expr> address) const{ + std::ostringstream info; + info << "\taddress: " << address << "\n"; + uint64_t example; + if (address.isConstant()) { + example = address.getConstantValue(); + } else { + ref<Expr> value; + bool success = solver->getValue(state, address, value); + assert(success && "FIXME: Unhandled solver failure"); + example = value.getConstantValue(); + info << "\texample: " << example << "\n"; + std::pair< ref<Expr>, ref<Expr> > res = solver->getRange(state, address); + info << "\trange: [" << res.first << ", " << res.second <<"]\n"; + } + + MemoryObject hack((unsigned) example); + MemoryMap::iterator lower = state.addressSpace.objects.upper_bound(&hack); + info << "\tnext: "; + if (lower==state.addressSpace.objects.end()) { + info << "none\n"; + } else { + const MemoryObject *mo = lower->first; + info << "object at " << mo->address + << " of size " << mo->size << "\n"; + } + if (lower!=state.addressSpace.objects.begin()) { + --lower; + info << "\tprev: "; + if (lower==state.addressSpace.objects.end()) { + info << "none\n"; + } else { + const MemoryObject *mo = lower->first; + info << "object at " << mo->address + << " of size " << mo->size << "\n"; + } + } + + return info.str(); +} + +void Executor::terminateState(ExecutionState &state) { + if (replayOut && replayPosition!=replayOut->numObjects) { + klee_warning_once(replayOut, "replay did not consume all objects in .bout input."); + } + + interpreterHandler->incPathsExplored(); + + std::set<ExecutionState*>::iterator it = addedStates.find(&state); + if (it==addedStates.end()) { + state.pc = state.prevPC; + + removedStates.insert(&state); + } else { + // never reached searcher, just delete immediately + std::map< ExecutionState*, std::vector<SeedInfo> >::iterator it3 = + seedMap.find(&state); + if (it3 != seedMap.end()) + seedMap.erase(it3); + addedStates.erase(it); + processTree->remove(state.ptreeNode); + delete &state; + } +} + +void Executor::terminateStateEarly(ExecutionState &state, std::string message) { + if (!OnlyOutputStatesCoveringNew || state.coveredNew || + (AlwaysOutputSeeds && seedMap.count(&state))) + interpreterHandler->processTestCase(state, (message + "\n").c_str(), "early"); + terminateState(state); +} + +void Executor::terminateStateOnExit(ExecutionState &state) { + if (!OnlyOutputStatesCoveringNew || state.coveredNew || + (AlwaysOutputSeeds && seedMap.count(&state))) + interpreterHandler->processTestCase(state, 0, 0); + terminateState(state); +} + +void Executor::terminateStateOnError(ExecutionState &state, + const std::string &message, + const std::string &suffix, + const std::string &info) { + static std::set< std::pair<Instruction*, std::string> > emittedErrors; + const InstructionInfo &ii = *state.prevPC->info; + + if (EmitAllErrors || + 
emittedErrors.insert(std::make_pair(state.prevPC->inst,message)).second) { + if (ii.file != "") { + klee_message("ERROR: %s:%d: %s", ii.file.c_str(), ii.line, message.c_str()); + } else { + klee_message("ERROR: %s", message.c_str()); + } + if (!EmitAllErrors) + klee_message("NOTE: now ignoring this error at this location"); + + std::ostringstream msg; + msg << "Error: " << message << "\n"; + if (ii.file != "") { + msg << "File: " << ii.file << "\n"; + msg << "Line: " << ii.line << "\n"; + } + msg << "Stack: \n"; + unsigned idx = 0; + const KInstruction *target = state.prevPC; + for (ExecutionState::stack_ty::reverse_iterator + it = state.stack.rbegin(), ie = state.stack.rend(); + it != ie; ++it) { + StackFrame &sf = *it; + Function *f = sf.kf->function; + const InstructionInfo &ii = *target->info; + msg << "\t#" << idx++ + << " " << std::setw(8) << std::setfill('0') << ii.assemblyLine + << " in " << f->getName() << " ("; + // Yawn, we could go up and print varargs if we wanted to. + unsigned index = 0; + for (Function::arg_iterator ai = f->arg_begin(), ae = f->arg_end(); + ai != ae; ++ai) { + if (ai!=f->arg_begin()) msg << ", "; + + msg << ai->getName(); + // XXX should go through function + ref<Expr> value = sf.locals[sf.kf->getArgRegister(index++)].value; + if (value.isConstant()) + msg << "=" << value; + } + msg << ")"; + if (ii.file != "") + msg << " at " << ii.file << ":" << ii.line; + msg << "\n"; + target = sf.caller; + } + + if (info != "") + msg << "Info: \n" << info; + interpreterHandler->processTestCase(state, msg.str().c_str(), suffix.c_str()); + } + + terminateState(state); +} + +// XXX shoot me +static const char *okExternalsList[] = { "printf", + "fprintf", + "puts", + "getpid" }; +static std::set<std::string> okExternals(okExternalsList, + okExternalsList + + (sizeof(okExternalsList)/sizeof(okExternalsList[0]))); + +void Executor::callExternalFunction(ExecutionState &state, + KInstruction *target, + Function *function, + std::vector< ref<Expr> > &arguments) { + // check if specialFunctionHandler wants it + if (specialFunctionHandler->handle(state, function, target, arguments)) + return; + + if (NoExternals && !okExternals.count(function->getName())) { + llvm::cerr << "KLEE:ERROR: Calling not-OK external function : " << function->getName() << "\n"; + terminateStateOnError(state, "externals disallowed", "user.err"); + return; + } + + // normal external function handling path + uint64_t *args = (uint64_t*) alloca(sizeof(*args) * (arguments.size() + 1)); + memset(args, 0, sizeof(*args) * (arguments.size() + 1)); + + unsigned i = 1; + for (std::vector<ref<Expr> >::iterator ai = arguments.begin(), ae = arguments.end(); + ai!=ae; ++ai, ++i) { + if (AllowExternalSymCalls) { // don't bother checking uniqueness + ref<Expr> ce; + bool success = solver->getValue(state, *ai, ce); + assert(success && "FIXME: Unhandled solver failure"); + static_cast<ConstantExpr*>(ce.get())->toMemory((void*) &args[i]); + } else { + ref<Expr> arg = toUnique(state, *ai); + if (arg.isConstant()) { + // XXX kick toMemory functions from here + static_cast<ConstantExpr*>(arg.get())->toMemory((void*) &args[i]); + } else { + std::string msg = "external call with symbolic argument: " + function->getName(); + terminateStateOnExecError(state, msg); + return; + } + } + } + + state.addressSpace.copyOutConcretes(); + + if (!SuppressExternalWarnings) { + std::ostringstream os; + os << "calling external: " << function->getName().c_str() << "("; + for (unsigned i=0; i<arguments.size(); i++) { + os << arguments[i]; + if 
(i != arguments.size()-1) + os << ", "; + } + os << ")"; + + if (AllExternalWarnings) + klee_warning("%s", os.str().c_str()); + else + klee_warning_once(function, "%s", os.str().c_str()); + } + + bool success = externalDispatcher->executeCall(function, target->inst, args); + if (!success) { + terminateStateOnError(state, "failed external call: " + function->getName(), "external.err"); + return; + } + + if (!state.addressSpace.copyInConcretes()) { + terminateStateOnError(state, "external modified read-only object", "external.err"); + return; + } + + const Type *resultType = target->inst->getType(); + if (resultType != Type::VoidTy) { + ref<Expr> e = ConstantExpr::fromMemory((void*) args, + Expr::getWidthForLLVMType(resultType)); + bindLocal(target, state, e); + } +} + +/***/ + +ref<Expr> Executor::replaceReadWithSymbolic(ExecutionState &state, + ref<Expr> e) { + unsigned n = interpreterOpts.MakeConcreteSymbolic; + if (!n || replayOut || replayPath) + return e; + + // right now, we don't replace symbolics (is there any reason too?) + if (!e.isConstant()) + return e; + + if (n != 1 && random() % n) + return e; + + // create a new fresh location, assert it is equal to concrete value in e + // and return it. + + const MemoryObject *mo = memory->allocate(Expr::getMinBytesForWidth(e.getWidth()), + false, false, + state.prevPC->inst); + assert(mo && "out of memory"); + ref<Expr> res = Expr::createTempRead(mo->array, e.getWidth()); + ref<Expr> eq = NotOptimizedExpr::create(EqExpr::create(e, res)); + llvm::cerr << "Making symbolic: " << eq << "\n"; + state.addConstraint(eq); + return res; +} + +ObjectState *Executor::bindObjectInState(ExecutionState &state, const MemoryObject *mo, + bool isLocal) { + ObjectState *os = new ObjectState(mo, mo->size); + state.addressSpace.bindObject(mo, os); + + // Its possible that multiple bindings of the same mo in the state + // will put multiple copies on this list, but it doesn't really + // matter because all we use this list for is to unbind the object + // on function return. 
+ if (isLocal) + state.stack.back().allocas.push_back(mo); + + return os; +} + +void Executor::executeAllocN(ExecutionState &state, + uint64_t nelems, + uint64_t size, + uint64_t alignment, + bool isLocal, + KInstruction *target) { +#if 0 + // over-allocate so that we can properly align the whole buffer + uint64_t address = (uint64_t) (unsigned) malloc(nelems * size + alignment - 1); + address += (alignment - address % alignment); +#else + theMMap = + mmap((void*) 0x90000000, + nelems*size, PROT_READ|PROT_WRITE, + MAP_PRIVATE +#ifdef MAP_ANONYMOUS + |MAP_ANONYMOUS +#endif + , 0, 0); + uint64_t address = (uintptr_t) theMMap; + theMMapSize = nelems*size; +#endif + + for (unsigned i = 0; i < nelems; i++) { + MemoryObject *mo = memory->allocateFixed(address + i*size, size, state.prevPC->inst); + ObjectState *os = bindObjectInState(state, mo, isLocal); + os->initializeToRandom(); + + // bind the local to the first memory object in the whole array + if (i == 0) + bindLocal(target, state, mo->getBaseExpr()); + } + + llvm::cerr << "KLEE: allocN at: " << address << "\n"; +} + +void Executor::executeAlloc(ExecutionState &state, + ref<Expr> size, + bool isLocal, + KInstruction *target, + bool zeroMemory, + const ObjectState *reallocFrom) { + size = toUnique(state, size); + if (size.isConstant()) { + MemoryObject *mo = memory->allocate(size.getConstantValue(), isLocal, false, + state.prevPC->inst); + if (!mo) { + bindLocal(target, state, ref<Expr>(0, kMachinePointerType)); + } else { + ObjectState *os = bindObjectInState(state, mo, isLocal); + if (zeroMemory) { + os->initializeToZero(); + } else { + os->initializeToRandom(); + } + bindLocal(target, state, mo->getBaseExpr()); + + if (reallocFrom) { + unsigned count = std::min(reallocFrom->size, os->size); + for (unsigned i=0; i<count; i++) + os->write(i, reallocFrom->read8(i)); + state.addressSpace.unbindObject(reallocFrom->getObject()); + } + } + } else { + // XXX For now we just pick a size. Ideally we would support + // symbolic sizes fully but even if we don't it would be better to + // "smartly" pick a value, for example we could fork and pick the + // min and max values and perhaps some intermediate (reasonable + // value). + // + // It would also be nice to recognize the case when size has + // exactly two values and just fork (but we need to get rid of + // return argument first). This shows up in pcre when llvm + // collapses the size expression with a select. + + ref<Expr> example; + bool success = solver->getValue(state, size, example); + assert(success && "FIXME: Unhandled solver failure"); + + // Try and start with a small example + while (example.getConstantValue()>128) { + ref<Expr> tmp = ref<Expr>(example.getConstantValue() >> 1, + example.getWidth()); + bool res; + bool success = solver->mayBeTrue(state, EqExpr::create(tmp, size), res); + assert(success && "FIXME: Unhandled solver failure"); + if (!res) + break; + example = tmp; + } + + StatePair fixedSize = fork(state, EqExpr::create(example, size), true); + + if (fixedSize.second) { + // Check for exactly two values + ref<Expr> tmp; + bool success = solver->getValue(*fixedSize.second, size, tmp); + assert(success && "FIXME: Unhandled solver failure"); + bool res; + success = solver->mustBeTrue(*fixedSize.second, + EqExpr::create(tmp, size), + res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) { + executeAlloc(*fixedSize.second, tmp, isLocal, + target, zeroMemory, reallocFrom); + } else { + // See if a *really* big value is possible. 
If so assume + // malloc will fail for it, so lets fork and return 0. + StatePair hugeSize = fork(*fixedSize.second, + UltExpr::create(ref<Expr>(1<<31, Expr::Int32), size), + true); + if (hugeSize.first) { + klee_message("NOTE: found huge malloc, returing 0"); + bindLocal(target, *hugeSize.first, ref<Expr>(0,kMachinePointerType)); + } + + if (hugeSize.second) { + std::ostringstream info; + ExprPPrinter::printOne(info, " size expr", size); + info << " concretization : " << example << "\n"; + info << " unbound example: " << tmp << "\n"; + terminateStateOnError(*hugeSize.second, + "concretized symbolic size", + "model.err", + info.str()); + } + } + } + + if (fixedSize.first) // can be zero when fork fails + executeAlloc(*fixedSize.first, example, isLocal, + target, zeroMemory, reallocFrom); + } +} + +void Executor::executeFree(ExecutionState &state, + ref<Expr> address, + KInstruction *target) { + StatePair zeroPointer = fork(state, Expr::createIsZero(address), true); + if (zeroPointer.first) { + if (target) + bindLocal(target, *zeroPointer.first, Expr::createPointer(0)); + } + if (zeroPointer.second) { // address != 0 + ExactResolutionList rl; + resolveExact(*zeroPointer.second, address, rl, "free"); + + for (Executor::ExactResolutionList::iterator it = rl.begin(), + ie = rl.end(); it != ie; ++it) { + const MemoryObject *mo = it->first.first; + if (mo->isLocal) { + terminateStateOnError(*it->second, + "free of alloca", + "free.err", + getAddressInfo(*it->second, address)); + } else if (mo->isGlobal) { + terminateStateOnError(*it->second, + "free of global", + "free.err", + getAddressInfo(*it->second, address)); + } else { + it->second->addressSpace.unbindObject(mo); + if (target) + bindLocal(target, *it->second, Expr::createPointer(0)); + } + } + } +} + +void Executor::resolveExact(ExecutionState &state, + ref<Expr> p, + ExactResolutionList &results, + const std::string &name) { + // XXX we may want to be capping this? + ResolutionList rl; + state.addressSpace.resolve(state, solver, p, rl); + + ExecutionState *unbound = &state; + for (ResolutionList::iterator it = rl.begin(), ie = rl.end(); + it != ie; ++it) { + ref<Expr> inBounds = EqExpr::create(p, it->first->getBaseExpr()); + + StatePair branches = fork(*unbound, inBounds, true); + + if (branches.first) + results.push_back(std::make_pair(*it, branches.first)); + + unbound = branches.second; + if (!unbound) // Fork failure + break; + } + + if (unbound) { + terminateStateOnError(*unbound, + "memory error: invalid pointer: " + name, + "ptr.err", + getAddressInfo(*unbound, p)); + } +} + +void Executor::executeMemoryOperation(ExecutionState &state, + bool isWrite, + ref<Expr> address, + ref<Expr> value /* undef if read */, + KInstruction *target /* undef if write */) { + Expr::Width type = (isWrite ? 
value.getWidth() : + Expr::getWidthForLLVMType(target->inst->getType())); + unsigned bytes = Expr::getMinBytesForWidth(type); + + if (SimplifySymIndices) { + if (!address.isConstant()) + address = state.constraints.simplifyExpr(address); + if (isWrite && !value.isConstant()) + value = state.constraints.simplifyExpr(value); + } + + // fast path: single in-bounds resolution + ObjectPair op; + bool success; + solver->setTimeout(stpTimeout); + if (!state.addressSpace.resolveOne(state, solver, address, op, success)) { + address = toConstant(state, address, "resolveOne failure"); + success = state.addressSpace.resolveOne(address.getConstantValue(), op); + } + solver->setTimeout(0); + + if (success) { + const MemoryObject *mo = op.first; + + if (MaxSymArraySize && mo->size>=MaxSymArraySize) { + address = toConstant(state, address, "max-sym-array-size"); + } + + ref<Expr> offset = mo->getOffsetExpr(address); + + bool inBounds; + solver->setTimeout(stpTimeout); + bool success = solver->mustBeTrue(state, + mo->getBoundsCheckOffset(offset, bytes), + inBounds); + solver->setTimeout(0); + if (!success) { + state.pc = state.prevPC; + terminateStateEarly(state, "query timed out"); + return; + } + + if (inBounds) { + const ObjectState *os = op.second; + if (isWrite) { + if (os->readOnly) { + terminateStateOnError(state, + "memory error: object read only", + "readonly.err"); + } else { + ObjectState *wos = state.addressSpace.getWriteable(mo, os); + wos->write(offset, value); + } + } else { + ref<Expr> result = os->read(offset, type); + + if (interpreterOpts.MakeConcreteSymbolic) + result = replaceReadWithSymbolic(state, result); + + bindLocal(target, state, result); + } + + return; + } + } + + // we are on an error path (no resolution, multiple resolution, one + // resolution with out of bounds) + + ResolutionList rl; + solver->setTimeout(stpTimeout); + bool incomplete = state.addressSpace.resolve(state, solver, address, rl, + 0, stpTimeout); + solver->setTimeout(0); + + // XXX there is some query wasteage here. who cares? + ExecutionState *unbound = &state; + + for (ResolutionList::iterator i = rl.begin(), ie = rl.end(); i != ie; ++i) { + const MemoryObject *mo = i->first; + const ObjectState *os = i->second; + ref<Expr> inBounds = mo->getBoundsCheckPointer(address, bytes); + + StatePair branches = fork(*unbound, inBounds, true); + ExecutionState *bound = branches.first; + + // bound can be 0 on failure or overlapped + if (bound) { + if (isWrite) { + if (os->readOnly) { + terminateStateOnError(*bound, + "memory error: object read only", + "readonly.err"); + } else { + ObjectState *wos = bound->addressSpace.getWriteable(mo, os); + wos->write(mo->getOffsetExpr(address), value); + } + } else { + ref<Expr> result = os->read(mo->getOffsetExpr(address), type); + bindLocal(target, *bound, result); + } + } + + unbound = branches.second; + if (!unbound) + break; + } + + // XXX should we distinguish out of bounds and overlapped cases? + if (unbound) { + if (incomplete) { + terminateStateEarly(*unbound, "query timed out (resolve)"); + } else { + terminateStateOnError(*unbound, + "memory error: out of bound pointer", + "ptr.err", + getAddressInfo(*unbound, address)); + } + } +} + +void Executor::executeMakeSymbolic(ExecutionState &state, + const MemoryObject *mo) { + // make a new one and rebind, we use bind here because we want to + // create a flat out new state, not a copy. although I'm not really + // sure it matters. 
+ ObjectState *os = bindObjectInState(state, mo, false); + if (!replayOut) { + os->makeSymbolic(); + state.addSymbolic(mo); + + std::map< ExecutionState*, std::vector<SeedInfo> >::iterator it = + seedMap.find(&state); + if (it!=seedMap.end()) { // In seed mode we need to add this as a + // binding. + for (std::vector<SeedInfo>::iterator siit = it->second.begin(), + siie = it->second.end(); siit != siie; ++siit) { + SeedInfo &si = *siit; + BOutObject *obj = si.getNextInput(mo, + NamedSeedMatching); + + if (!obj) { + if (ZeroSeedExtension) { + std::vector<unsigned char> &values = + si.assignment.bindings[mo->array]; + values = std::vector<unsigned char>(mo->size, '\0'); + } else if (!AllowSeedExtension) { + terminateStateOnError(state, + "ran out of inputs during seeding", + "user.err"); + break; + } + } else { + if (obj->numBytes != mo->size && + ((!(AllowSeedExtension || ZeroSeedExtension) + && obj->numBytes < mo->size) || + (!AllowSeedTruncation && obj->numBytes > mo->size))) { + std::stringstream msg; + msg << "replace size mismatch: " + << mo->name << "[" << mo->size << "]" + << " vs " << obj->name << "[" << obj->numBytes << "]" + << " in bout\n"; + + terminateStateOnError(state, + msg.str(), + "user.err"); + break; + } else { + std::vector<unsigned char> &values = + si.assignment.bindings[mo->array]; + values.insert(values.begin(), obj->bytes, + obj->bytes + std::min(obj->numBytes, mo->size)); + if (ZeroSeedExtension) { + for (unsigned i=obj->numBytes; i<mo->size; ++i) + values.push_back('\0'); + } + } + } + } + } + } else { + if (replayPosition >= replayOut->numObjects) { + terminateStateOnError(state, "replay count mismatch", "user.err"); + } else { + BOutObject *obj = &replayOut->objects[replayPosition++]; + if (obj->numBytes != mo->size) { + terminateStateOnError(state, "replay size mismatch", "user.err"); + } else { + for (unsigned i=0; i<mo->size; i++) + os->write8(i, obj->bytes[i]); + } + } + } +} + +/***/ + +void Executor::runFunctionAsMain(Function *f, + int argc, + char **argv, + char **envp) { + std::vector<ref<Expr> > arguments; + + // force deterministic initialization of memory objects + srand(1); + srandom(1); + + MemoryObject *argvMO = 0; + + // In order to make uclibc happy and be closer to what the system is + // doing we lay out the environments at the end of the argv array + // (both are terminated by a null). There is also a final terminating + // null that uclibc seems to expect, possibly the ELF header? 
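+  // Concretely, the buffer laid out below is: +  //   argv[0..argc-1], NULL, envp[0..envc-1], NULL, NULL +  // i.e. the (argc+1+envc+1+1) pointer slots allocated for argvMO.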
+ + int envc; + for (envc=0; envp[envc]; ++envc) ; + + KFunction *kf = kmodule->functionMap[f]; + assert(kf); + Function::arg_iterator ai = f->arg_begin(), ae = f->arg_end(); + if (ai!=ae) { + arguments.push_back(ref<Expr>(argc, Expr::Int32)); + + if (++ai!=ae) { + argvMO = memory->allocate((argc+1+envc+1+1) * kMachinePointerSize, false, true, + f->begin()->begin()); + + arguments.push_back(argvMO->getBaseExpr()); + + if (++ai!=ae) { + uint64_t envp_start = argvMO->address + (argc+1)*kMachinePointerSize; + arguments.push_back(Expr::createPointer(envp_start)); + + if (++ai!=ae) + klee_error("invalid main function (expect 0-3 arguments)"); + } + } + } + + ExecutionState *state = new ExecutionState(kmodule->functionMap[f]); + + if (pathWriter) + state->pathOS = pathWriter->open(); + if (symPathWriter) + state->symPathOS = symPathWriter->open(); + + + if (statsTracker) + statsTracker->framePushed(*state, 0); + + assert(arguments.size() == f->arg_size() && "wrong number of arguments"); + for (unsigned i = 0, e = f->arg_size(); i != e; ++i) + bindArgument(kf, i, *state, arguments[i]); + + if (argvMO) { + ObjectState *argvOS = bindObjectInState(*state, argvMO, false); + + for (int i=0; i<argc+1+envc+1+1; i++) { + MemoryObject *arg; + + if (i==argc || i>=argc+1+envc) { + arg = 0; + } else { + char *s = i<argc ? argv[i] : envp[i-(argc+1)]; + int j, len = strlen(s); + + arg = memory->allocate(len+1, false, true, state->pc->inst); + ObjectState *os = bindObjectInState(*state, arg, false); + for (j=0; j<len+1; j++) + os->write8(j, s[j]); + } + + if (arg) { + argvOS->write(i * kMachinePointerSize, arg->getBaseExpr()); + } else { + argvOS->write(i * kMachinePointerSize, Expr::createPointer(0)); + } + } + } + + initializeGlobals(*state); + + processTree = new PTree(state); + state->ptreeNode = processTree->root; + run(*state); + delete processTree; + processTree = 0; + + // hack to clear memory objects + delete memory; + memory = new MemoryManager(); + + globalObjects.clear(); + globalAddresses.clear(); + + if (statsTracker) + statsTracker->done(); + + if (theMMap) { + munmap(theMMap, theMMapSize); + theMMap = 0; + } +} + +unsigned Executor::getPathStreamID(const ExecutionState &state) { + assert(pathWriter); + return state.pathOS.getID(); +} + +unsigned Executor::getSymbolicPathStreamID(const ExecutionState &state) { + assert(symPathWriter); + return state.symPathOS.getID(); +} + +void Executor::getConstraintLog(const ExecutionState &state, + std::string &res, + bool asCVC) { + if (asCVC) { + Query query(state.constraints, ref<Expr>(0, Expr::Bool)); + char *log = solver->stpSolver->getConstraintLog(query); + res = std::string(log); + free(log); + } else { + std::ostringstream info; + ExprPPrinter::printConstraints(info, state.constraints); + res = info.str(); + } +} + +bool Executor::getSymbolicSolution(const ExecutionState &state, + std::vector< + std::pair<std::string, + std::vector<unsigned char> > > + &res) { + solver->setTimeout(stpTimeout); + + ExecutionState tmp(state); + if (!NoPreferCex) { + for (std::vector<const MemoryObject*>::const_iterator + it = state.symbolics.begin(), ie = state.symbolics.end(); + it != ie; ++it) { + const MemoryObject *mo = *it; + std::vector< ref<Expr> >::const_iterator pi = + mo->cexPreferences.begin(), pie = mo->cexPreferences.end(); + for (; pi != pie; ++pi) { + bool mustBeTrue; + bool success = solver->mustBeTrue(tmp, Expr::createNot(*pi), + mustBeTrue); + if (!success) break; + if (!mustBeTrue) tmp.addConstraint(*pi); + } + if (pi!=pie) break; + } + } + + 
std::vector< std::vector<unsigned char> > values; + std::vector<const Array*> objects; + for (unsigned i = 0; i != state.symbolics.size(); ++i) + objects.push_back(state.symbolics[i]->array); + bool success = solver->getInitialValues(tmp, objects, values); + solver->setTimeout(0); + if (!success) { + klee_warning("unable to compute initial values (invalid constraints?)!"); + ExprPPrinter::printQuery(std::cerr, + state.constraints, + ref<Expr>(0,Expr::Bool)); + return false; + } + + unsigned i = 0; + for (std::vector<const MemoryObject*>::const_iterator + it = state.symbolics.begin(), ie = state.symbolics.end(); + it != ie; ++it) { + res.push_back(std::make_pair((*it)->name, values[i])); + ++i; + } + return true; +} + +void Executor::getCoveredLines(const ExecutionState &state, + std::map<const std::string*, std::set<unsigned> > &res) { + res = state.coveredLines; +} + +void Executor::doImpliedValueConcretization(ExecutionState &state, + ref<Expr> e, + ref<Expr> value) { + assert(value.isConstant() && "non-constant passed in place of constant"); + + if (DebugCheckForImpliedValues) + ImpliedValue::checkForImpliedValues(solver->solver, e, value); + + ImpliedValueList results; + ImpliedValue::getImpliedValues(e, value, results); + for (ImpliedValueList::iterator it = results.begin(), ie = results.end(); + it != ie; ++it) { + ReadExpr *re = it->first.get(); + + if (re->index.isConstant()) { + // FIXME: This is the sole remaining usage of the Array object + // variable. Kill me. + const MemoryObject *mo = re->updates.root->object; + const ObjectState *os = state.addressSpace.findObject(mo); + + if (!os) { + // object has been free'd, no need to concretize (although as + // in other cases we would like to concretize the outstanding + // reads, but we have no facility for that yet) + } else { + assert(!os->readOnly && "not possible? read only object with static read?"); + ObjectState *wos = state.addressSpace.getWriteable(mo, os); + wos->write(re->index.getConstantValue(), it->second); + } + } + } +} + +/// + +Interpreter *Interpreter::create(const InterpreterOptions &opts, + InterpreterHandler *ih) { + return new Executor(opts, ih); +} diff --git a/lib/Core/Executor.h b/lib/Core/Executor.h new file mode 100644 index 00000000..76868291 --- /dev/null +++ b/lib/Core/Executor.h @@ -0,0 +1,445 @@ +//===-- Executor.h ----------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Class to perform actual execution, hides implementation details from external +// interpreter. 
+// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_EXECUTOR_H +#define KLEE_EXECUTOR_H + +#include "klee/Interpreter.h" +#include "llvm/Support/CallSite.h" +#include <vector> +#include <string> +#include <map> +#include <set> + +struct BOut; + +namespace llvm { + class BasicBlock; + class BranchInst; + class CallInst; + class Constant; + class ConstantExpr; + class Function; + class GlobalValue; + class Instruction; + class TargetData; + class Value; +} + +namespace klee { + class ExecutionState; + class ExternalDispatcher; + class Expr; + class InstructionInfoTable; + class KFunction; + class KInstruction; + class KInstIterator; + class KModule; + class MemoryManager; + class MemoryObject; + class ObjectState; + class PTree; + class Searcher; + class SeedInfo; + class SpecialFunctionHandler; + class StackFrame; + class StatsTracker; + class TimingSolver; + class TreeStreamWriter; + template<class T> class ref; + + /// \todo Add a context object to keep track of data only live + /// during an instruction step. Should contain addedStates, + /// removedStates, and haltExecution, among others. + +class Executor : public Interpreter { + friend class BumpMergingSearcher; + friend class MergingSearcher; + friend class RandomPathSearcher; + friend class OwningSearcher; + friend class WeightedRandomSearcher; + friend class SpecialFunctionHandler; + friend class StatsTracker; + +public: + class Timer { + public: + Timer(); + virtual ~Timer(); + + /// The event callback. + virtual void run() = 0; + }; + + typedef std::pair<ExecutionState*,ExecutionState*> StatePair; + +private: + class TimerInfo; + + KModule *kmodule; + InterpreterHandler *interpreterHandler; + Searcher *searcher; + + ExternalDispatcher *externalDispatcher; + TimingSolver *solver; + MemoryManager *memory; + std::set<ExecutionState*> states; + StatsTracker *statsTracker; + TreeStreamWriter *pathWriter, *symPathWriter; + SpecialFunctionHandler *specialFunctionHandler; + std::vector<TimerInfo*> timers; + PTree *processTree; + + /// Used to track states that have been added during the current + /// instructions step. + /// \invariant \ref addedStates is a subset of \ref states. + /// \invariant \ref addedStates and \ref removedStates are disjoint. + std::set<ExecutionState*> addedStates; + /// Used to track states that have been removed during the current + /// instructions step. + /// \invariant \ref removedStates is a subset of \ref states. + /// \invariant \ref addedStates and \ref removedStates are disjoint. + std::set<ExecutionState*> removedStates; + + /// When non-empty the Executor is running in "seed" mode. The + /// states in this map will be executed in an arbitrary order + /// (outside the normal search interface) until they terminate. When + /// the states reach a symbolic branch then either direction that + /// satisfies one or more seeds will be added to this map. What + /// happens with other states (that don't satisfy the seeds) depends + /// on as-yet-to-be-determined flags. + std::map<ExecutionState*, std::vector<SeedInfo> > seedMap; + + /// Map of globals to their representative memory object. + std::map<const llvm::GlobalValue*, MemoryObject*> globalObjects; + + /// Map of globals to their bound address. This also includes + /// globals that have no representative object (i.e. functions). + std::map<const llvm::GlobalValue*, ref<Expr> > globalAddresses; + + /// The set of legal function addresses, used to validate function + /// pointers. 
+ std::set<void*> legalFunctions; + + /// When non-null the bindings that will be used for calls to + /// klee_make_symbolic in order replay. + const struct BOut *replayOut; + /// When non-null a list of branch decisions to be used for replay. + const std::vector<bool> *replayPath; + /// The index into the current \ref replayOut or \ref replayPath + /// object. + unsigned replayPosition; + + /// When non-null a list of "seed" inputs which will be used to + /// drive execution. + const std::vector<struct BOut *> *usingSeeds; + + /// Disables forking, instead a random path is chosen. Enabled as + /// needed to control memory usage. \see fork() + bool atMemoryLimit; + + /// Disables forking, set by client. \see setInhibitForking() + bool inhibitForking; + + /// Signals the executor to halt execution at the next instruction + /// step. + bool haltExecution; + + /// Whether implied-value concretization is enabled. Currently + /// false, it is buggy (it needs to validate its writes). + bool ivcEnabled; + + /// The maximum time to allow for a single stp query. + double stpTimeout; + + llvm::Function* getCalledFunction(llvm::CallSite &cs, ExecutionState &state); + + void executeInstruction(ExecutionState &state, KInstruction *ki); + + void printFileLine(ExecutionState &state, KInstruction *ki); + + void run(ExecutionState &initialState); + + // Given a concrete object in our [klee's] address space, add it to + // objects checked code can reference. + MemoryObject *addExternalObject(ExecutionState &state, void *addr, + unsigned size, bool isReadOnly); + + void initializeGlobalObject(ExecutionState &state, ObjectState *os, + llvm::Constant *c, + unsigned offset); + void initializeGlobals(ExecutionState &state); + + void stepInstruction(ExecutionState &state); + void updateStates(ExecutionState *current); + void transferToBasicBlock(llvm::BasicBlock *dst, + llvm::BasicBlock *src, + ExecutionState &state); + + void callExternalFunction(ExecutionState &state, + KInstruction *target, + llvm::Function *function, + std::vector< ref<Expr> > &arguments); + + ObjectState *bindObjectInState(ExecutionState &state, const MemoryObject *mo, + bool isLocal); + + /// Resolve a pointer to the memory objects it could point to the + /// start of, forking execution when necessary and generating errors + /// for pointers to invalid locations (either out of bounds or + /// address inside the middle of objects). + /// + /// \param results[out] A list of ((MemoryObject,ObjectState), + /// state) pairs for each object the given address can point to the + /// beginning of. + typedef std::vector< std::pair<std::pair<const MemoryObject*, const ObjectState*>, + ExecutionState*> > ExactResolutionList; + void resolveExact(ExecutionState &state, + ref<Expr> p, + ExactResolutionList &results, + const std::string &name); + + /// Allocate and bind a new object in a particular state. NOTE: This + /// function may fork. + /// + /// \param isLocal Flag to indicate if the object should be + /// automatically deallocated on function return (this also makes it + /// illegal to free directly). + /// + /// \param target Value at which to bind the base address of the new + /// object. + /// + /// \param reallocFrom If non-zero and the allocation succeeds, + /// initialize the new object from the given one and unbind it when + /// done (realloc semantics). The initialized bytes will be the + /// minimum of the size of the old and new objects, with remaining + /// bytes initialized as specified by zeroMemory. 
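+  /// +  /// \param zeroMemory Flag to indicate if the new object should be +  /// zero initialized (if false, its contents are filled with random +  /// bytes via initializeToRandom()).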
+  void executeAlloc(ExecutionState &state, +                    ref<Expr> size, +                    bool isLocal, +                    KInstruction *target, +                    bool zeroMemory=false, +                    const ObjectState *reallocFrom=0); + +  /// XXX not for public use (this is for histar, it allocates a +  /// contiguous set of objects, while guaranteeing page alignment) +  void executeAllocN(ExecutionState &state, +                     uint64_t nelems, +                     uint64_t size, +                     uint64_t alignment, +                     bool isLocal, +                     KInstruction *target); + +  /// Free the given address with checking for errors. If target is +  /// given it will be bound to 0 in the resulting states (this is a +  /// convenience for realloc). Note that this function can cause the +  /// state to fork and that \ref state cannot be safely accessed +  /// afterwards. +  void executeFree(ExecutionState &state, +                   ref<Expr> address, +                   KInstruction *target = 0); + +  void executeCall(ExecutionState &state, +                   KInstruction *ki, +                   llvm::Function *f, +                   std::vector< ref<Expr> > &arguments); + +  // do address resolution / object binding / out of bounds checking +  // and perform the operation +  void executeMemoryOperation(ExecutionState &state, +                              bool isWrite, +                              ref<Expr> address, +                              ref<Expr> value /* undef if read */, +                              KInstruction *target /* undef if write */); + +  void executeMakeSymbolic(ExecutionState &state, const MemoryObject *mo); + +  /// Create a new state where each input condition has been added as +  /// a constraint and return the results. The input state is included +  /// as one of the results. Note that the output vector may include +  /// NULL pointers for states which were unable to be created. +  void branch(ExecutionState &state, +              const std::vector< ref<Expr> > &conditions, +              std::vector<ExecutionState*> &result); + +  // Fork current and return states in which condition holds / does +  // not hold, respectively. One of the states is necessarily the +  // current state, and one of the states may be null. +  StatePair fork(ExecutionState &current, ref<Expr> condition, bool isInternal); + +  /// Add the given (boolean) condition as a constraint on state. This +  /// function is a wrapper around the state's addConstraint function +  /// which also manages propagation of implied values, +  /// validity checks, and seed patching. +  void addConstraint(ExecutionState &state, ref<Expr> condition); + +  // Called on [for now] concrete reads, replaces constant with a symbolic +  // Used for testing. +  ref<Expr> replaceReadWithSymbolic(ExecutionState &state, ref<Expr> e); + +  ref<Expr> eval(KInstruction *ki, +                 unsigned index, +                 ExecutionState &state); + +  void bindLocal(KInstruction *target, +                 ExecutionState &state, +                 ref<Expr> value); +  void bindArgument(KFunction *kf, +                    unsigned index, +                    ExecutionState &state, +                    ref<Expr> value); + +  ref<Expr> evalConstantExpr(llvm::ConstantExpr *ce); + +  /// Return a unique constant value for the given expression in the +  /// given state, if it has one (i.e. it provably only has a single +  /// value). Otherwise return the original expression. +  ref<Expr> toUnique(const ExecutionState &state, ref<Expr> &e); + +  /// Return a constant value for the given expression, forcing it to +  /// be constant in the given state by adding a constraint if +  /// necessary. Note that this function breaks completeness and +  /// should generally be avoided. +  /// +  /// \param purpose An identifying string to be printed in case of concretization. +  ref<Expr> toConstant(ExecutionState &state, ref<Expr> e, const char *purpose); + +  /// Bind a constant value for e to the given target.
NOTE: This +  /// function may fork state if the state has multiple seeds. +  void executeGetValue(ExecutionState &state, ref<Expr> e, KInstruction *target); + +  /// Get textual information regarding a memory address. +  std::string getAddressInfo(ExecutionState &state, ref<Expr> address) const; + +  // remove state from queue and delete +  void terminateState(ExecutionState &state); +  // call exit handler and terminate state +  void terminateStateEarly(ExecutionState &state, std::string message); +  // call exit handler and terminate state +  void terminateStateOnExit(ExecutionState &state); +  // call error handler and terminate state +  void terminateStateOnError(ExecutionState &state, +                             const std::string &message, +                             const std::string &suffix, +                             const std::string &longMessage=""); + +  // call error handler and terminate state, for execution errors +  // (things that should not be possible, like illegal instruction or +  // unlowered intrinsic, or are unsupported, like inline assembly) +  void terminateStateOnExecError(ExecutionState &state, +                                 const std::string &message, +                                 const std::string &info="") { +    terminateStateOnError(state, message, "exec.err", info); +  } + +  /// bindModuleConstants - Initialize the module constant table. +  void bindModuleConstants(); + +  /// bindInstructionConstants - Initialize any necessary per instruction +  /// constant values. +  void bindInstructionConstants(KInstruction *KI); + +  void handlePointsToObj(ExecutionState &state, +                         KInstruction *target, +                         const std::vector<ref<Expr> > &arguments); + +  void doImpliedValueConcretization(ExecutionState &state, +                                    ref<Expr> e, +                                    ref<Expr> value); + +  /// Add a timer to be executed periodically. +  /// +  /// \param timer The timer object to run on firings. +  /// \param rate The approximate delay (in seconds) between firings.
+ void addTimer(Timer *timer, double rate); + + void initTimers(); + void processTimers(ExecutionState *current, + double maxInstTime); + +public: + Executor(const InterpreterOptions &opts, InterpreterHandler *ie); + virtual ~Executor(); + + const InterpreterHandler& getHandler() { + return *interpreterHandler; + } + + // XXX should just be moved out to utility module + ref<Expr> evalConstant(llvm::Constant *c); + + virtual void setPathWriter(TreeStreamWriter *tsw) { + pathWriter = tsw; + } + virtual void setSymbolicPathWriter(TreeStreamWriter *tsw) { + symPathWriter = tsw; + } + + virtual void setReplayOut(const struct BOut *out) { + assert(!replayPath && "cannot replay both buffer and path"); + replayOut = out; + replayPosition = 0; + } + + virtual void setReplayPath(const std::vector<bool> *path) { + assert(!replayOut && "cannot replay both buffer and path"); + replayPath = path; + replayPosition = 0; + } + + virtual const llvm::Module * + setModule(llvm::Module *module, const ModuleOptions &opts); + + virtual void useSeeds(const std::vector<struct BOut *> *seeds) { + usingSeeds = seeds; + } + + virtual void runFunctionAsMain(llvm::Function *f, + int argc, + char **argv, + char **envp); + + /*** Runtime options ***/ + + virtual void setHaltExecution(bool value) { + haltExecution = value; + } + + virtual void setInhibitForking(bool value) { + inhibitForking = value; + } + + /*** State accessor methods ***/ + + virtual unsigned getPathStreamID(const ExecutionState &state); + + virtual unsigned getSymbolicPathStreamID(const ExecutionState &state); + + virtual void getConstraintLog(const ExecutionState &state, + std::string &res, + bool asCVC = false); + + virtual bool getSymbolicSolution(const ExecutionState &state, + std::vector< + std::pair<std::string, + std::vector<unsigned char> > > + &res); + + virtual void getCoveredLines(const ExecutionState &state, + std::map<const std::string*, std::set<unsigned> > &res); +}; + +} // End klee namespace + +#endif diff --git a/lib/Core/ExecutorTimers.cpp b/lib/Core/ExecutorTimers.cpp new file mode 100644 index 00000000..51792e0d --- /dev/null +++ b/lib/Core/ExecutorTimers.cpp @@ -0,0 +1,220 @@ +//===-- ExecutorTimers.cpp ------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Common.h" + +#include "CoreStats.h" +#include "Executor.h" +#include "PTree.h" +#include "StatsTracker.h" + +#include "klee/ExecutionState.h" +#include "klee/Internal/Module/InstructionInfoTable.h" +#include "klee/Internal/Module/KInstruction.h" +#include "klee/Internal/Module/KModule.h" +#include "klee/Internal/System/Time.h" + +#include "llvm/Function.h" +#include "llvm/Support/CommandLine.h" + +#include <unistd.h> +#include <signal.h> +#include <sys/time.h> +#include <math.h> + + +using namespace llvm; +using namespace klee; + +cl::opt<double> +MaxTime("max-time", + cl::desc("Halt execution after the specified number of seconds (0=off)"), + cl::init(0)); + +/// + +class HaltTimer : public Executor::Timer { + Executor *executor; + +public: + HaltTimer(Executor *_executor) : executor(_executor) {} + ~HaltTimer() {} + + void run() { + llvm::cerr << "KLEE: HaltTimer invoked\n"; + executor->setHaltExecution(true); + } +}; + +/// + +static const double kSecondsPerTick = .1; +static volatile unsigned timerTicks = 0; + +// XXX hack +extern "C" unsigned dumpStates, dumpPTree; +unsigned dumpStates = 0, dumpPTree = 0; + +static void onAlarm(int) { + ++timerTicks; +} + +// oooogalay +static void setupHandler() { + struct itimerval t; + struct timeval tv; + + tv.tv_sec = (long) kSecondsPerTick; + tv.tv_usec = (long) (fmod(kSecondsPerTick, 1.)*1000000); + + t.it_interval = t.it_value = tv; + + ::setitimer(ITIMER_REAL, &t, 0); + ::signal(SIGALRM, onAlarm); +} + +void Executor::initTimers() { + static bool first = true; + + if (first) { + first = false; + setupHandler(); + } + + if (MaxTime) { + addTimer(new HaltTimer(this), MaxTime); + } +} + +/// + +Executor::Timer::Timer() {} + +Executor::Timer::~Timer() {} + +class Executor::TimerInfo { +public: + Timer *timer; + + /// Approximate delay per timer firing. + double rate; + /// Wall time for next firing. 
+ double nextFireTime; + +public: + TimerInfo(Timer *_timer, double _rate) + : timer(_timer), + rate(_rate), + nextFireTime(util::getWallTime() + rate) {} + ~TimerInfo() { delete timer; } +}; + +void Executor::addTimer(Timer *timer, double rate) { + timers.push_back(new TimerInfo(timer, rate)); +} + +void Executor::processTimers(ExecutionState *current, + double maxInstTime) { + static unsigned callsWithoutCheck = 0; + unsigned ticks = timerTicks; + + if (!ticks && ++callsWithoutCheck > 1000) { + setupHandler(); + ticks = 1; + } + + if (ticks || dumpPTree || dumpStates) { + if (dumpPTree) { + char name[32]; + sprintf(name, "ptree%08d.dot", (int) stats::instructions); + std::ostream *os = interpreterHandler->openOutputFile(name); + if (os) { + processTree->dump(*os); + delete os; + } + + dumpPTree = 0; + } + + if (dumpStates) { + std::ostream *os = interpreterHandler->openOutputFile("states.txt"); + + if (os) { + for (std::set<ExecutionState*>::const_iterator it = states.begin(), + ie = states.end(); it != ie; ++it) { + ExecutionState *es = *it; + *os << "(" << es << ","; + *os << "["; + ExecutionState::stack_ty::iterator next = es->stack.begin(); + ++next; + for (ExecutionState::stack_ty::iterator sfIt = es->stack.begin(), + sf_ie = es->stack.end(); sfIt != sf_ie; ++sfIt) { + *os << "('" << sfIt->kf->function->getName() << "',"; + if (next == es->stack.end()) { + *os << es->prevPC->info->line << "), "; + } else { + *os << next->caller->info->line << "), "; + ++next; + } + } + *os << "], "; + + StackFrame &sf = es->stack.back(); + uint64_t md2u = computeMinDistToUncovered(es->pc, + sf.minDistToUncoveredOnReturn); + uint64_t icnt = theStatisticManager->getIndexedValue(stats::instructions, + es->pc->info->id); + uint64_t cpicnt = sf.callPathNode->statistics.getValue(stats::instructions); + + *os << "{"; + *os << "'depth' : " << es->depth << ", "; + *os << "'weight' : " << es->weight << ", "; + *os << "'queryCost' : " << es->queryCost << ", "; + *os << "'coveredNew' : " << es->coveredNew << ", "; + *os << "'instsSinceCovNew' : " << es->instsSinceCovNew << ", "; + *os << "'md2u' : " << md2u << ", "; + *os << "'icnt' : " << icnt << ", "; + *os << "'CPicnt' : " << cpicnt << ", "; + *os << "}"; + *os << ")\n"; + } + + delete os; + } + + dumpStates = 0; + } + + if (maxInstTime>0 && current && !removedStates.count(current)) { + if (timerTicks*kSecondsPerTick > maxInstTime) { + klee_warning("max-instruction-time exceeded: %.2fs", + timerTicks*kSecondsPerTick); + terminateStateEarly(*current, "max-instruction-time exceeded"); + } + } + + if (!timers.empty()) { + double time = util::getWallTime(); + + for (std::vector<TimerInfo*>::iterator it = timers.begin(), + ie = timers.end(); it != ie; ++it) { + TimerInfo *ti = *it; + + if (time >= ti->nextFireTime) { + ti->timer->run(); + ti->nextFireTime = time + ti->rate; + } + } + } + + timerTicks = 0; + callsWithoutCheck = 0; + } +} + diff --git a/lib/Core/ExecutorUtil.cpp b/lib/Core/ExecutorUtil.cpp new file mode 100644 index 00000000..3b11dd42 --- /dev/null +++ b/lib/Core/ExecutorUtil.cpp @@ -0,0 +1,144 @@ +//===-- ExecutorUtil.cpp --------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Executor.h" + +#include "klee/Expr.h" +#include "klee/Interpreter.h" +#include "klee/Machine.h" +#include "klee/Solver.h" + +#include "klee/Internal/Module/KModule.h" + +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/ModuleProvider.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/Streams.h" +#include "llvm/Target/TargetData.h" +#include <iostream> +#include <cassert> + +using namespace klee; +using namespace llvm; + +namespace klee { + +ref<Expr> +Executor::evalConstantExpr(llvm::ConstantExpr *ce) { + const llvm::Type *type = ce->getType(); + + ref<Expr> op1(0,Expr::Bool), op2(0,Expr::Bool), op3(0,Expr::Bool); + int numOperands = ce->getNumOperands(); + + if (numOperands > 0) op1 = evalConstant(ce->getOperand(0)); + if (numOperands > 1) op2 = evalConstant(ce->getOperand(1)); + if (numOperands > 2) op3 = evalConstant(ce->getOperand(2)); + + switch (ce->getOpcode()) { + case Instruction::Trunc: return ExtractExpr::createByteOff(op1, + 0, + Expr::getWidthForLLVMType(type)); + case Instruction::ZExt: return ZExtExpr::create(op1, + Expr::getWidthForLLVMType(type)); + case Instruction::SExt: return SExtExpr::create(op1, + Expr::getWidthForLLVMType(type)); + case Instruction::Add: return AddExpr::create(op1, op2); + case Instruction::Sub: return SubExpr::create(op1, op2); + case Instruction::Mul: return MulExpr::create(op1, op2); + case Instruction::SDiv: return SDivExpr::create(op1, op2); + case Instruction::UDiv: return UDivExpr::create(op1, op2); + case Instruction::SRem: return SRemExpr::create(op1, op2); + case Instruction::URem: return URemExpr::create(op1, op2); + case Instruction::And: return AndExpr::create(op1, op2); + case Instruction::Or: return OrExpr::create(op1, op2); + case Instruction::Xor: return XorExpr::create(op1, op2); + case Instruction::Shl: return ShlExpr::create(op1, op2); + case Instruction::LShr: return LShrExpr::create(op1, op2); + case Instruction::AShr: return AShrExpr::create(op1, op2); + case Instruction::BitCast: return op1; + + case Instruction::IntToPtr: { + return ZExtExpr::create(op1, Expr::getWidthForLLVMType(type)); + } + + case Instruction::PtrToInt: { + return ZExtExpr::create(op1, Expr::getWidthForLLVMType(type)); + } + + case Instruction::GetElementPtr: { + ref<Expr> base = op1; + + for (gep_type_iterator ii = gep_type_begin(ce), ie = gep_type_end(ce); + ii != ie; ++ii) { + ref<Expr> addend(0, kMachinePointerType); + + if (const StructType *st = dyn_cast<StructType>(*ii)) { + const StructLayout *sl = kmodule->targetData->getStructLayout(st); + const ConstantInt *ci = cast<ConstantInt>(ii.getOperand()); + + addend = Expr::createPointer(sl->getElementOffset((unsigned) + ci->getZExtValue())); + } else { + const SequentialType *st = cast<SequentialType>(*ii); + ref<Expr> index = evalConstant(cast<Constant>(ii.getOperand())); + unsigned elementSize = kmodule->targetData->getTypeStoreSize(st->getElementType()); + + index = Expr::createCoerceToPointerType(index); + addend = MulExpr::create(index, + Expr::createPointer(elementSize)); + } + + base = AddExpr::create(base, addend); + } + + return base; + } + + case Instruction::ICmp: { + switch(ce->getPredicate()) { + case ICmpInst::ICMP_EQ: return EqExpr::create(op1, op2); + case ICmpInst::ICMP_NE: return NeExpr::create(op1, op2); + case ICmpInst::ICMP_UGT: return 
UgtExpr::create(op1, op2); + case ICmpInst::ICMP_UGE: return UgeExpr::create(op1, op2); + case ICmpInst::ICMP_ULT: return UltExpr::create(op1, op2); + case ICmpInst::ICMP_ULE: return UleExpr::create(op1, op2); + case ICmpInst::ICMP_SGT: return SgtExpr::create(op1, op2); + case ICmpInst::ICMP_SGE: return SgeExpr::create(op1, op2); + case ICmpInst::ICMP_SLT: return SltExpr::create(op1, op2); + case ICmpInst::ICMP_SLE: return SleExpr::create(op1, op2); + default: + assert(0 && "unhandled ICmp predicate"); + } + } + + case Instruction::Select: { + return SelectExpr::create(op1, op2, op3); + } + + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FCmp: + assert(0 && "floating point ConstantExprs unsupported"); + + default : + assert(0 && "unknown ConstantExpr type"); + } +} + +} diff --git a/lib/Core/ExternalDispatcher.cpp b/lib/Core/ExternalDispatcher.cpp new file mode 100644 index 00000000..9e3b0a49 --- /dev/null +++ b/lib/Core/ExternalDispatcher.cpp @@ -0,0 +1,230 @@ +//===-- ExternalDispatcher.cpp --------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ExternalDispatcher.h" + +#include "llvm/Module.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/ModuleProvider.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/Support/CallSite.h" +#include "llvm/System/DynamicLibrary.h" +#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include <setjmp.h> +#include <signal.h> + +using namespace llvm; +using namespace klee; + +/***/ + +static jmp_buf escapeCallJmpBuf; + +extern "C" { + +static void sigsegv_handler(int signal, siginfo_t *info, void *context) { + longjmp(escapeCallJmpBuf, 1); +} + +} + +void *ExternalDispatcher::resolveSymbol(const std::string &name) { + assert(executionEngine); + + const char *str = name.c_str(); + + // We use this to validate that function names can be resolved so we + // need to match how the JIT does it. Unfortunately we can't + // directly access the JIT resolution function + // JIT::getPointerToNamedFunction so we emulate the important points. + + if (str[0] == 1) // asm specifier, skipped + ++str; + + void *addr = dl_symbols.SearchForAddressOfSymbol(str); + if (addr) + return addr; + + // If it has an asm specifier and starts with an underscore we retry + // without the underscore. I (DWD) don't know why. + if (name[0] == 1 && str[0]=='_') { + ++str; + addr = dl_symbols.SearchForAddressOfSymbol(str); + } + + return addr; +} + +ExternalDispatcher::ExternalDispatcher() { + dispatchModule = new Module("ExternalDispatcher"); + ExistingModuleProvider* MP = new ExistingModuleProvider(dispatchModule); + + std::string error; + executionEngine = ExecutionEngine::createJIT(MP, &error); + if (!executionEngine) { + llvm::cerr << "unable to make jit: " << error << "\n"; + abort(); + } + + // from ExecutionEngine::create + if (executionEngine) { + // Make sure we can resolve symbols in the program as well. The zero arg + // to the function tells DynamicLibrary to load the program, not a library. 
+ try { + dl_symbols.LoadLibraryPermanently(0); + } catch (...) { + assert(0 && "Exception in LoadLibraryPermantently.\n"); + } + } + +#ifdef WINDOWS + preboundFunctions["getpid"] = (void*) (long) getpid; + preboundFunctions["putchar"] = (void*) (long) putchar; + preboundFunctions["printf"] = (void*) (long) printf; + preboundFunctions["fprintf"] = (void*) (long) fprintf; + preboundFunctions["sprintf"] = (void*) (long) sprintf; +#endif +} + +ExternalDispatcher::~ExternalDispatcher() { + delete executionEngine; +} + +bool ExternalDispatcher::executeCall(Function *f, Instruction *i, uint64_t *args) { + dispatchers_ty::iterator it = dispatchers.find(i); + Function *dispatcher; + + if (it == dispatchers.end()) { +#ifdef WINDOWS + std::map<std::string, void*>::iterator it2 = + preboundFunctions.find(f->getName())); + + if (it2 != preboundFunctions.end()) { + // only bind once + if (it2->second) { + executionEngine->addGlobalMapping(f, it2->second); + it2->second = 0; + } + } +#endif + + dispatcher = createDispatcher(f,i); + + dispatchers.insert(std::make_pair(i, dispatcher)); + + if (dispatcher) { + // force the JIT execution engine to go ahead and build the + // function. this ensures that any errors or assertions in the + // compilation process will trigger crashes instead of being + // caught as aborts in the external function. + executionEngine->recompileAndRelinkFunction(dispatcher); + } + } else { + dispatcher = it->second; + } + + return runProtectedCall(dispatcher, args); +} + +// XXX not reentrant +static uint64_t *gTheArgsP; + +bool ExternalDispatcher::runProtectedCall(Function *f, uint64_t *args) { + struct sigaction segvAction, segvActionOld; + bool res; + + if (!f) + return false; + + std::vector<GenericValue> gvArgs; + gTheArgsP = args; + + segvAction.sa_handler = 0; + memset(&segvAction.sa_mask, 0, sizeof(segvAction.sa_mask)); + segvAction.sa_flags = SA_SIGINFO; + segvAction.sa_sigaction = ::sigsegv_handler; + sigaction(SIGSEGV, &segvAction, &segvActionOld); + + if (setjmp(escapeCallJmpBuf)) { + res = false; + } else { + executionEngine->runFunction(f, gvArgs); + res = true; + } + + sigaction(SIGSEGV, &segvActionOld, 0); + return res; +} + +// for performance purposes we construct the stub in such a way that +// the arguments pointer is passed through the static global variable +// gTheArgsP in this file. This is done so that the stub function +// prototype trivially matches the special cases that the JIT knows +// how to directly call. If this is not done, then the jit will end up +// generating a nullary stub just to call our stub, for every single +// function call. 
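+// Roughly, the generated stub is equivalent to: +//   void dispatcher() { +//     uint64_t *args = *gTheArgsP; +//     *(ret_ty *) &args[0] = target(*(arg0_ty *) &args[1], *(arg1_ty *) &args[2], ...); +//   } +// (the store through args[0] is omitted when target returns void).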
+Function *ExternalDispatcher::createDispatcher(Function *target, Instruction *inst) { + if (!resolveSymbol(target->getName())) + return 0; + + CallSite cs; + if (inst->getOpcode()==Instruction::Call) { + cs = CallSite(cast<CallInst>(inst)); + } else { + cs = CallSite(cast<InvokeInst>(inst)); + } + + Value **args = new Value*[cs.arg_size()]; + + std::vector<const Type*> nullary; + + Function *dispatcher = Function::Create(FunctionType::get(Type::VoidTy, + nullary, false), + GlobalVariable::ExternalLinkage, + "", + dispatchModule); + + + BasicBlock *dBB = BasicBlock::Create("entry", dispatcher); + + Instruction *argI64sp = new IntToPtrInst(ConstantInt::get(Type::Int64Ty, (long) (void*) &gTheArgsP), + PointerType::getUnqual(PointerType::getUnqual(Type::Int64Ty)), + "argsp", + dBB); + Instruction *argI64s = new LoadInst(argI64sp, "args", dBB); + + unsigned i = 0; + for (CallSite::arg_iterator ai = cs.arg_begin(), ae = cs.arg_end(); + ai!=ae; ++ai, ++i) { + Value *index = ConstantInt::get(Type::Int32Ty, i+1); + + Instruction *argI64p = GetElementPtrInst::Create(argI64s, index, "", dBB); + Instruction *argp = new BitCastInst(argI64p, + PointerType::getUnqual((*ai)->getType()), "", dBB); + args[i] = new LoadInst(argp, "", dBB); + } + + Instruction *result = CallInst::Create(target, args, args+i, "", dBB); + + if (result->getType() != Type::VoidTy) { + Instruction *resp = new BitCastInst(argI64s, + PointerType::getUnqual(result->getType()), "", dBB); + new StoreInst(result, resp, dBB); + } + + ReturnInst::Create(dBB); + + delete[] args; + + return dispatcher; +} diff --git a/lib/Core/ExternalDispatcher.h b/lib/Core/ExternalDispatcher.h new file mode 100644 index 00000000..fc8f80f4 --- /dev/null +++ b/lib/Core/ExternalDispatcher.h @@ -0,0 +1,50 @@ +//===-- ExternalDispatcher.h ------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_EXTERNALDISPATCHER_H +#define KLEE_EXTERNALDISPATCHER_H + +#include <map> +#include "llvm/System/DynamicLibrary.h" + +namespace llvm { + class ExecutionEngine; + class Instruction; + class Function; + class FunctionType; + class Module; +} + +namespace klee { + class ExternalDispatcher { + private: + typedef std::map<const llvm::Instruction*,llvm::Function*> dispatchers_ty; + dispatchers_ty dispatchers; + llvm::Module *dispatchModule; + llvm::ExecutionEngine *executionEngine; + llvm::sys::DynamicLibrary dl_symbols; + std::map<std::string, void*> preboundFunctions; + + llvm::Function *createDispatcher(llvm::Function *f, llvm::Instruction *i); + bool runProtectedCall(llvm::Function *f, uint64_t *args); + + public: + ExternalDispatcher(); + ~ExternalDispatcher(); + + /* Call the given function using the parameter passing convention of + * ci with arguments in args[1], args[2], ... and writing the result + * into args[0]. 
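+   * For example, for a call to "int f(int a, int b)", a is read from +   * args[1], b from args[2], and the int result is written back into args[0].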
+ */ + bool executeCall(llvm::Function *function, llvm::Instruction *i, uint64_t *args); + void *resolveSymbol(const std::string &name); + }; +} + +#endif diff --git a/lib/Core/ImpliedValue.cpp b/lib/Core/ImpliedValue.cpp new file mode 100644 index 00000000..386c8d80 --- /dev/null +++ b/lib/Core/ImpliedValue.cpp @@ -0,0 +1,274 @@ +//===-- ImpliedValue.cpp --------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ImpliedValue.h" + +#include "klee/Constraints.h" +#include "klee/Expr.h" +#include "klee/Solver.h" +// FIXME: Use APInt. +#include "klee/Internal/Support/IntEvaluation.h" + +#include "klee/util/ExprUtil.h" + +#include <iostream> +#include <map> +#include <set> + +using namespace klee; + +// XXX we really want to do some sort of canonicalization of exprs +// globally so that cases below become simpler +static void _getImpliedValue(ref<Expr> e, + uint64_t value, + ImpliedValueList &results) { + switch (e.getKind()) { + + case Expr::Constant: { + assert(value == e.getConstantValue() && "error in implied value calculation"); + break; + } + + // Special + + case Expr::NotOptimized: break; + + case Expr::Read: { + // XXX in theory it is possible to descend into a symbolic index + // under certain circumstances (all values known, known value + // unique, or range known, max / min hit). Seems unlikely this + // would work often enough to be worth the effort. + ReadExpr *re = static_ref_cast<ReadExpr>(e); + results.push_back(std::make_pair(re, + ConstantExpr::create(value, e.getWidth()))); + break; + } + + case Expr::Select: { + // not much to do, could improve with range analysis + SelectExpr *se = static_ref_cast<SelectExpr>(e); + + if (se->trueExpr.isConstant()) { + if (se->falseExpr.isConstant()) { + if (se->trueExpr.getConstantValue() != se->falseExpr.getConstantValue()) { + if (value == se->trueExpr.getConstantValue()) { + _getImpliedValue(se->cond, 1, results); + } else { + assert(value == se->falseExpr.getConstantValue() && + "err in implied value calculation"); + _getImpliedValue(se->cond, 0, results); + } + } + } + } + break; + } + + case Expr::Concat: { + ConcatExpr *ce = static_ref_cast<ConcatExpr>(e); + _getImpliedValue(ce->getKid(0), (value >> ce->getKid(1).getWidth()) & ((1 << ce->getKid(0).getWidth()) - 1), results); + _getImpliedValue(ce->getKid(1), value & ((1 << ce->getKid(1).getWidth()) - 1), results); + break; + } + + case Expr::Extract: { + // XXX, could do more here with "some bits" mask + break; + } + + // Casting + + case Expr::ZExt: + case Expr::SExt: { + CastExpr *ce = static_ref_cast<CastExpr>(e); + _getImpliedValue(ce->src, + bits64::truncateToNBits(value, + ce->src.getWidth()), + results); + break; + } + + // Arithmetic + + case Expr::Add: { // constants on left + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + if (be->left.isConstant()) { + uint64_t nvalue = ints::sub(value, + be->left.getConstantValue(), + be->left.getWidth()); + _getImpliedValue(be->right, nvalue, results); + } + break; + } + case Expr::Sub: { // constants on left + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + if (be->left.isConstant()) { + uint64_t nvalue = ints::sub(be->left.getConstantValue(), + value, + be->left.getWidth()); + _getImpliedValue(be->right, nvalue, results); + } + break; + } + case Expr::Mul: { + // XXX can do 
stuff here, but need valid mask and other things + // because of bits that might be lost + break; + } + + case Expr::UDiv: + case Expr::SDiv: + case Expr::URem: + case Expr::SRem: + // no, no, no + break; + + // Binary + + case Expr::And: { + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + if (be->getWidth() == Expr::Bool) { + if (value) { + _getImpliedValue(be->left, value, results); + _getImpliedValue(be->right, value, results); + } + } else { + // XXX, we can basically propogate a mask here + // where we know "some bits". may or may not be + // useful. + } + break; + } + case Expr::Or: { + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + if (!value) { + _getImpliedValue(be->left, 0, results); + _getImpliedValue(be->right, 0, results); + } else { + // XXX, can do more? + } + break; + } + case Expr::Xor: { // constants on left + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + if (be->left.isConstant()) { + _getImpliedValue(be->right, value ^ be->left.getConstantValue(), results); + } + break; + } + + // Comparison + case Expr::Ne: + value = !value; + /* fallthru */ + case Expr::Eq: { + EqExpr *ee = static_ref_cast<EqExpr>(e); + if (value) { + if (ee->left.isConstant()) + _getImpliedValue(ee->right, ee->left.getConstantValue(), results); + } else { + // look for limited value range, woohoo + // + // in general anytime one side was restricted to two values we + // can apply this trick. the only obvious case where this + // occurs, aside from booleans, is as the result of a select + // expression where the true and false branches are single + // valued and distinct. + + if (ee->left.isConstant()) { + if (ee->left.getWidth() == Expr::Bool) { + _getImpliedValue(ee->right, !ee->left.getConstantValue(), results); + } + } + } + break; + } + + default: + break; + } +} + +void ImpliedValue::getImpliedValues(ref<Expr> e, + ref<Expr> value, + ImpliedValueList &results) { + assert(value.isConstant() && "non-constant in place of constant"); + _getImpliedValue(e, value.getConstantValue(), results); +} + +void ImpliedValue::checkForImpliedValues(Solver *S, ref<Expr> e, + ref<Expr> value) { + assert(value.isConstant() && "non-constant in place of constant"); + + std::vector<ref<ReadExpr> > reads; + std::map<ref<ReadExpr>, ref<Expr> > found; + ImpliedValueList results; + + getImpliedValues(e, value, results); + + for (ImpliedValueList::iterator i = results.begin(), ie = results.end(); + i != ie; ++i) { + std::map<ref<ReadExpr>, ref<Expr> >::iterator it = found.find(i->first); + if (it != found.end()) { + assert(it->second.getConstantValue() == i->second.getConstantValue() && + "I don't think so Scott"); + } else { + found.insert(std::make_pair(i->first, i->second)); + } + } + + findReads(e, false, reads); + std::set< ref<ReadExpr> > readsSet(reads.begin(), reads.end()); + reads = std::vector< ref<ReadExpr> >(readsSet.begin(), readsSet.end()); + + std::vector<ref<Expr> > assumption; + assumption.push_back(EqExpr::create(e, value)); + + // obscure... we need to make sure that all the read indices are + // bounds checked. if we don't do this we can end up constructing + // invalid counterexamples because STP will happily make out of + // bounds indices which will not get picked up. this is of utmost + // importance if we are being backed by the CexCachingSolver. 
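(Editorial aside, not part of this commit.) Before the loop below adds the bounds constraints, it may help to see the arithmetic behind the propagation cases above (constants on the left of Add, Sub and Xor) in isolation. This is a toy, self-contained sketch: plain machine integers stand in for Expr, a fixed width w is assumed, and an exhaustive scan plays the role of the solver round-trip that checkForImpliedValues performs.

#include <cassert>
#include <cstdint>

// Truncate a value to its low w bits, mimicking fixed-width expressions.
static uint64_t trunc_to(uint64_t v, unsigned w) {
  return w == 64 ? v : v & ((uint64_t(1) << w) - 1);
}

int main() {
  const unsigned w = 8;            // pretend 8-bit expressions
  const uint64_t c = 200, v = 50;  // known constant operand and known result

  // Add case: if (c + x) == v mod 2^w, then x must be v - c mod 2^w.
  uint64_t impliedAdd = trunc_to(v - c, w);
  // Xor case: if (c ^ x) == v, then x must be v ^ c.
  uint64_t impliedXor = trunc_to(v ^ c, w);

  // Exhaustive check over the whole 8-bit domain, standing in for the
  // getValue/mustBeTrue solver queries.
  for (uint64_t x = 0; x < (uint64_t(1) << w); ++x) {
    if (trunc_to(c + x, w) == v) assert(x == impliedAdd);
    if (trunc_to(c ^ x, w) == v) assert(x == impliedXor);
  }
  return 0;
}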
+ + for (std::vector< ref<ReadExpr> >::iterator i = reads.begin(), + ie = reads.end(); i != ie; ++i) { + ReadExpr *re = i->get(); + ref<Expr> size = ref<Expr>(re->updates.root->size, kMachinePointerType); + assumption.push_back(UltExpr::create(re->index, size)); + } + + ConstraintManager assume(assumption); + for (std::vector< ref<ReadExpr> >::iterator i = reads.begin(), + ie = reads.end(); i != ie; ++i) { + ref<ReadExpr> var = *i; + ref<Expr> possible; + bool success = S->getValue(Query(assume, var), possible); + assert(success && "FIXME: Unhandled solver failure"); + std::map<ref<ReadExpr>, ref<Expr> >::iterator it = found.find(var); + bool res; + success = S->mustBeTrue(Query(assume, EqExpr::create(var, possible)), res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) { + if (it != found.end()) { + assert(possible.getConstantValue() == it->second.getConstantValue()); + found.erase(it); + } + } else { + if (it!=found.end()) { + ref<Expr> binding = it->second; + llvm::cerr << "checkForImpliedValues: " << e << " = " << value << "\n" + << "\t\t implies " << var << " == " << binding + << " (error)\n"; + assert(0); + } + } + } + + assert(found.empty()); +} diff --git a/lib/Core/ImpliedValue.h b/lib/Core/ImpliedValue.h new file mode 100644 index 00000000..51ec6e9b --- /dev/null +++ b/lib/Core/ImpliedValue.h @@ -0,0 +1,38 @@ +//===-- ImpliedValue.h ------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_IMPLIEDVALUE_H +#define KLEE_IMPLIEDVALUE_H + +#include "klee/Expr.h" + +#include <vector> + +// The idea of implied values is that often we know the result of some +// expression e is a concrete value C. In many cases this directly +// implies that some variable x embedded in e is also a concrete value +// (derived from C). This module is used for finding such variables +// and their computed values. + +namespace klee { + class ConstantExpr; + class Expr; + class ReadExpr; + class Solver; + + typedef std::vector< std::pair<ref<ReadExpr>, ref<Expr> > > ImpliedValueList; + + namespace ImpliedValue { + void getImpliedValues(ref<Expr> e, ref<Expr> cvalue, ImpliedValueList &result); + void checkForImpliedValues(Solver *S, ref<Expr> e, ref<Expr> cvalue); + } + +} + +#endif diff --git a/lib/Core/Makefile b/lib/Core/Makefile new file mode 100755 index 00000000..4da3c7ea --- /dev/null +++ b/lib/Core/Makefile @@ -0,0 +1,16 @@ +#===-- lib/Core/Makefile -----------------------------------*- Makefile -*--===# +# +# The KLEE Symbolic Virtual Machine +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +LEVEL=../.. + +LIBRARYNAME=kleeCore +DONT_BUILD_RELINKED=1 +BUILD_ARCHIVE=1 + +include $(LEVEL)/Makefile.common diff --git a/lib/Core/Memory.cpp b/lib/Core/Memory.cpp new file mode 100644 index 00000000..cd563551 --- /dev/null +++ b/lib/Core/Memory.cpp @@ -0,0 +1,812 @@ +//===-- Memory.cpp --------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Common.h" + +#include "Memory.h" + +#include "klee/Expr.h" +#include "klee/Machine.h" +#include "klee/Solver.h" +#include "klee/util/BitArray.h" + +#include "ObjectHolder.h" + +#include <llvm/Function.h> +#include <llvm/Instruction.h> +#include <llvm/Value.h> + +#include <iostream> +#include <cassert> +#include <sstream> + +using namespace llvm; +using namespace klee; + +/***/ + +ObjectHolder::ObjectHolder(const ObjectHolder &b) : os(b.os) { + if (os) ++os->refCount; +} + +ObjectHolder::ObjectHolder(ObjectState *_os) : os(_os) { + if (os) ++os->refCount; +} + +ObjectHolder::~ObjectHolder() { + if (os && --os->refCount==0) delete os; +} + +ObjectHolder &ObjectHolder::operator=(const ObjectHolder &b) { + if (b.os) ++b.os->refCount; + if (os && --os->refCount==0) delete os; + os = b.os; + return *this; +} + +/***/ + +int MemoryObject::counter = 0; + +extern "C" void vc_DeleteExpr(void*); + +MemoryObject::~MemoryObject() { + // FIXME: This shouldn't be necessary. Array's should be ref-counted + // just like everything else, and the interaction with the STP array + // should hide at least inside the Expr/Solver layers. + if (array) { + if (array->stpInitialArray) { + ::vc_DeleteExpr(array->stpInitialArray); + array->stpInitialArray = 0; + } + delete array; + } +} + +void MemoryObject::getAllocInfo(std::string &result) const { + std::ostringstream info; + + info << "MO" << id << "[" << size << "]"; + + if (allocSite) { + info << " allocated at "; + if (const Instruction *i = dyn_cast<Instruction>(allocSite)) { + info << i->getParent()->getParent()->getName() << "():"; + info << *i; + } else if (const GlobalValue *gv = dyn_cast<GlobalValue>(allocSite)) { + info << "global:" << gv->getName(); + } else { + info << "value:" << *allocSite; + } + } else { + info << " (no allocation info)"; + } + + result = info.str(); +} + +/***/ + +ObjectState::ObjectState(const MemoryObject *mo, unsigned _size) + : copyOnWriteOwner(0), + refCount(0), + object(mo), + concreteStore(new uint8_t[_size]), + concreteMask(0), + flushMask(0), + knownSymbolics(0), + size(_size), + updates(mo->array, false, 0), + readOnly(false) { +} + +ObjectState::ObjectState(const ObjectState &os) + : copyOnWriteOwner(0), + refCount(0), + object(os.object), + concreteStore(new uint8_t[os.size]), + concreteMask(os.concreteMask ? new BitArray(*os.concreteMask, os.size) : 0), + flushMask(os.flushMask ? 
new BitArray(*os.flushMask, os.size) : 0), + knownSymbolics(0), + size(os.size), + updates(os.updates), + readOnly(false) { + assert(!os.readOnly && "no need to copy read only object?"); + + if (os.knownSymbolics) { + knownSymbolics = new ref<Expr>[size]; + for (unsigned i=0; i<size; i++) + knownSymbolics[i] = os.knownSymbolics[i]; + } + + memcpy(concreteStore, os.concreteStore, size*sizeof(*concreteStore)); +} + +ObjectState::~ObjectState() { + if (concreteMask) delete concreteMask; + if (flushMask) delete flushMask; + if (knownSymbolics) delete[] knownSymbolics; + delete[] concreteStore; +} + +/***/ + +void ObjectState::makeConcrete() { + if (concreteMask) delete concreteMask; + if (flushMask) delete flushMask; + if (knownSymbolics) delete[] knownSymbolics; + concreteMask = 0; + flushMask = 0; + knownSymbolics = 0; +} + +void ObjectState::makeSymbolic() { + assert(!updates.head && + "XXX makeSymbolic of objects with symbolic values is unsupported"); + updates.isRooted = true; + + // XXX simplify this, can just delete various arrays I guess + for (unsigned i=0; i<size; i++) { + markByteSymbolic(i); + setKnownSymbolic(i, 0); + markByteFlushed(i); + } +} + +void ObjectState::initializeToZero() { + makeConcrete(); + memset(concreteStore, 0, size); +} + +void ObjectState::initializeToRandom() { + makeConcrete(); + for (unsigned i=0; i<size; i++) { + // randomly selected by 256 sided die + concreteStore[i] = 0xAB; + } +} + +/* +Cache Invariants +-- +isByteKnownSymbolic(i) => !isByteConcrete(i) +isByteConcrete(i) => !isByteKnownSymbolic(i) +!isByteFlushed(i) => (isByteConcrete(i) || isByteKnownSymbolic(i)) + */ + +void ObjectState::fastRangeCheckOffset(ref<Expr> offset, + unsigned *base_r, + unsigned *size_r) const { + *base_r = 0; + *size_r = size; +} + +void ObjectState::flushRangeForRead(unsigned rangeBase, + unsigned rangeSize) const { + if (!flushMask) flushMask = new BitArray(size, true); + + for (unsigned offset=rangeBase; offset<rangeBase+rangeSize; offset++) { + if (!isByteFlushed(offset)) { + if (isByteConcrete(offset)) { + updates.extend(ConstantExpr::create(offset, kMachinePointerType), + ConstantExpr::create(concreteStore[offset], Expr::Int8)); + } else { + assert(isByteKnownSymbolic(offset) && "invalid bit set in flushMask"); + updates.extend(ConstantExpr::create(offset, kMachinePointerType), + knownSymbolics[offset]); + } + + flushMask->unset(offset); + } + } +} + +void ObjectState::flushRangeForWrite(unsigned rangeBase, + unsigned rangeSize) { + if (!flushMask) flushMask = new BitArray(size, true); + + for (unsigned offset=rangeBase; offset<rangeBase+rangeSize; offset++) { + if (!isByteFlushed(offset)) { + if (isByteConcrete(offset)) { + updates.extend(ConstantExpr::create(offset, kMachinePointerType), + ConstantExpr::create(concreteStore[offset], Expr::Int8)); + markByteSymbolic(offset); + } else { + assert(isByteKnownSymbolic(offset) && "invalid bit set in flushMask"); + updates.extend(ConstantExpr::create(offset, kMachinePointerType), + knownSymbolics[offset]); + setKnownSymbolic(offset, 0); + } + + flushMask->unset(offset); + } else { + // flushed bytes that are written over still need + // to be marked out + if (isByteConcrete(offset)) { + markByteSymbolic(offset); + } else if (isByteKnownSymbolic(offset)) { + setKnownSymbolic(offset, 0); + } + } + } +} + +bool ObjectState::isByteConcrete(unsigned offset) const { + return !concreteMask || concreteMask->get(offset); +} + +bool ObjectState::isByteFlushed(unsigned offset) const { + return flushMask && !flushMask->get(offset); 
+} + +bool ObjectState::isByteKnownSymbolic(unsigned offset) const { + return knownSymbolics && knownSymbolics[offset].get(); +} + +void ObjectState::markByteConcrete(unsigned offset) { + if (concreteMask) + concreteMask->set(offset); +} + +void ObjectState::markByteSymbolic(unsigned offset) { + if (!concreteMask) + concreteMask = new BitArray(size, true); + concreteMask->unset(offset); +} + +void ObjectState::markByteUnflushed(unsigned offset) { + if (flushMask) + flushMask->set(offset); +} + +void ObjectState::markByteFlushed(unsigned offset) { + if (!flushMask) { + flushMask = new BitArray(size, false); + } else { + flushMask->unset(offset); + } +} + +void ObjectState::setKnownSymbolic(unsigned offset, + Expr *value /* can be null */) { + if (knownSymbolics) { + knownSymbolics[offset] = value; + } else { + if (value) { + knownSymbolics = new ref<Expr>[size]; + knownSymbolics[offset] = value; + } + } +} + +/***/ + +ref<Expr> ObjectState::read8(unsigned offset) const { + if (isByteConcrete(offset)) { + return ConstantExpr::create(concreteStore[offset], Expr::Int8); + } else if (isByteKnownSymbolic(offset)) { + return knownSymbolics[offset]; + } else { + assert(isByteFlushed(offset) && "unflushed byte without cache value"); + + return ReadExpr::create(updates, + ConstantExpr::create(offset, kMachinePointerType)); + } +} + +ref<Expr> ObjectState::read8(ref<Expr> offset) const { + assert(!offset.isConstant() && "constant offset passed to symbolic read8"); + unsigned base, size; + fastRangeCheckOffset(offset, &base, &size); + flushRangeForRead(base, size); + + if (size>4096) { + std::string allocInfo; + object->getAllocInfo(allocInfo); + klee_warning_once(0, "flushing %d bytes on read, may be slow and/or crash: %s", + size, + allocInfo.c_str()); + } + + return ReadExpr::create(updates, offset); +} + +void ObjectState::write8(unsigned offset, uint8_t value) { + //assert(read_only == false && "writing to read-only object!"); + concreteStore[offset] = value; + setKnownSymbolic(offset, 0); + + markByteConcrete(offset); + markByteUnflushed(offset); +} + +void ObjectState::write8(unsigned offset, ref<Expr> value) { + // can happen when ExtractExpr special cases + if (value.isConstant()) { + write8(offset, (uint8_t) value.getConstantValue()); + } else { + setKnownSymbolic(offset, value.get()); + + markByteSymbolic(offset); + markByteUnflushed(offset); + } +} + +void ObjectState::write8(ref<Expr> offset, ref<Expr> value) { + assert(!offset.isConstant() && "constant offset passed to symbolic write8"); + unsigned base, size; + fastRangeCheckOffset(offset, &base, &size); + flushRangeForWrite(base, size); + + if (size>4096) { + std::string allocInfo; + object->getAllocInfo(allocInfo); + klee_warning_once(0, "flushing %d bytes on read, may be slow and/or crash: %s", + size, + allocInfo.c_str()); + } + + updates.extend(offset, value); +} + +/***/ + +ref<Expr> ObjectState::read(ref<Expr> offset, Expr::Width width) const { + if (offset.isConstant()) { + return read((unsigned) offset.getConstantValue(), width); + } else { + switch (width) { + case Expr::Bool: return read1(offset); + case Expr::Int8: return read8(offset); + case Expr::Int16: return read16(offset); + case Expr::Int32: return read32(offset); + case Expr::Int64: return read64(offset); + default: assert(0 && "invalid type"); + } + } +} + +ref<Expr> ObjectState::read(unsigned offset, Expr::Width width) const { + switch (width) { + case Expr::Bool: return read1(offset); + case Expr::Int8: return read8(offset); + case Expr::Int16: return 
read16(offset); + case Expr::Int32: return read32(offset); + case Expr::Int64: return read64(offset); + default: assert(0 && "invalid type"); + } +} + +ref<Expr> ObjectState::read1(unsigned offset) const { + return ExtractExpr::createByteOff(read8(offset), 0, Expr::Bool); +} + +ref<Expr> ObjectState::read1(ref<Expr> offset) const { + return ExtractExpr::createByteOff(read8(offset), 0, Expr::Bool); +} + +ref<Expr> ObjectState::read16(unsigned offset) const { + if (kMachineByteOrder == machine::MSB) { + return ConcatExpr::create(read8(offset+0), + read8(offset+1)); + } else { + return ConcatExpr::create(read8(offset+1), + read8(offset+0)); + } +} + +ref<Expr> ObjectState::read16(ref<Expr> offset) const { + if (kMachineByteOrder == machine::MSB) { + return ConcatExpr::create + (read8(AddExpr::create(offset, + ConstantExpr::create(0, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(1, + kMachinePointerType)))); + } else { + return ConcatExpr::create + (read8(AddExpr::create(offset, + ConstantExpr::create(1, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(0, + kMachinePointerType)))); + } +} + +ref<Expr> ObjectState::read32(unsigned offset) const { + if (kMachineByteOrder == machine::MSB) { + return ConcatExpr::create4(read8(offset+0), + read8(offset+1), + read8(offset+2), + read8(offset+3)); + } else { + return ConcatExpr::create4(read8(offset+3), + read8(offset+2), + read8(offset+1), + read8(offset+0)); + } +} + +ref<Expr> ObjectState::read32(ref<Expr> offset) const { + if (kMachineByteOrder == machine::MSB) { + return ConcatExpr::create4 + (read8(AddExpr::create(offset, + ConstantExpr::create(0, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(1, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(2, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(3, + kMachinePointerType)))); + } else { + return ConcatExpr::create4 + (read8(AddExpr::create(offset, + ConstantExpr::create(3, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(2, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(1, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(0, + kMachinePointerType)))); + } +} + +ref<Expr> ObjectState::read64(unsigned offset) const { + if (kMachineByteOrder == machine::MSB) { + return ConcatExpr::create8(read8(offset+0), + read8(offset+1), + read8(offset+2), + read8(offset+3), + read8(offset+4), + read8(offset+5), + read8(offset+6), + read8(offset+7)); + } else { + return ConcatExpr::create8(read8(offset+7), + read8(offset+6), + read8(offset+5), + read8(offset+4), + read8(offset+3), + read8(offset+2), + read8(offset+1), + read8(offset+0)); + } +} + +ref<Expr> ObjectState::read64(ref<Expr> offset) const { + if (kMachineByteOrder == machine::MSB) { + return ConcatExpr::create8 + (read8(AddExpr::create(offset, + ConstantExpr::create(0, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(1, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(2, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(3, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(4, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(5, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(6, + kMachinePointerType))), + 
read8(AddExpr::create(offset, + ConstantExpr::create(7, + kMachinePointerType)))); + } else { + return ConcatExpr::create8 + (read8(AddExpr::create(offset, + ConstantExpr::create(7, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(6, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(5, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(4, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(3, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(2, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(1, + kMachinePointerType))), + read8(AddExpr::create(offset, + ConstantExpr::create(0, + kMachinePointerType)))); + } +} + +void ObjectState::write(ref<Expr> offset, ref<Expr> value) { + Expr::Width w = value.getWidth(); + if (offset.isConstant()) { + write(offset.getConstantValue(), value); + } else { + switch(w) { + case Expr::Bool: write1(offset, value); break; + case Expr::Int8: write8(offset, value); break; + case Expr::Int16: write16(offset, value); break; + case Expr::Int32: write32(offset, value); break; + case Expr::Int64: write64(offset, value); break; + default: assert(0 && "invalid number of bytes in write"); + } + } +} + +void ObjectState::write(unsigned offset, ref<Expr> value) { + Expr::Width w = value.getWidth(); + if (value.isConstant()) { + uint64_t val = value.getConstantValue(); + switch(w) { + case Expr::Bool: + case Expr::Int8: write8(offset, val); break; + case Expr::Int16: write16(offset, val); break; + case Expr::Int32: write32(offset, val); break; + case Expr::Int64: write64(offset, val); break; + default: assert(0 && "invalid number of bytes in write"); + } + } else { + switch(w) { + case Expr::Bool: write1(offset, value); break; + case Expr::Int8: write8(offset, value); break; + case Expr::Int16: write16(offset, value); break; + case Expr::Int32: write32(offset, value); break; + case Expr::Int64: write64(offset, value); break; + default: assert(0 && "invalid number of bytes in write"); + } + } +} + +void ObjectState::write1(unsigned offset, ref<Expr> value) { + write8(offset, ZExtExpr::create(value, Expr::Int8)); +} + +void ObjectState::write1(ref<Expr> offset, ref<Expr> value) { + write8(offset, ZExtExpr::create(value, Expr::Int8)); +} + +void ObjectState::write16(unsigned offset, uint16_t value) { + if (kMachineByteOrder == machine::MSB) { + write8(offset+0, (uint8_t) (value >> 8)); + write8(offset+1, (uint8_t) (value >> 0)); + } else { + write8(offset+1, (uint8_t) (value >> 8)); + write8(offset+0, (uint8_t) (value >> 0)); + } +} + +void ObjectState::write16(unsigned offset, ref<Expr> value) { + if (kMachineByteOrder == machine::MSB) { + write8(offset+0, ExtractExpr::createByteOff(value, 1)); + write8(offset+1, ExtractExpr::createByteOff(value, 0)); + } else { + write8(offset+1, ExtractExpr::createByteOff(value, 1)); + write8(offset+0, ExtractExpr::createByteOff(value, 0)); + } +} + + +void ObjectState::write16(ref<Expr> offset, ref<Expr> value) { + if (kMachineByteOrder == machine::MSB) { + write8(AddExpr::create(offset, + ConstantExpr::create(0, kMachinePointerType)), + ExtractExpr::createByteOff(value,1)); + write8(AddExpr::create(offset, + ConstantExpr::create(0, kMachinePointerType)), + ExtractExpr::createByteOff(value,0)); + } else { + write8(AddExpr::create(offset, + ConstantExpr::create(1, kMachinePointerType)), + ExtractExpr::createByteOff(value,1)); + 
write8(AddExpr::create(offset, + ConstantExpr::create(0, kMachinePointerType)), + ExtractExpr::createByteOff(value,0)); + } +} + +void ObjectState::write32(unsigned offset, uint32_t value) { + if (kMachineByteOrder == machine::MSB) { + write8(offset+0, (uint8_t) (value >> 24)); + write8(offset+1, (uint8_t) (value >> 16)); + write8(offset+2, (uint8_t) (value >> 8)); + write8(offset+3, (uint8_t) (value >> 0)); + } else { + write8(offset+3, (uint8_t) (value >> 24)); + write8(offset+2, (uint8_t) (value >> 16)); + write8(offset+1, (uint8_t) (value >> 8)); + write8(offset+0, (uint8_t) (value >> 0)); + } +} + +void ObjectState::write32(unsigned offset, ref<Expr> value) { + if (kMachineByteOrder == machine::MSB) { + write8(offset+0, ExtractExpr::createByteOff(value, 3)); + write8(offset+1, ExtractExpr::createByteOff(value, 2)); + write8(offset+2, ExtractExpr::createByteOff(value, 1)); + write8(offset+3, ExtractExpr::createByteOff(value, 0)); + } else { + write8(offset+3, ExtractExpr::createByteOff(value, 3)); + write8(offset+2, ExtractExpr::createByteOff(value, 2)); + write8(offset+1, ExtractExpr::createByteOff(value, 1)); + write8(offset+0, ExtractExpr::createByteOff(value, 0)); + } +} + +void ObjectState::write32(ref<Expr> offset, ref<Expr> value) { + if (kMachineByteOrder == machine::MSB) { + write8(AddExpr::create(offset, + ConstantExpr::create(0, kMachinePointerType)), + ExtractExpr::createByteOff(value,3)); + write8(AddExpr::create(offset, + ConstantExpr::create(1, kMachinePointerType)), + ExtractExpr::createByteOff(value,2)); + write8(AddExpr::create(offset, + ConstantExpr::create(2, kMachinePointerType)), + ExtractExpr::createByteOff(value,1)); + write8(AddExpr::create(offset, + ConstantExpr::create(3, kMachinePointerType)), + ExtractExpr::createByteOff(value,0)); + } else { + write8(AddExpr::create(offset, + ConstantExpr::create(3, kMachinePointerType)), + ExtractExpr::createByteOff(value,3)); + write8(AddExpr::create(offset, + ConstantExpr::create(2, kMachinePointerType)), + ExtractExpr::createByteOff(value,2)); + write8(AddExpr::create(offset, + ConstantExpr::create(1, kMachinePointerType)), + ExtractExpr::createByteOff(value,1)); + write8(AddExpr::create(offset, + ConstantExpr::create(0, kMachinePointerType)), + ExtractExpr::createByteOff(value,0)); + } +} + +void ObjectState::write64(unsigned offset, uint64_t value) { + if (kMachineByteOrder == machine::MSB) { + write8(offset+0, (uint8_t) (value >> 56)); + write8(offset+1, (uint8_t) (value >> 48)); + write8(offset+2, (uint8_t) (value >> 40)); + write8(offset+3, (uint8_t) (value >> 32)); + write8(offset+4, (uint8_t) (value >> 24)); + write8(offset+5, (uint8_t) (value >> 16)); + write8(offset+6, (uint8_t) (value >> 8)); + write8(offset+7, (uint8_t) (value >> 0)); + } else { + write8(offset+7, (uint8_t) (value >> 56)); + write8(offset+6, (uint8_t) (value >> 48)); + write8(offset+5, (uint8_t) (value >> 40)); + write8(offset+4, (uint8_t) (value >> 32)); + write8(offset+3, (uint8_t) (value >> 24)); + write8(offset+2, (uint8_t) (value >> 16)); + write8(offset+1, (uint8_t) (value >> 8)); + write8(offset+0, (uint8_t) (value >> 0)); + } +} + +void ObjectState::write64(unsigned offset, ref<Expr> value) { + if (kMachineByteOrder == machine::MSB) { + write8(offset+0, ExtractExpr::createByteOff(value, 7)); + write8(offset+1, ExtractExpr::createByteOff(value, 6)); + write8(offset+2, ExtractExpr::createByteOff(value, 5)); + write8(offset+3, ExtractExpr::createByteOff(value, 4)); + write8(offset+4, ExtractExpr::createByteOff(value, 3)); + 
write8(offset+5, ExtractExpr::createByteOff(value, 2)); + write8(offset+6, ExtractExpr::createByteOff(value, 1)); + write8(offset+7, ExtractExpr::createByteOff(value, 0)); + } else { + write8(offset+7, ExtractExpr::createByteOff(value, 7)); + write8(offset+6, ExtractExpr::createByteOff(value, 6)); + write8(offset+5, ExtractExpr::createByteOff(value, 5)); + write8(offset+4, ExtractExpr::createByteOff(value, 4)); + write8(offset+3, ExtractExpr::createByteOff(value, 3)); + write8(offset+2, ExtractExpr::createByteOff(value, 2)); + write8(offset+1, ExtractExpr::createByteOff(value, 1)); + write8(offset+0, ExtractExpr::createByteOff(value, 0)); + } +} + +void ObjectState::write64(ref<Expr> offset, ref<Expr> value) { + if (kMachineByteOrder == machine::MSB) { + write8(AddExpr::create(offset, + ConstantExpr::create(0, kMachinePointerType)), + ExtractExpr::createByteOff(value,7)); + write8(AddExpr::create(offset, + ConstantExpr::create(1, kMachinePointerType)), + ExtractExpr::createByteOff(value,6)); + write8(AddExpr::create(offset, + ConstantExpr::create(2, kMachinePointerType)), + ExtractExpr::createByteOff(value,5)); + write8(AddExpr::create(offset, + ConstantExpr::create(3, kMachinePointerType)), + ExtractExpr::createByteOff(value,4)); + write8(AddExpr::create(offset, + ConstantExpr::create(4, kMachinePointerType)), + ExtractExpr::createByteOff(value,3)); + write8(AddExpr::create(offset, + ConstantExpr::create(5, kMachinePointerType)), + ExtractExpr::createByteOff(value,2)); + write8(AddExpr::create(offset, + ConstantExpr::create(6, kMachinePointerType)), + ExtractExpr::createByteOff(value,1)); + write8(AddExpr::create(offset, + ConstantExpr::create(7, kMachinePointerType)), + ExtractExpr::createByteOff(value,0)); + } else { + write8(AddExpr::create(offset, + ConstantExpr::create(7, kMachinePointerType)), + ExtractExpr::createByteOff(value,7)); + write8(AddExpr::create(offset, + ConstantExpr::create(6, kMachinePointerType)), + ExtractExpr::createByteOff(value,6)); + write8(AddExpr::create(offset, + ConstantExpr::create(5, kMachinePointerType)), + ExtractExpr::createByteOff(value,5)); + write8(AddExpr::create(offset, + ConstantExpr::create(4, kMachinePointerType)), + ExtractExpr::createByteOff(value,4)); + write8(AddExpr::create(offset, + ConstantExpr::create(3, kMachinePointerType)), + ExtractExpr::createByteOff(value,3)); + write8(AddExpr::create(offset, + ConstantExpr::create(2, kMachinePointerType)), + ExtractExpr::createByteOff(value,2)); + write8(AddExpr::create(offset, + ConstantExpr::create(1, kMachinePointerType)), + ExtractExpr::createByteOff(value,1)); + write8(AddExpr::create(offset, + ConstantExpr::create(0, kMachinePointerType)), + ExtractExpr::createByteOff(value,0)); + } +} + +void ObjectState::print() { + llvm::cerr << "-- ObjectState --\n"; + llvm::cerr << "\tMemoryObject ID: " << object->id << "\n"; + llvm::cerr << "\tRoot Object: " << updates.root << "\n"; + llvm::cerr << "\tIs Rooted? " << updates.isRooted << "\n"; + llvm::cerr << "\tSize: " << size << "\n"; + + llvm::cerr << "\tBytes:\n"; + for (unsigned i=0; i<size; i++) { + llvm::cerr << "\t\t["<<i<<"]" + << " concrete? " << isByteConcrete(i) + << " known-sym? " << isByteKnownSymbolic(i) + << " flushed? 
" << isByteFlushed(i) << " = "; + ref<Expr> e = read8(i); + llvm::cerr << e << "\n"; + } + + llvm::cerr << "\tUpdates:\n"; + for (const UpdateNode *un=updates.head; un; un=un->next) { + llvm::cerr << "\t\t[" << un->index << "] = " << un->value << "\n"; + } +} diff --git a/lib/Core/Memory.h b/lib/Core/Memory.h new file mode 100644 index 00000000..0f09b162 --- /dev/null +++ b/lib/Core/Memory.h @@ -0,0 +1,239 @@ +//===-- Memory.h ------------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_MEMORY_H +#define KLEE_MEMORY_H + +#include "klee/Expr.h" + +#include <vector> +#include <string> + +namespace llvm { + class Value; +} + +namespace klee { + +class BitArray; +class MemoryManager; +class Solver; + +class MemoryObject { + friend class STPBuilder; + +private: + static int counter; + +public: + unsigned id; + uint64_t address; + Array *array; + + /// size in bytes + unsigned size; + std::string name; + + bool isLocal; + bool isGlobal; + bool isFixed; + + /// true if created by us. + bool fake_object; + bool isUserSpecified; + + /// "Location" for which this memory object was allocated. This + /// should be either the allocating instruction or the global object + /// it was allocated for (or whatever else makes sense). + const llvm::Value *allocSite; + + /// A list of boolean expressions the user has requested be true of + /// a counterexample. Mutable since we play a little fast and loose + /// with allowing it to be added to during execution (although + /// should sensibly be only at creation time). + mutable std::vector< ref<Expr> > cexPreferences; + + // DO NOT IMPLEMENT + MemoryObject(const MemoryObject &b); + MemoryObject &operator=(const MemoryObject &b); + +public: + // XXX this is just a temp hack, should be removed + explicit + MemoryObject(uint64_t _address) + : id(counter++), + address(_address), + array(new Array(this, 0, id)), + size(0), + isFixed(true), + allocSite(0) { + } + + MemoryObject(uint64_t _address, unsigned _size, + bool _isLocal, bool _isGlobal, bool _isFixed, + const llvm::Value *_allocSite) + : id(counter++), + address(_address), + array(new Array(this, id, _size)), + size(_size), + name("unnamed"), + isLocal(_isLocal), + isGlobal(_isGlobal), + isFixed(_isFixed), + fake_object(false), + isUserSpecified(false), + allocSite(_allocSite) { + } + + ~MemoryObject(); + + /// Get an identifying string for this allocation. 
+ void getAllocInfo(std::string &result) const; + + void setName(std::string name) { + this->name = name; + } + + ref<Expr> getBaseExpr() const { + return ConstantExpr::create(address, kMachinePointerType); + } + ref<Expr> getSizeExpr() const { + return ConstantExpr::create(size, kMachinePointerType); + } + ref<Expr> getOffsetExpr(ref<Expr> pointer) const { + return SubExpr::create(pointer, getBaseExpr()); + } + ref<Expr> getBoundsCheckPointer(ref<Expr> pointer) const { + return getBoundsCheckOffset(getOffsetExpr(pointer)); + } + ref<Expr> getBoundsCheckPointer(ref<Expr> pointer, unsigned bytes) const { + return getBoundsCheckOffset(getOffsetExpr(pointer), bytes); + } + + ref<Expr> getBoundsCheckOffset(ref<Expr> offset) const { + if (size==0) { + return EqExpr::create(offset, ref<Expr>(0, kMachinePointerType)); + } else { + return UltExpr::create(offset, getSizeExpr()); + } + } + ref<Expr> getBoundsCheckOffset(ref<Expr> offset, unsigned bytes) const { + if (bytes<=size) { + return UltExpr::create(offset, + ref<Expr>(size - bytes + 1, kMachinePointerType)); + } else { + return ref<Expr>(0, Expr::Bool); + } + } +}; + +class ObjectState { +private: + friend class AddressSpace; + unsigned copyOnWriteOwner; // exclusively for AddressSpace + + friend class ObjectHolder; + unsigned refCount; + + const MemoryObject *object; + + uint8_t *concreteStore; + // XXX cleanup name of flushMask (its backwards or something) + BitArray *concreteMask; + + // mutable because may need flushed during read of const + mutable BitArray *flushMask; + + ref<Expr> *knownSymbolics; + +public: + unsigned size; + + // mutable because we may need flush during read of const + mutable UpdateList updates; + + bool readOnly; + +public: + // initial contents are undefined but concrete, it is the creators + // responsibility to initialize the object contents appropriate + ObjectState(const MemoryObject *mo, unsigned size); + ObjectState(const ObjectState &os); + ~ObjectState(); + + const MemoryObject *getObject() const { return object; } + + void setReadOnly(bool ro) { readOnly = ro; } + + // make all bytes are concrete with undefined values + void makeConcrete(); + + void makeSymbolic(); + + // make contents all concrete and zero + void initializeToZero(); + // make contents all concrete and random + void initializeToRandom(); + + ref<Expr> read(ref<Expr> offset, Expr::Width width) const; + ref<Expr> read(unsigned offset, Expr::Width width) const; + ref<Expr> read1(unsigned offset) const; + ref<Expr> read8(unsigned offset) const; + ref<Expr> read16(unsigned offset) const; + ref<Expr> read32(unsigned offset) const; + ref<Expr> read64(unsigned offset) const; + + // return bytes written. 
+ void write(unsigned offset, ref<Expr> value); + void write(ref<Expr> offset, ref<Expr> value); + + void write8(unsigned offset, uint8_t value); + void write16(unsigned offset, uint16_t value); + void write32(unsigned offset, uint32_t value); + void write64(unsigned offset, uint64_t value); + +private: + ref<Expr> read1(ref<Expr> offset) const; + ref<Expr> read8(ref<Expr> offset) const; + ref<Expr> read16(ref<Expr> offset) const; + ref<Expr> read32(ref<Expr> offset) const; + ref<Expr> read64(ref<Expr> offset) const; + + void write1(unsigned offset, ref<Expr> value); + void write1(ref<Expr> offset, ref<Expr> value); + void write8(unsigned offset, ref<Expr> value); + void write8(ref<Expr> offset, ref<Expr> value); + void write16(unsigned offset, ref<Expr> value); + void write16(ref<Expr> offset, ref<Expr> value); + void write32(unsigned offset, ref<Expr> value); + void write32(ref<Expr> offset, ref<Expr> value); + void write64(unsigned offset, ref<Expr> value); + void write64(ref<Expr> offset, ref<Expr> value); + + + void fastRangeCheckOffset(ref<Expr> offset, unsigned *base_r, unsigned *size_r) const; + void flushRangeForRead(unsigned rangeBase, unsigned rangeSize) const; + void flushRangeForWrite(unsigned rangeBase, unsigned rangeSize); + + bool isByteConcrete(unsigned offset) const; + bool isByteFlushed(unsigned offset) const; + bool isByteKnownSymbolic(unsigned offset) const; + + void markByteConcrete(unsigned offset); + void markByteSymbolic(unsigned offset); + void markByteFlushed(unsigned offset); + void markByteUnflushed(unsigned offset); + void setKnownSymbolic(unsigned offset, Expr *value); + + void print(); +}; + +} // End klee namespace + +#endif diff --git a/lib/Core/MemoryManager.cpp b/lib/Core/MemoryManager.cpp new file mode 100644 index 00000000..cec7b7d1 --- /dev/null +++ b/lib/Core/MemoryManager.cpp @@ -0,0 +1,69 @@ +//===-- MemoryManager.cpp -------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Common.h" + +#include "CoreStats.h" +#include "Memory.h" +#include "MemoryManager.h" + +#include "klee/ExecutionState.h" +#include "klee/Expr.h" +#include "klee/Solver.h" + +#include "llvm/Support/CommandLine.h" + +using namespace klee; + +/***/ + +MemoryManager::~MemoryManager() { + while (!objects.empty()) { + MemoryObject *mo = objects.back(); + objects.pop_back(); + delete mo; + } +} + +MemoryObject *MemoryManager::allocate(uint64_t size, bool isLocal, bool isGlobal, + const llvm::Value *allocSite) { + if (size>10*1024*1024) { + klee_warning_once(0, "failing large alloc: %u bytes", (unsigned) size); + return 0; + } + uint64_t address = (uint64_t) (unsigned long) malloc((unsigned) size); + if (!address) + return 0; + + ++stats::allocations; + MemoryObject *res = new MemoryObject(address, size, isLocal, isGlobal, false, + allocSite); + objects.push_back(res); + return res; +} + +MemoryObject *MemoryManager::allocateFixed(uint64_t address, uint64_t size, + const llvm::Value *allocSite) { + for (objects_ty::iterator it = objects.begin(), ie = objects.end(); + it != ie; ++it) { + MemoryObject *mo = *it; + assert(!(address+size > mo->address && address < mo->address+mo->size) && + "allocated an overlapping object"); + } + + ++stats::allocations; + MemoryObject *res = new MemoryObject(address, size, false, true, true, + allocSite); + objects.push_back(res); + return res; +} + +void MemoryManager::deallocate(const MemoryObject *mo) { + assert(0); +} diff --git a/lib/Core/MemoryManager.h b/lib/Core/MemoryManager.h new file mode 100644 index 00000000..adb2ba22 --- /dev/null +++ b/lib/Core/MemoryManager.h @@ -0,0 +1,41 @@ +//===-- MemoryManager.h -----------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_MEMORYMANAGER_H +#define KLEE_MEMORYMANAGER_H + +#include <vector> +#include <stdint.h> + +namespace llvm { + class Value; +} + +namespace klee { + class MemoryObject; + + class MemoryManager { + private: + typedef std::vector<MemoryObject*> objects_ty; + objects_ty objects; + + public: + MemoryManager() {} + ~MemoryManager(); + + MemoryObject *allocate(uint64_t size, bool isLocal, bool isGlobal, + const llvm::Value *allocSite); + MemoryObject *allocateFixed(uint64_t address, uint64_t size, + const llvm::Value *allocSite); + void deallocate(const MemoryObject *mo); + }; + +} // End klee namespace + +#endif diff --git a/lib/Core/ObjectHolder.h b/lib/Core/ObjectHolder.h new file mode 100644 index 00000000..abf2c6f0 --- /dev/null +++ b/lib/Core/ObjectHolder.h @@ -0,0 +1,33 @@ +//===-- ObjectHolder.h ------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_OBJECTHOLDER_H +#define KLEE_OBJECTHOLDER_H + +namespace klee { + class ObjectState; + + class ObjectHolder { + ObjectState *os; + + public: + ObjectHolder() : os(0) {} + ObjectHolder(ObjectState *_os); + ObjectHolder(const ObjectHolder &b); + ~ObjectHolder(); + + ObjectHolder &operator=(const ObjectHolder &b); + + operator class ObjectState *() { return os; } + operator class ObjectState *() const { return (ObjectState*) os; } + }; +} + +#endif + diff --git a/lib/Core/PTree.cpp b/lib/Core/PTree.cpp new file mode 100644 index 00000000..349761cd --- /dev/null +++ b/lib/Core/PTree.cpp @@ -0,0 +1,103 @@ +//===-- PTree.cpp ---------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PTree.h" + +#include <klee/Expr.h> +#include <klee/util/ExprPPrinter.h> + +#include <vector> +#include <iostream> + +using namespace klee; + + /* *** */ + +PTree::PTree(const data_type &_root) : root(new Node(0,_root)) { +} + +PTree::~PTree() {} + +std::pair<PTreeNode*, PTreeNode*> +PTree::split(Node *n, + const data_type &leftData, + const data_type &rightData) { + assert(n && !n->left && !n->right); + n->left = new Node(n, leftData); + n->right = new Node(n, rightData); + return std::make_pair(n->left, n->right); +} + +void PTree::remove(Node *n) { + assert(!n->left && !n->right); + do { + Node *p = n->parent; + delete n; + if (p) { + if (n == p->left) { + p->left = 0; + } else { + assert(n == p->right); + p->right = 0; + } + } + n = p; + } while (n && !n->left && !n->right); +} + +void PTree::dump(std::ostream &os) { + ExprPPrinter *pp = ExprPPrinter::create(os); + pp->setNewline("\\l"); + os << "digraph G {\n"; + os << "\tsize=\"10,7.5\";\n"; + os << "\tratio=fill;\n"; + os << "\trotate=90;\n"; + os << "\tcenter = \"true\";\n"; + os << "\tnode [style=\"filled\",width=.1,height=.1,fontname=\"Terminus\"]\n"; + os << "\tedge [arrowsize=.3]\n"; + std::vector<PTree::Node*> stack; + stack.push_back(root); + while (!stack.empty()) { + PTree::Node *n = stack.back(); + stack.pop_back(); + if (n->condition.isNull()) { + os << "\tn" << n << " [label=\"\""; + } else { + os << "\tn" << n << " [label=\""; + pp->print(n->condition); + os << "\",shape=diamond"; + } + if (n->data) + os << ",fillcolor=green"; + os << "];\n"; + if (n->left) { + os << "\tn" << n << " -> n" << n->left << ";\n"; + stack.push_back(n->left); + } + if (n->right) { + os << "\tn" << n << " -> n" << n->right << ";\n"; + stack.push_back(n->right); + } + } + os << "}\n"; + delete pp; +} + +PTreeNode::PTreeNode(PTreeNode *_parent, + ExecutionState *_data) + : parent(_parent), + left(0), + right(0), + data(_data), + condition(0) { +} + +PTreeNode::~PTreeNode() { +} + diff --git a/lib/Core/PTree.h b/lib/Core/PTree.h new file mode 100644 index 00000000..6accc8e2 --- /dev/null +++ b/lib/Core/PTree.h @@ -0,0 +1,53 @@ +//===-- PTree.h -------------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef __UTIL_PTREE_H__ +#define __UTIL_PTREE_H__ + +#include <klee/Expr.h> + +#include <utility> +#include <cassert> +#include <iostream> + +namespace klee { + class ExecutionState; + + class PTree { + typedef ExecutionState* data_type; + + public: + typedef class PTreeNode Node; + Node *root; + + PTree(const data_type &_root); + ~PTree(); + + std::pair<Node*,Node*> split(Node *n, + const data_type &leftData, + const data_type &rightData); + void remove(Node *n); + + void dump(std::ostream &os); + }; + + class PTreeNode { + friend class PTree; + public: + PTreeNode *parent, *left, *right; + ExecutionState *data; + ref<Expr> condition; + + private: + PTreeNode(PTreeNode *_parent, ExecutionState *_data); + ~PTreeNode(); + }; +} + +#endif diff --git a/lib/Core/Searcher.cpp b/lib/Core/Searcher.cpp new file mode 100644 index 00000000..4c94c59b --- /dev/null +++ b/lib/Core/Searcher.cpp @@ -0,0 +1,575 @@ +//===-- Searcher.cpp ------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Common.h" + +#include "Searcher.h" + +#include "CoreStats.h" +#include "Executor.h" +#include "PTree.h" +#include "StatsTracker.h" + +#include "klee/ExecutionState.h" +#include "klee/Statistics.h" +#include "klee/Internal/Module/InstructionInfoTable.h" +#include "klee/Internal/Module/KInstruction.h" +#include "klee/Internal/Module/KModule.h" +#include "klee/Internal/ADT/DiscretePDF.h" +#include "klee/Internal/ADT/RNG.h" +#include "klee/Internal/Support/ModuleUtil.h" +#include "klee/Internal/System/Time.h" + +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" + +#include <cassert> +#include <fstream> +#include <climits> + +using namespace klee; +using namespace llvm; + +namespace { + cl::opt<bool> + DebugLogMerge("debug-log-merge"); +} + +namespace klee { + extern RNG theRNG; +} + +Searcher::~Searcher() { +} + +/// + +ExecutionState &DFSSearcher::selectState() { + return *states.back(); +} + +void DFSSearcher::update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates) { + states.insert(states.end(), + addedStates.begin(), + addedStates.end()); + for (std::set<ExecutionState*>::const_iterator it = removedStates.begin(), + ie = removedStates.end(); it != ie; ++it) { + ExecutionState *es = *it; + if (es == states.back()) { + states.pop_back(); + } else { + bool ok = false; + + for (std::vector<ExecutionState*>::iterator it = states.begin(), + ie = states.end(); it != ie; ++it) { + if (es==*it) { + states.erase(it); + ok = true; + break; + } + } + + assert(ok && "invalid state removed"); + } + } +} + +/// + +ExecutionState &RandomSearcher::selectState() { + return *states[theRNG.getInt32()%states.size()]; +} + +void RandomSearcher::update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates) { + states.insert(states.end(), + addedStates.begin(), + addedStates.end()); + for (std::set<ExecutionState*>::const_iterator it = removedStates.begin(), + ie = removedStates.end(); it != ie; ++it) { + 
ExecutionState *es = *it; + bool ok = false; + + for (std::vector<ExecutionState*>::iterator it = states.begin(), + ie = states.end(); it != ie; ++it) { + if (es==*it) { + states.erase(it); + ok = true; + break; + } + } + + assert(ok && "invalid state removed"); + } +} + +/// + +WeightedRandomSearcher::WeightedRandomSearcher(Executor &_executor, + WeightType _type) + : executor(_executor), + states(new DiscretePDF<ExecutionState*>()), + type(_type) { + switch(type) { + case Depth: + updateWeights = false; + break; + case InstCount: + case CPInstCount: + case QueryCost: + case MinDistToUncovered: + case CoveringNew: + updateWeights = true; + break; + default: + assert(0 && "invalid weight type"); + } +} + +WeightedRandomSearcher::~WeightedRandomSearcher() { + delete states; +} + +ExecutionState &WeightedRandomSearcher::selectState() { + return *states->choose(theRNG.getDoubleL()); +} + +double WeightedRandomSearcher::getWeight(ExecutionState *es) { + switch(type) { + default: + case Depth: + return es->weight; + case InstCount: { + uint64_t count = theStatisticManager->getIndexedValue(stats::instructions, + es->pc->info->id); + double inv = 1. / std::max((uint64_t) 1, count); + return inv * inv; + } + case CPInstCount: { + StackFrame &sf = es->stack.back(); + uint64_t count = sf.callPathNode->statistics.getValue(stats::instructions); + double inv = 1. / std::max((uint64_t) 1, count); + return inv; + } + case QueryCost: + return (es->queryCost < .1) ? 1. : 1./es->queryCost; + case CoveringNew: + case MinDistToUncovered: { + uint64_t md2u = computeMinDistToUncovered(es->pc, + es->stack.back().minDistToUncoveredOnReturn); + + double invMD2U = 1. / (md2u ? md2u : 10000); + if (type==CoveringNew) { + double invCovNew = 0.; + if (es->instsSinceCovNew) + invCovNew = 1. / std::max(1, (int) es->instsSinceCovNew - 1000); + return (invCovNew * invCovNew + invMD2U * invMD2U); + } else { + return invMD2U * invMD2U; + } + } + } +} + +void WeightedRandomSearcher::update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates) { + if (current && updateWeights && !removedStates.count(current)) + states->update(current, getWeight(current)); + + for (std::set<ExecutionState*>::const_iterator it = addedStates.begin(), + ie = addedStates.end(); it != ie; ++it) { + ExecutionState *es = *it; + states->insert(es, getWeight(es)); + } + + for (std::set<ExecutionState*>::const_iterator it = removedStates.begin(), + ie = removedStates.end(); it != ie; ++it) { + states->remove(*it); + } +} + +bool WeightedRandomSearcher::empty() { + return states->empty(); +} + +/// + +RandomPathSearcher::RandomPathSearcher(Executor &_executor) + : executor(_executor) { +} + +RandomPathSearcher::~RandomPathSearcher() { +} + +ExecutionState &RandomPathSearcher::selectState() { + unsigned flips=0, bits=0; + PTree::Node *n = executor.processTree->root; + + while (!n->data) { + if (!n->left) { + n = n->right; + } else if (!n->right) { + n = n->left; + } else { + if (bits==0) { + flips = theRNG.getInt32(); + bits = 32; + } + --bits; + n = (flips&(1<<bits)) ? 
n->left : n->right; + } + } + + return *n->data; +} + +void RandomPathSearcher::update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates) { +} + +bool RandomPathSearcher::empty() { + return executor.states.empty(); +} + +/// + +BumpMergingSearcher::BumpMergingSearcher(Executor &_executor, Searcher *_baseSearcher) + : executor(_executor), + baseSearcher(_baseSearcher), + mergeFunction(executor.kmodule->kleeMergeFn) { +} + +BumpMergingSearcher::~BumpMergingSearcher() { + delete baseSearcher; +} + +/// + +Instruction *BumpMergingSearcher::getMergePoint(ExecutionState &es) { + if (mergeFunction) { + Instruction *i = es.pc->inst; + + if (i->getOpcode()==Instruction::Call) { + CallSite cs(cast<CallInst>(i)); + if (mergeFunction==cs.getCalledFunction()) + return i; + } + } + + return 0; +} + +ExecutionState &BumpMergingSearcher::selectState() { +entry: + // out of base states, pick one to pop + if (baseSearcher->empty()) { + std::map<llvm::Instruction*, ExecutionState*>::iterator it = + statesAtMerge.begin(); + ExecutionState *es = it->second; + statesAtMerge.erase(it); + ++es->pc; + + baseSearcher->addState(es); + } + + ExecutionState &es = baseSearcher->selectState(); + + if (Instruction *mp = getMergePoint(es)) { + std::map<llvm::Instruction*, ExecutionState*>::iterator it = + statesAtMerge.find(mp); + + baseSearcher->removeState(&es); + + if (it==statesAtMerge.end()) { + statesAtMerge.insert(std::make_pair(mp, &es)); + } else { + ExecutionState *mergeWith = it->second; + if (mergeWith->merge(es)) { + // hack, because we are terminating the state we need to let + // the baseSearcher know about it again + baseSearcher->addState(&es); + executor.terminateState(es); + } else { + it->second = &es; // the bump + ++mergeWith->pc; + + baseSearcher->addState(mergeWith); + } + } + + goto entry; + } else { + return es; + } +} + +void BumpMergingSearcher::update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates) { + baseSearcher->update(current, addedStates, removedStates); +} + +/// + +MergingSearcher::MergingSearcher(Executor &_executor, Searcher *_baseSearcher) + : executor(_executor), + baseSearcher(_baseSearcher), + mergeFunction(executor.kmodule->kleeMergeFn) { +} + +MergingSearcher::~MergingSearcher() { + delete baseSearcher; +} + +/// + +Instruction *MergingSearcher::getMergePoint(ExecutionState &es) { + if (mergeFunction) { + Instruction *i = es.pc->inst; + + if (i->getOpcode()==Instruction::Call) { + CallSite cs(cast<CallInst>(i)); + if (mergeFunction==cs.getCalledFunction()) + return i; + } + } + + return 0; +} + +ExecutionState &MergingSearcher::selectState() { + while (!baseSearcher->empty()) { + ExecutionState &es = baseSearcher->selectState(); + if (getMergePoint(es)) { + baseSearcher->removeState(&es, &es); + statesAtMerge.insert(&es); + } else { + return es; + } + } + + // build map of merge point -> state list + std::map<Instruction*, std::vector<ExecutionState*> > merges; + for (std::set<ExecutionState*>::const_iterator it = statesAtMerge.begin(), + ie = statesAtMerge.end(); it != ie; ++it) { + ExecutionState &state = **it; + Instruction *mp = getMergePoint(state); + + merges[mp].push_back(&state); + } + + if (DebugLogMerge) + llvm::cerr << "-- all at merge --\n"; + for (std::map<Instruction*, std::vector<ExecutionState*> >::iterator + it = merges.begin(), ie = merges.end(); it != ie; ++it) { + if (DebugLogMerge) { + llvm::cerr << "\tmerge: " 
<< it->first << " ["; + for (std::vector<ExecutionState*>::iterator it2 = it->second.begin(), + ie2 = it->second.end(); it2 != ie2; ++it2) { + ExecutionState *state = *it2; + llvm::cerr << state << ", "; + } + llvm::cerr << "]\n"; + } + + // merge states + std::set<ExecutionState*> toMerge(it->second.begin(), it->second.end()); + while (!toMerge.empty()) { + ExecutionState *base = *toMerge.begin(); + toMerge.erase(toMerge.begin()); + + std::set<ExecutionState*> toErase; + for (std::set<ExecutionState*>::iterator it = toMerge.begin(), + ie = toMerge.end(); it != ie; ++it) { + ExecutionState *mergeWith = *it; + + if (base->merge(*mergeWith)) { + toErase.insert(mergeWith); + } + } + if (DebugLogMerge && !toErase.empty()) { + llvm::cerr << "\t\tmerged: " << base << " with ["; + for (std::set<ExecutionState*>::iterator it = toErase.begin(), + ie = toErase.end(); it != ie; ++it) { + if (it!=toErase.begin()) llvm::cerr << ", "; + llvm::cerr << *it; + } + llvm::cerr << "]\n"; + } + for (std::set<ExecutionState*>::iterator it = toErase.begin(), + ie = toErase.end(); it != ie; ++it) { + std::set<ExecutionState*>::iterator it2 = toMerge.find(*it); + assert(it2!=toMerge.end()); + executor.terminateState(**it); + toMerge.erase(it2); + } + + // step past merge and toss base back in pool + statesAtMerge.erase(statesAtMerge.find(base)); + ++base->pc; + baseSearcher->addState(base); + } + } + + if (DebugLogMerge) + llvm::cerr << "-- merge complete, continuing --\n"; + + return selectState(); +} + +void MergingSearcher::update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates) { + if (!removedStates.empty()) { + std::set<ExecutionState *> alt = removedStates; + for (std::set<ExecutionState*>::const_iterator it = removedStates.begin(), + ie = removedStates.end(); it != ie; ++it) { + ExecutionState *es = *it; + std::set<ExecutionState*>::const_iterator it = statesAtMerge.find(es); + if (it!=statesAtMerge.end()) { + statesAtMerge.erase(it); + alt.erase(alt.find(es)); + } + } + baseSearcher->update(current, addedStates, alt); + } else { + baseSearcher->update(current, addedStates, removedStates); + } +} + +/// + +BatchingSearcher::BatchingSearcher(Searcher *_baseSearcher, + double _timeBudget, + unsigned _instructionBudget) + : baseSearcher(_baseSearcher), + timeBudget(_timeBudget), + instructionBudget(_instructionBudget), + lastState(0) { + +} + +BatchingSearcher::~BatchingSearcher() { + delete baseSearcher; +} + +ExecutionState &BatchingSearcher::selectState() { + if (!lastState || + (util::getWallTime()-lastStartTime)>timeBudget || + (stats::instructions-lastStartInstructions)>instructionBudget) { + if (lastState) { + double delta = util::getWallTime()-lastStartTime; + if (delta>timeBudget*1.1) { + llvm::cerr << "KLEE: increased time budget from " << timeBudget << " to " << delta << "\n"; + timeBudget = delta; + } + } + lastState = &baseSearcher->selectState(); + lastStartTime = util::getWallTime(); + lastStartInstructions = stats::instructions; + return *lastState; + } else { + return *lastState; + } +} + +void BatchingSearcher::update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates) { + if (removedStates.count(lastState)) + lastState = 0; + baseSearcher->update(current, addedStates, removedStates); +} + +/***/ + +IterativeDeepeningTimeSearcher::IterativeDeepeningTimeSearcher(Searcher *_baseSearcher) + : baseSearcher(_baseSearcher), + time(1.) 
{ +} + +IterativeDeepeningTimeSearcher::~IterativeDeepeningTimeSearcher() { + delete baseSearcher; +} + +ExecutionState &IterativeDeepeningTimeSearcher::selectState() { + ExecutionState &res = baseSearcher->selectState(); + startTime = util::getWallTime(); + return res; +} + +void IterativeDeepeningTimeSearcher::update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates) { + double elapsed = util::getWallTime() - startTime; + + if (!removedStates.empty()) { + std::set<ExecutionState *> alt = removedStates; + for (std::set<ExecutionState*>::const_iterator it = removedStates.begin(), + ie = removedStates.end(); it != ie; ++it) { + ExecutionState *es = *it; + std::set<ExecutionState*>::const_iterator it = pausedStates.find(es); + if (it!=pausedStates.end()) { + pausedStates.erase(it); + alt.erase(alt.find(es)); + } + } + baseSearcher->update(current, addedStates, alt); + } else { + baseSearcher->update(current, addedStates, removedStates); + } + + if (current && !removedStates.count(current) && elapsed>time) { + pausedStates.insert(current); + baseSearcher->removeState(current); + } + + if (baseSearcher->empty()) { + time *= 2; + llvm::cerr << "KLEE: increasing time budget to: " << time << "\n"; + baseSearcher->update(0, pausedStates, std::set<ExecutionState*>()); + pausedStates.clear(); + } +} + +/***/ + +InterleavedSearcher::InterleavedSearcher(const std::vector<Searcher*> &_searchers) + : searchers(_searchers), + index(1) { +} + +InterleavedSearcher::~InterleavedSearcher() { + for (std::vector<Searcher*>::const_iterator it = searchers.begin(), + ie = searchers.end(); it != ie; ++it) + delete *it; +} + +ExecutionState &InterleavedSearcher::selectState() { + Searcher *s = searchers[--index]; + if (index==0) index = searchers.size(); + return s->selectState(); +} + +void InterleavedSearcher::update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates) { + for (std::vector<Searcher*>::const_iterator it = searchers.begin(), + ie = searchers.end(); it != ie; ++it) + (*it)->update(current, addedStates, removedStates); +} diff --git a/lib/Core/Searcher.h b/lib/Core/Searcher.h new file mode 100644 index 00000000..455a7679 --- /dev/null +++ b/lib/Core/Searcher.h @@ -0,0 +1,279 @@ +//===-- Searcher.h ----------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_SEARCHER_H +#define KLEE_SEARCHER_H + +#include <vector> +#include <set> +#include <map> +#include <queue> + +// FIXME: Move out of header, use llvm streams. 
+#include <ostream> + +namespace llvm { + class BasicBlock; + class Function; + class Instruction; +} + +namespace klee { + template<class T> class DiscretePDF; + class ExecutionState; + class Executor; + + class Searcher { + public: + virtual ~Searcher(); + + virtual ExecutionState &selectState() = 0; + + virtual void update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates) = 0; + + virtual bool empty() = 0; + + // prints name of searcher as a klee_message() + // TODO: could probably make prettier or more flexible + virtual void printName(std::ostream &os) { + os << "<unnamed searcher>\n"; + } + + // pgbovine - to be called when a searcher gets activated and + // deactivated, say, by a higher-level searcher; most searchers + // don't need this functionality, so don't have to override. + virtual void activate() {}; + virtual void deactivate() {}; + + // utility functions + + void addState(ExecutionState *es, ExecutionState *current = 0) { + std::set<ExecutionState*> tmp; + tmp.insert(es); + update(current, tmp, std::set<ExecutionState*>()); + } + + void removeState(ExecutionState *es, ExecutionState *current = 0) { + std::set<ExecutionState*> tmp; + tmp.insert(es); + update(current, std::set<ExecutionState*>(), tmp); + } + }; + + class DFSSearcher : public Searcher { + std::vector<ExecutionState*> states; + + public: + ExecutionState &selectState(); + void update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates); + bool empty() { return states.empty(); } + void printName(std::ostream &os) { + os << "DFSSearcher\n"; + } + }; + + class RandomSearcher : public Searcher { + std::vector<ExecutionState*> states; + + public: + ExecutionState &selectState(); + void update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates); + bool empty() { return states.empty(); } + void printName(std::ostream &os) { + os << "RandomSearcher\n"; + } + }; + + class WeightedRandomSearcher : public Searcher { + public: + enum WeightType { + Depth, + QueryCost, + InstCount, + CPInstCount, + MinDistToUncovered, + CoveringNew + }; + + private: + Executor &executor; + DiscretePDF<ExecutionState*> *states; + WeightType type; + bool updateWeights; + + double getWeight(ExecutionState*); + + public: + WeightedRandomSearcher(Executor &executor, WeightType type); + ~WeightedRandomSearcher(); + + ExecutionState &selectState(); + void update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates); + bool empty(); + void printName(std::ostream &os) { + os << "WeightedRandomSearcher::"; + switch(type) { + case Depth : os << "Depth\n"; return; + case QueryCost : os << "QueryCost\n"; return; + case InstCount : os << "InstCount\n"; return; + case CPInstCount : os << "CPInstCount\n"; return; + case MinDistToUncovered : os << "MinDistToUncovered\n"; return; + case CoveringNew : os << "CoveringNew\n"; return; + default : os << "<unknown type>\n"; return; + } + } + }; + + class RandomPathSearcher : public Searcher { + Executor &executor; + + public: + RandomPathSearcher(Executor &_executor); + ~RandomPathSearcher(); + + ExecutionState &selectState(); + void update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates); + bool empty(); + void printName(std::ostream &os) { + os << 
"RandomPathSearcher\n"; + } + }; + + class MergingSearcher : public Searcher { + Executor &executor; + std::set<ExecutionState*> statesAtMerge; + Searcher *baseSearcher; + llvm::Function *mergeFunction; + + private: + llvm::Instruction *getMergePoint(ExecutionState &es); + + public: + MergingSearcher(Executor &executor, Searcher *baseSearcher); + ~MergingSearcher(); + + ExecutionState &selectState(); + void update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates); + bool empty() { return baseSearcher->empty() && statesAtMerge.empty(); } + void printName(std::ostream &os) { + os << "MergingSearcher\n"; + } + }; + + class BumpMergingSearcher : public Searcher { + Executor &executor; + std::map<llvm::Instruction*, ExecutionState*> statesAtMerge; + Searcher *baseSearcher; + llvm::Function *mergeFunction; + + private: + llvm::Instruction *getMergePoint(ExecutionState &es); + + public: + BumpMergingSearcher(Executor &executor, Searcher *baseSearcher); + ~BumpMergingSearcher(); + + ExecutionState &selectState(); + void update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates); + bool empty() { return baseSearcher->empty() && statesAtMerge.empty(); } + void printName(std::ostream &os) { + os << "BumpMergingSearcher\n"; + } + }; + + class BatchingSearcher : public Searcher { + Searcher *baseSearcher; + double timeBudget; + unsigned instructionBudget; + + ExecutionState *lastState; + double lastStartTime; + unsigned lastStartInstructions; + + public: + BatchingSearcher(Searcher *baseSearcher, + double _timeBudget, + unsigned _instructionBudget); + ~BatchingSearcher(); + + ExecutionState &selectState(); + void update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates); + bool empty() { return baseSearcher->empty(); } + void printName(std::ostream &os) { + os << "<BatchingSearcher> timeBudget: " << timeBudget + << ", instructionBudget: " << instructionBudget + << ", baseSearcher:\n"; + baseSearcher->printName(os); + os << "</BatchingSearcher>\n"; + } + }; + + class IterativeDeepeningTimeSearcher : public Searcher { + Searcher *baseSearcher; + double time, startTime; + std::set<ExecutionState*> pausedStates; + + public: + IterativeDeepeningTimeSearcher(Searcher *baseSearcher); + ~IterativeDeepeningTimeSearcher(); + + ExecutionState &selectState(); + void update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates); + bool empty() { return baseSearcher->empty() && pausedStates.empty(); } + void printName(std::ostream &os) { + os << "IterativeDeepeningTimeSearcher\n"; + } + }; + + class InterleavedSearcher : public Searcher { + typedef std::vector<Searcher*> searchers_ty; + + searchers_ty searchers; + unsigned index; + + public: + explicit InterleavedSearcher(const searchers_ty &_searchers); + ~InterleavedSearcher(); + + ExecutionState &selectState(); + void update(ExecutionState *current, + const std::set<ExecutionState*> &addedStates, + const std::set<ExecutionState*> &removedStates); + bool empty() { return searchers[0]->empty(); } + void printName(std::ostream &os) { + os << "<InterleavedSearcher> containing " + << searchers.size() << " searchers:\n"; + for (searchers_ty::iterator it = searchers.begin(), ie = searchers.end(); + it != ie; ++it) + (*it)->printName(os); + os << "</InterleavedSearcher>\n"; + } + }; + +} + 
+#endif diff --git a/lib/Core/SeedInfo.cpp b/lib/Core/SeedInfo.cpp new file mode 100644 index 00000000..d76d75dc --- /dev/null +++ b/lib/Core/SeedInfo.cpp @@ -0,0 +1,151 @@ +//===-- SeedInfo.cpp ------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Common.h" + +#include "Memory.h" +#include "SeedInfo.h" +#include "TimingSolver.h" + +#include "klee/ExecutionState.h" +#include "klee/Expr.h" +#include "klee/util/ExprUtil.h" +#include "klee/Internal/ADT/BOut.h" + +using namespace klee; + +BOutObject *SeedInfo::getNextInput(const MemoryObject *mo, + bool byName) { + if (byName) { + unsigned i; + + for (i=0; i<input->numObjects; ++i) { + BOutObject *obj = &input->objects[i]; + if (std::string(obj->name) == mo->name) + if (used.insert(obj).second) + return obj; + } + + // If first unused input matches in size then accept that as + // well. + for (i=0; i<input->numObjects; ++i) + if (!used.count(&input->objects[i])) + break; + if (i<input->numObjects) { + BOutObject *obj = &input->objects[i]; + if (obj->numBytes == mo->size) { + used.insert(obj); + klee_warning_once(mo, "using seed input %s[%d] for: %s (no name match)", + obj->name, obj->numBytes, mo->name.c_str()); + return obj; + } + } + + klee_warning_once(mo, "no seed input for: %s", mo->name.c_str()); + return 0; + } else { + if (inputPosition >= input->numObjects) { + return 0; + } else { + return &input->objects[inputPosition++]; + } + } +} + +void SeedInfo::patchSeed(const ExecutionState &state, + ref<Expr> condition, + TimingSolver *solver) { + std::vector< ref<Expr> > required(state.constraints.begin(), + state.constraints.end()); + ExecutionState tmp(required); + tmp.addConstraint(condition); + + // Try and patch direct reads first, this is likely to resolve the + // problem quickly and avoids long traversal of all seed + // values. There are other smart ways to do this, the nicest is if + // we got a minimal counterexample from STP, in which case we would + // just inject those values back into the seed. + std::set< std::pair<const Array*, unsigned> > directReads; + std::vector< ref<ReadExpr> > reads; + findReads(condition, false, reads); + for (std::vector< ref<ReadExpr> >::iterator it = reads.begin(), + ie = reads.end(); it != ie; ++it) { + ReadExpr *re = it->get(); + if (re->index.isConstant()) { + unsigned index = (unsigned) re->index.getConstantValue(); + directReads.insert(std::make_pair(re->updates.root, index)); + } + } + + for (std::set< std::pair<const Array*, unsigned> >::iterator + it = directReads.begin(), ie = directReads.end(); it != ie; ++it) { + const Array *array = it->first; + unsigned i = it->second; + ref<Expr> read = ReadExpr::create(UpdateList(array, true, 0), + ref<Expr>(i, Expr::Int32)); + + // If not in bindings then this can't be a violation? 
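+ // Keep the seed's byte whenever possible: only when equality with the + // seed value is provably false under the new condition do we ask the + // solver for a fresh value and rebind this byte; otherwise the seed + // value is pinned with an equality constraint.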
+ Assignment::bindings_ty::iterator it2 = assignment.bindings.find(array); + if (it2 != assignment.bindings.end()) { + ref<Expr> isSeed = EqExpr::create(read, ref<Expr>(it2->second[i], Expr::Int8)); + bool res; + bool success = solver->mustBeFalse(tmp, isSeed, res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) { + ref<Expr> value; + bool success = solver->getValue(tmp, read, value); + assert(success && "FIXME: Unhandled solver failure"); + it2->second[i] = value.getConstantValue(); + tmp.addConstraint(EqExpr::create(read, ref<Expr>(it2->second[i], Expr::Int8))); + } else { + tmp.addConstraint(isSeed); + } + } + } + + bool res; + bool success = solver->mayBeTrue(state, assignment.evaluate(condition), res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) + return; + + // We could still do a lot better than this, for example by looking at + // independence. But really, this shouldn't be happening often. + for (Assignment::bindings_ty::iterator it = assignment.bindings.begin(), + ie = assignment.bindings.end(); it != ie; ++it) { + const Array *array = it->first; + for (unsigned i=0; i<array->size; ++i) { + ref<Expr> read = ReadExpr::create(UpdateList(array, true, 0), + ref<Expr>(i, Expr::Int32)); + ref<Expr> isSeed = EqExpr::create(read, ref<Expr>(it->second[i], Expr::Int8)); + bool res; + bool success = solver->mustBeFalse(tmp, isSeed, res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) { + ref<Expr> value; + bool success = solver->getValue(tmp, read, value); + assert(success && "FIXME: Unhandled solver failure"); + it->second[i] = value.getConstantValue(); + tmp.addConstraint(EqExpr::create(read, ref<Expr>(it->second[i], Expr::Int8))); + } else { + tmp.addConstraint(isSeed); + } + } + } + +#ifndef NDEBUG + { + bool res; + bool success = + solver->mayBeTrue(state, assignment.evaluate(condition), res); + assert(success && "FIXME: Unhandled solver failure"); + assert(res && "seed patching failed"); + } +#endif +} diff --git a/lib/Core/SeedInfo.h b/lib/Core/SeedInfo.h new file mode 100644 index 00000000..dd151ed0 --- /dev/null +++ b/lib/Core/SeedInfo.h @@ -0,0 +1,48 @@ +//===-- SeedInfo.h ----------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_SEEDINFO_H +#define KLEE_SEEDINFO_H + +#include "klee/util/Assignment.h" + +extern "C" { + struct BOut; + struct BOutObject; +} + +namespace klee { + class ExecutionState; + class TimingSolver; + + class SeedInfo { + public: + Assignment assignment; + BOut *input; + unsigned inputPosition; + std::set<struct BOutObject*> used; + + public: + explicit + SeedInfo(BOut *_input) : assignment(true), + input(_input), + inputPosition(0) {} + + BOutObject *getNextInput(const MemoryObject *mo, + bool byName); + + /// Patch the seed so that condition is satisfied while retaining as + /// many of the seed values as possible. 
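+ /// The assignment is modified in place; the solver is consulted only + /// for bytes whose seed values are inconsistent with the condition.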
+ void patchSeed(const ExecutionState &state, + ref<Expr> condition, + TimingSolver *solver); + }; +} + +#endif diff --git a/lib/Core/SpecialFunctionHandler.cpp b/lib/Core/SpecialFunctionHandler.cpp new file mode 100644 index 00000000..da2a4a49 --- /dev/null +++ b/lib/Core/SpecialFunctionHandler.cpp @@ -0,0 +1,727 @@ +//===-- SpecialFunctionHandler.cpp ----------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Common.h" + +#include "Memory.h" +#include "SpecialFunctionHandler.h" +#include "TimingSolver.h" + +#include "klee/ExecutionState.h" + +#include "klee/Internal/Module/KInstruction.h" +#include "klee/Internal/Module/KModule.h" + +#include "Executor.h" +#include "MemoryManager.h" + +#include "llvm/Module.h" + +#include <errno.h> + +using namespace llvm; +using namespace klee; + +/// \todo Almost all of the demands in this file should be replaced +/// with terminateState calls. + +/// + +struct HandlerInfo { + const char *name; + SpecialFunctionHandler::Handler handler; + bool doesNotReturn; /// Intrinsic terminates the process + bool hasReturnValue; /// Intrinsic has a return value + bool doNotOverride; /// Intrinsic should not be used if already defined +}; + +// FIXME: We are more or less committed to requiring an intrinsic +// library these days. We can move some of this stuff there, +// especially things like realloc which have complicated semantics +// w.r.t. forking. Among other things this makes delayed query +// dispatch easier to implement. +HandlerInfo handlerInfo[] = { +#define add(name, handler, ret) { name, \ + &SpecialFunctionHandler::handler, \ + false, ret, false } +#define addDNR(name, handler) { name, \ + &SpecialFunctionHandler::handler, \ + true, false, false } + addDNR("__assert_rtn", handleAssertFail), + addDNR("__assert_fail", handleAssertFail), + addDNR("_assert", handleAssert), + addDNR("abort", handleAbort), + addDNR("_exit", handleExit), + { "exit", &SpecialFunctionHandler::handleExit, true, false, true }, + addDNR("klee_abort", handleAbort), + addDNR("klee_silent_exit", handleSilentExit), + addDNR("klee_report_error", handleReportError), + + add("calloc", handleCalloc, true), + add("free", handleFree, false), + add("klee_assume", handleAssume, false), + add("klee_check_memory_access", handleCheckMemoryAccess, false), + add("klee_get_value", handleGetValue, true), + add("klee_define_fixed_object", handleDefineFixedObject, false), + add("klee_get_obj_size", handleGetObjSize, true), + add("klee_get_errno", handleGetErrno, true), + add("klee_is_symbolic", handleIsSymbolic, true), + add("klee_make_symbolic_name", handleMakeSymbolic, false), + add("klee_mark_global", handleMarkGlobal, false), + add("klee_malloc_n", handleMallocN, true), + add("klee_merge", handleMerge, false), + add("klee_prefer_cex", handlePreferCex, false), + add("klee_print_expr", handlePrintExpr, false), + add("klee_print_range", handlePrintRange, false), + add("klee_set_forking", handleSetForking, false), + add("klee_warning", handleWarning, false), + add("klee_warning_once", handleWarningOnce, false), + add("klee_under_constrained", handleUnderConstrained, false), + add("klee_alias_function", handleAliasFunction, false), + add("malloc", handleMalloc, true), + add("realloc", handleRealloc, true), + + // operator delete[](void*) + add("_ZdaPv", 
handleDeleteArray, false), + // operator delete(void*) + add("_ZdlPv", handleDelete, false), + + // operator new[](unsigned int) + add("_Znaj", handleNewArray, true), + // operator new(unsigned int) + add("_Znwj", handleNew, true), + + // FIXME-64: This is wrong for 64-bit long... + + // operator new[](unsigned long) + add("_Znam", handleNewArray, true), + // operator new(unsigned long) + add("_Znwm", handleNew, true), + +#undef addDNR +#undef add +}; + +SpecialFunctionHandler::SpecialFunctionHandler(Executor &_executor) + : executor(_executor) {} + + +void SpecialFunctionHandler::prepare() { + unsigned N = sizeof(handlerInfo)/sizeof(handlerInfo[0]); + + for (unsigned i=0; i<N; ++i) { + HandlerInfo &hi = handlerInfo[i]; + Function *f = executor.kmodule->module->getFunction(hi.name); + + // No need to create if the function doesn't exist, since it cannot + // be called in that case. + + if (f && (!hi.doNotOverride || f->isDeclaration())) { + // Make sure NoReturn attribute is set, for optimization and + // coverage counting. + if (hi.doesNotReturn) + f->addFnAttr(Attribute::NoReturn); + + // Change to a declaration since we handle internally (simplifies + // module and allows deleting dead code). + if (!f->isDeclaration()) + f->deleteBody(); + } + } +} + +void SpecialFunctionHandler::bind() { + unsigned N = sizeof(handlerInfo)/sizeof(handlerInfo[0]); + + for (unsigned i=0; i<N; ++i) { + HandlerInfo &hi = handlerInfo[i]; + Function *f = executor.kmodule->module->getFunction(hi.name); + + if (f && (!hi.doNotOverride || f->isDeclaration())) + handlers[f] = std::make_pair(hi.handler, hi.hasReturnValue); + } +} + + +bool SpecialFunctionHandler::handle(ExecutionState &state, + Function *f, + KInstruction *target, + std::vector< ref<Expr> > &arguments) { + handlers_ty::iterator it = handlers.find(f); + if (it != handlers.end()) { + Handler h = it->second.first; + bool hasReturnValue = it->second.second; + // FIXME: Check this... add test? 
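+ // A handler with no return value cannot service a call whose result is + // actually used; rather than leave the destination undefined, such a + // state is terminated with an execution error below.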
+ if (!hasReturnValue && !target->inst->use_empty()) { + executor.terminateStateOnExecError(state, + "expected return value from void special function"); + } else { + (this->*h)(state, target, arguments); + } + return true; + } else { + return false; + } +} + +/****/ + +// reads a concrete string from memory +std::string SpecialFunctionHandler::readStringAtAddress(ExecutionState &state, + ref<Expr> address) { + ObjectPair op; + address = executor.toUnique(state, address); + assert(address.isConstant() && "symbolic string arg to intrinsic"); + if (!state.addressSpace.resolveOne(address.getConstantValue(), op)) + assert(0 && "XXX out of bounds / multiple resolution unhandled"); + bool res; + assert(executor.solver->mustBeTrue(state, + EqExpr::create(address, + op.first->getBaseExpr()), + res) && + res && + "XXX interior pointer unhandled"); + const MemoryObject *mo = op.first; + const ObjectState *os = op.second; + + char *buf = new char[mo->size]; + + unsigned i; + for (i = 0; i < mo->size - 1; i++) { + ref<Expr> cur = os->read8(i); + cur = executor.toUnique(state, cur); + assert(cur.isConstant() && + "hit symbolic char while reading concrete string"); + buf[i] = cur.getConstantValue(); + } + buf[i] = 0; + + std::string result(buf); + delete[] buf; + return result; +} + +/****/ + +void SpecialFunctionHandler::handleAbort(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==0 && "invalid number of arguments to abort"); + + //XXX:DRE:TAINT + if(state.underConstrained) { + llvm::cerr << "TAINT: skipping abort fail\n"; + executor.terminateState(state); + } else { + executor.terminateStateOnError(state, "abort failure", "abort.err"); + } +} + +void SpecialFunctionHandler::handleExit(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==1 && "invalid number of arguments to exit"); + executor.terminateStateOnExit(state); +} + +void SpecialFunctionHandler::handleSilentExit(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==1 && "invalid number of arguments to exit"); + executor.terminateState(state); +} + +void SpecialFunctionHandler::handleAliasFunction(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==2 && + "invalid number of arguments to klee_alias_function"); + std::string old_fn = readStringAtAddress(state, arguments[0]); + std::string new_fn = readStringAtAddress(state, arguments[1]); + //llvm::cerr << "Replacing " << old_fn << "() with " << new_fn << "()\n"; + if (old_fn == new_fn) + state.removeFnAlias(old_fn); + else state.addFnAlias(old_fn, new_fn); +} + +void SpecialFunctionHandler::handleAssert(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==3 && "invalid number of arguments to _assert"); + + //XXX:DRE:TAINT + if(state.underConstrained) { + llvm::cerr << "TAINT: skipping assertion:" + << readStringAtAddress(state, arguments[0]) << "\n"; + executor.terminateState(state); + } else + executor.terminateStateOnError(state, + "ASSERTION FAIL: " + readStringAtAddress(state, arguments[0]), + "assert.err"); +} + +void SpecialFunctionHandler::handleAssertFail(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==4 && "invalid number of arguments to __assert_fail"); + + //XXX:DRE:TAINT + if(state.underConstrained) { + llvm::cerr 
<< "TAINT: skipping assertion:" + << readStringAtAddress(state, arguments[0]) << "\n"; + executor.terminateState(state); + } else + executor.terminateStateOnError(state, + "ASSERTION FAIL: " + readStringAtAddress(state, arguments[0]), + "assert.err"); +} + +void SpecialFunctionHandler::handleReportError(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==4 && "invalid number of arguments to klee_report_error"); + + // arguments[0], arguments[1] are file, line + + //XXX:DRE:TAINT + if(state.underConstrained) { + llvm::cerr << "TAINT: skipping klee_report_error:" + << readStringAtAddress(state, arguments[2]) << ":" + << readStringAtAddress(state, arguments[3]) << "\n"; + executor.terminateState(state); + } else + executor.terminateStateOnError(state, + readStringAtAddress(state, arguments[2]), + readStringAtAddress(state, arguments[3])); +} + +void SpecialFunctionHandler::handleMerge(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // nop +} + +void SpecialFunctionHandler::handleNew(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // XXX should type check args + assert(arguments.size()==1 && "invalid number of arguments to new"); + + executor.executeAlloc(state, arguments[0], false, target); +} + +void SpecialFunctionHandler::handleDelete(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // XXX should type check args + assert(arguments.size()==1 && "invalid number of arguments to delete"); + executor.executeFree(state, arguments[0]); +} + +void SpecialFunctionHandler::handleNewArray(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // XXX should type check args + assert(arguments.size()==1 && "invalid number of arguments to new[]"); + executor.executeAlloc(state, arguments[0], false, target); +} + +void SpecialFunctionHandler::handleDeleteArray(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // XXX should type check args + assert(arguments.size()==1 && "invalid number of arguments to delete[]"); + executor.executeFree(state, arguments[0]); +} + +void SpecialFunctionHandler::handleMalloc(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // XXX should type check args + assert(arguments.size()==1 && "invalid number of arguments to malloc"); + executor.executeAlloc(state, arguments[0], false, target); +} + +void SpecialFunctionHandler::handleMallocN(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + + // XXX should type check args + assert(arguments.size() == 3 && "invalid number of arguments to malloc"); + + // mallocn(number, size, alignment) + ref<Expr> numElems = executor.toUnique(state, arguments[0]); + ref<Expr> elemSize = executor.toUnique(state, arguments[1]); + ref<Expr> elemAlignment = executor.toUnique(state, arguments[2]); + + assert(numElems.isConstant() && + elemSize.isConstant() && + elemAlignment.isConstant() && + "symbolic arguments passed to klee_mallocn"); + + executor.executeAllocN(state, + numElems.getConstantValue(), + elemSize.getConstantValue(), + elemAlignment.getConstantValue(), + false, + target); +} + +void SpecialFunctionHandler::handleAssume(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==1 && "invalid number of arguments to klee_assume"); + + ref<Expr> e = 
arguments[0]; + + if(e.getWidth() != Expr::Bool) + e = NeExpr::create(e, ConstantExpr::create(0, e.getWidth())); + + bool res; + bool success = executor.solver->mustBeFalse(state, e, res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) { + executor.terminateStateOnError(state, + "invalid klee_assume call (provably false)", + "user.err"); + } else { + executor.addConstraint(state, e); + } +} + +void SpecialFunctionHandler::handleIsSymbolic(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==1 && "invalid number of arguments to klee_is_symbolic"); + + executor.bindLocal(target, state, + ConstantExpr::create(!arguments[0].isConstant(), Expr::Int32)); +} + +void SpecialFunctionHandler::handlePreferCex(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==2 && + "invalid number of arguments to klee_prefex_cex"); + + ref<Expr> cond = arguments[1]; + if (cond.getWidth() != Expr::Bool) + cond = NeExpr::create(cond, ref<Expr>(0, cond.getWidth())); + + Executor::ExactResolutionList rl; + executor.resolveExact(state, arguments[0], rl, "prefex_cex"); + + assert(rl.size() == 1 && + "prefer_cex target must resolve to precisely one object"); + + rl[0].first.first->cexPreferences.push_back(cond); +} + +void SpecialFunctionHandler::handlePrintExpr(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==2 && + "invalid number of arguments to klee_print_expr"); + + std::string msg_str = readStringAtAddress(state, arguments[0]); + llvm::cerr << msg_str << ":" << arguments[1] << "\n"; +} + + +void SpecialFunctionHandler::handleUnderConstrained(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // XXX should type check args + assert(arguments.size()==1 && + "invalid number of arguments to klee_under_constrained()."); + assert(arguments[0].isConstant() && + "symbolic argument given to klee_under_constrained!"); + + unsigned v = arguments[0].getConstantValue(); + llvm::cerr << "argument = " << v << " under=" << state.underConstrained << "\n"; + if(v) { + assert(state.underConstrained == false && + "Bogus call to klee_under_constrained()."); + state.underConstrained = v; + llvm::cerr << "turning on under!\n"; + } else { + assert(state.underConstrained != 0 && "Bogus call to klee_taint_end()"); + state.underConstrained = 0; + llvm::cerr << "turning off under!\n"; + } +} + +void SpecialFunctionHandler::handleSetForking(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==1 && + "invalid number of arguments to klee_set_forking"); + ref<Expr> value = executor.toUnique(state, arguments[0]); + + if (!value.isConstant()) { + executor.terminateStateOnError(state, + "klee_set_forking requires a constant arg", + "user.err"); + } else { + state.forkDisabled = !value.getConstantValue(); + } +} + +void SpecialFunctionHandler::handleWarning(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==1 && "invalid number of arguments to klee_warning"); + + std::string msg_str = readStringAtAddress(state, arguments[0]); + klee_warning("%s: %s", state.stack.back().kf->function->getName().c_str(), + msg_str.c_str()); +} + +void SpecialFunctionHandler::handleWarningOnce(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==1 && 
+ "invalid number of arguments to klee_warning_once"); + + std::string msg_str = readStringAtAddress(state, arguments[0]); + klee_warning_once(0, "%s: %s", state.stack.back().kf->function->getName().c_str(), + msg_str.c_str()); +} + +void SpecialFunctionHandler::handlePrintRange(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==2 && + "invalid number of arguments to klee_print_range"); + + std::string msg_str = readStringAtAddress(state, arguments[0]); + llvm::cerr << msg_str << ":" << arguments[1]; + if (!arguments[1].isConstant()) { + // FIXME: Pull into a unique value method? + ref<Expr> value; + bool success = executor.solver->getValue(state, arguments[1], value); + assert(success && "FIXME: Unhandled solver failure"); + bool res; + success = executor.solver->mustBeTrue(state, + EqExpr::create(arguments[1], value), + res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) { + llvm::cerr << " == " << value; + } else { + llvm::cerr << " ~= " << value; + std::pair< ref<Expr>, ref<Expr> > res = + executor.solver->getRange(state, arguments[1]); + llvm::cerr << " (in [" << res.first << ", " << res.second <<"])"; + } + } + llvm::cerr << "\n"; +} + +void SpecialFunctionHandler::handleGetObjSize(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // XXX should type check args + assert(arguments.size()==1 && + "invalid number of arguments to klee_get_obj_size"); + Executor::ExactResolutionList rl; + executor.resolveExact(state, arguments[0], rl, "klee_get_obj_size"); + for (Executor::ExactResolutionList::iterator it = rl.begin(), + ie = rl.end(); it != ie; ++it) { + executor.bindLocal(target, *it->second, + ConstantExpr::create(it->first.first->size, Expr::Int32)); + } +} + +void SpecialFunctionHandler::handleGetErrno(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // XXX should type check args + assert(arguments.size()==0 && + "invalid number of arguments to klee_get_obj_size"); + executor.bindLocal(target, state, + ConstantExpr::create(errno, Expr::Int32)); +} + +void SpecialFunctionHandler::handleCalloc(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // XXX should type check args + assert(arguments.size()==2 && + "invalid number of arguments to calloc"); + + ref<Expr> size = MulExpr::create(arguments[0], + arguments[1]); + executor.executeAlloc(state, size, false, target, true); +} + +void SpecialFunctionHandler::handleRealloc(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // XXX should type check args + assert(arguments.size()==2 && + "invalid number of arguments to realloc"); + ref<Expr> address = arguments[0]; + ref<Expr> size = arguments[1]; + + Executor::StatePair zeroSize = executor.fork(state, + Expr::createIsZero(size), + true); + + if (zeroSize.first) { // size == 0 + executor.executeFree(*zeroSize.first, address, target); + } + if (zeroSize.second) { // size != 0 + Executor::StatePair zeroPointer = executor.fork(*zeroSize.second, + Expr::createIsZero(address), + true); + + if (zeroPointer.first) { // address == 0 + executor.executeAlloc(*zeroPointer.first, size, false, target); + } + if (zeroPointer.second) { // address != 0 + Executor::ExactResolutionList rl; + executor.resolveExact(*zeroPointer.second, address, rl, "realloc"); + + for (Executor::ExactResolutionList::iterator it = rl.begin(), + ie = rl.end(); it != ie; ++it) { + 
executor.executeAlloc(*it->second, size, false, target, false, + it->first.second); + } + } + } +} + +void SpecialFunctionHandler::handleFree(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + // XXX should type check args + assert(arguments.size()==1 && + "invalid number of arguments to free"); + executor.executeFree(state, arguments[0]); +} + +void SpecialFunctionHandler::handleCheckMemoryAccess(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==2 && + "invalid number of arguments to klee_check_memory_access"); + + ref<Expr> address = executor.toUnique(state, arguments[0]); + ref<Expr> size = executor.toUnique(state, arguments[1]); + if (!address.isConstant() || !size.isConstant()) { + executor.terminateStateOnError(state, + "check_memory_access requires constant args", + "user.err"); + } else { + ObjectPair op; + + if (!state.addressSpace.resolveOne(address.getConstantValue(), op)) { + executor.terminateStateOnError(state, + "check_memory_access: memory error", + "ptr.err", + executor.getAddressInfo(state, address)); + } else { + ref<Expr> chk = op.first->getBoundsCheckPointer(address, + size.getConstantValue()); + assert(chk.isConstant()); + if (!chk.getConstantValue()) { + executor.terminateStateOnError(state, + "check_memory_access: memory error", + "ptr.err", + executor.getAddressInfo(state, address)); + } + } + } +} + +void SpecialFunctionHandler::handleGetValue(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==1 && + "invalid number of arguments to klee_get_value"); + + executor.executeGetValue(state, arguments[0], target); +} + +void SpecialFunctionHandler::handleDefineFixedObject(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==2 && + "invalid number of arguments to klee_define_fixed_object"); + assert(arguments[0].isConstant() && + "expect constant address argument to klee_define_fixed_object"); + assert(arguments[1].isConstant() && + "expect constant size argument to klee_define_fixed_object"); + + uint64_t address = arguments[0].getConstantValue(); + uint64_t size = arguments[1].getConstantValue(); + MemoryObject *mo = executor.memory->allocateFixed(address, size, state.prevPC->inst); + executor.bindObjectInState(state, mo, false); + mo->isUserSpecified = true; // XXX hack; +} + +void SpecialFunctionHandler::handleMakeSymbolic(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==3 && + "invalid number of arguments to klee_make_symbolic[_name]"); + + Executor::ExactResolutionList rl; + executor.resolveExact(state, arguments[0], rl, "make_symbolic"); + + for (Executor::ExactResolutionList::iterator it = rl.begin(), + ie = rl.end(); it != ie; ++it) { + MemoryObject *mo = (MemoryObject*) it->first.first; + std::string name = readStringAtAddress(state, arguments[2]); + mo->setName(name); + + const ObjectState *old = it->first.second; + ExecutionState *s = it->second; + + if (old->readOnly) { + executor.terminateStateOnError(*s, + "cannot make readonly object symbolic", + "user.err"); + return; + } + + bool res; + bool success = + executor.solver->mustBeTrue(*s, EqExpr::create(arguments[1], + mo->getSizeExpr()), + res); + assert(success && "FIXME: Unhandled solver failure"); + + if (res) { + executor.executeMakeSymbolic(*s, mo); + } else { + executor.terminateStateOnError(*s, + "wrong size given 
to klee_make_symbolic[_name]", + "user.err"); + } + } +} + +void SpecialFunctionHandler::handleMarkGlobal(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > &arguments) { + assert(arguments.size()==1 && + "invalid number of arguments to klee_mark_global"); + + Executor::ExactResolutionList rl; + executor.resolveExact(state, arguments[0], rl, "mark_global"); + + for (Executor::ExactResolutionList::iterator it = rl.begin(), + ie = rl.end(); it != ie; ++it) { + MemoryObject *mo = (MemoryObject*) it->first.first; + assert(!mo->isLocal); + mo->isGlobal = true; + } +} diff --git a/lib/Core/SpecialFunctionHandler.h b/lib/Core/SpecialFunctionHandler.h new file mode 100644 index 00000000..d5d1af93 --- /dev/null +++ b/lib/Core/SpecialFunctionHandler.h @@ -0,0 +1,106 @@ +//===-- SpecialFunctionHandler.h --------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_SPECIALFUNCTIONHANDLER_H +#define KLEE_SPECIALFUNCTIONHANDLER_H + +#include <map> +#include <vector> +#include <string> + +namespace llvm { + class Function; +} + +namespace klee { + class Executor; + class Expr; + class ExecutionState; + class KInstruction; + template<typename T> class ref; + + class SpecialFunctionHandler { + public: + typedef void (SpecialFunctionHandler::*Handler)(ExecutionState &state, + KInstruction *target, + std::vector<ref<Expr> > + &arguments); + typedef std::map<const llvm::Function*, + std::pair<Handler,bool> > handlers_ty; + + handlers_ty handlers; + class Executor &executor; + + public: + SpecialFunctionHandler(Executor &_executor); + + /// Perform any modifications on the LLVM module before it is + /// prepared for execution. At the moment this involves deleting + /// unused function bodies and marking intrinsics with appropriate + /// flags for use in optimizations. + void prepare(); + + /// Initialize the internal handler map after the module has been + /// prepared for execution. 
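+ /// handle() consults the resulting map and dispatches calls to + /// registered functions to the matching member function, returning + /// false for functions it does not recognize.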
+ void bind(); + + bool handle(ExecutionState &state, + llvm::Function *f, + KInstruction *target, + std::vector< ref<Expr> > &arguments); + + /* Convenience routines */ + + std::string readStringAtAddress(ExecutionState &state, ref<Expr> address); + + /* Handlers */ + +#define HANDLER(name) void name(ExecutionState &state, \ + KInstruction *target, \ + std::vector< ref<Expr> > &arguments) + HANDLER(handleAbort); + HANDLER(handleAssert); + HANDLER(handleAssertFail); + HANDLER(handleAssume); + HANDLER(handleCalloc); + HANDLER(handleCheckMemoryAccess); + HANDLER(handleDefineFixedObject); + HANDLER(handleDelete); + HANDLER(handleDeleteArray); + HANDLER(handleExit); + HANDLER(handleAliasFunction); + HANDLER(handleFree); + HANDLER(handleGetErrno); + HANDLER(handleGetObjSize); + HANDLER(handleGetValue); + HANDLER(handleIsSymbolic); + HANDLER(handleMakeSymbolic); + HANDLER(handleMalloc); + HANDLER(handleMallocN); + HANDLER(handleMarkGlobal); + HANDLER(handleMerge); + HANDLER(handleNew); + HANDLER(handleNewArray); + HANDLER(handlePreferCex); + HANDLER(handlePrintExpr); + HANDLER(handlePrintRange); + HANDLER(handleRange); + HANDLER(handleRealloc); + HANDLER(handleReportError); + HANDLER(handleRevirtObjects); + HANDLER(handleSetForking); + HANDLER(handleSilentExit); + HANDLER(handleUnderConstrained); + HANDLER(handleWarning); + HANDLER(handleWarningOnce); +#undef HANDLER + }; +} // End klee namespace + +#endif diff --git a/lib/Core/StatsTracker.cpp b/lib/Core/StatsTracker.cpp new file mode 100644 index 00000000..35c073a3 --- /dev/null +++ b/lib/Core/StatsTracker.cpp @@ -0,0 +1,814 @@ +//===-- StatsTracker.cpp --------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Common.h" + +#include "StatsTracker.h" + +#include "klee/ExecutionState.h" +#include "klee/Statistics.h" +#include "klee/Internal/Module/InstructionInfoTable.h" +#include "klee/Internal/Module/KModule.h" +#include "klee/Internal/Module/KInstruction.h" +#include "klee/Internal/Support/ModuleUtil.h" +#include "klee/Internal/System/Time.h" + +#include "CallPathManager.h" +#include "CoreStats.h" +#include "Executor.h" +#include "MemoryManager.h" +#include "UserSearcher.h" +#include "../Solver/SolverStats.h" + +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/InlineAsm.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CFG.h" +#include "llvm/System/Process.h" +#include "llvm/System/Path.h" + +#include <iostream> +#include <fstream> + +using namespace klee; +using namespace llvm; + +/// + +namespace { + cl::opt<bool> + TrackInstructionTime("track-instruction-time", + cl::desc("Enable tracking of time for individual instructions"), + cl::init(false)); + + cl::opt<bool> + OutputStats("output-stats", + cl::desc("Write running stats trace file"), + cl::init(true)); + + cl::opt<bool> + OutputIStats("output-istats", + cl::desc("Write instruction level statistics (in callgrind format)"), + cl::init(true)); + + cl::opt<double> + StatsWriteInterval("stats-write-interval", + cl::desc("Approximate number of seconds between stats writes (default: 1.0)"), + cl::init(1.)); + + cl::opt<double> + IStatsWriteInterval("istats-write-interval", + cl::desc("Approximate number of seconds between istats writes (default: 10.0)"), + cl::init(10.)); + + /* + cl::opt<double> + BranchCovCountsWriteInterval("branch-cov-counts-write-interval", + cl::desc("Approximate number of seconds between run.branches writes (default: 5.0)"), + cl::init(5.)); + */ + + // XXX I really would like to have dynamic rate control for something like this. + cl::opt<double> + UncoveredUpdateInterval("uncovered-update-interval", + cl::init(30.)); + + cl::opt<bool> + UseCallPaths("use-call-paths", + cl::desc("Enable calltree tracking for instruction level statistics"), + cl::init(true)); + +} + +/// + +bool StatsTracker::useStatistics() { + return OutputStats || OutputIStats; +} + +namespace klee { + class WriteIStatsTimer : public Executor::Timer { + StatsTracker *statsTracker; + + public: + WriteIStatsTimer(StatsTracker *_statsTracker) : statsTracker(_statsTracker) {} + ~WriteIStatsTimer() {} + + void run() { statsTracker->writeIStats(); } + }; + + class WriteStatsTimer : public Executor::Timer { + StatsTracker *statsTracker; + + public: + WriteStatsTimer(StatsTracker *_statsTracker) : statsTracker(_statsTracker) {} + ~WriteStatsTimer() {} + + void run() { statsTracker->writeStatsLine(); } + }; + + class UpdateReachableTimer : public Executor::Timer { + StatsTracker *statsTracker; + + public: + UpdateReachableTimer(StatsTracker *_statsTracker) : statsTracker(_statsTracker) {} + + void run() { statsTracker->computeReachableUncovered(); } + }; + +} + +// + +/// Check for special cases where we statically know an instruction is +/// uncoverable. Currently the case is an unreachable instruction +/// following a noreturn call; the instruction is really only there to +/// satisfy LLVM's termination requirement. 
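+/// For example, the 'unreachable' instruction emitted after a call to a +/// noreturn function such as abort() can never execute, so counting it +/// would permanently inflate the uncovered-instruction statistic.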
+static bool instructionIsCoverable(Instruction *i) { + if (i->getOpcode() == Instruction::Unreachable) { + BasicBlock *bb = i->getParent(); + BasicBlock::iterator it(i); + if (it==bb->begin()) { + return true; + } else { + Instruction *prev = --it; + if (isa<CallInst>(prev) || isa<InvokeInst>(prev)) { + Function *target = getDirectCallTarget(prev); + if (target && target->doesNotReturn()) + return false; + } + } + } + + return true; +} + +StatsTracker::StatsTracker(Executor &_executor, std::string _objectFilename, + bool _updateMinDistToUncovered) + : executor(_executor), + objectFilename(_objectFilename), + statsFile(0), + istatsFile(0), + startWallTime(util::getWallTime()), + numBranches(0), + fullBranches(0), + partialBranches(0), + updateMinDistToUncovered(_updateMinDistToUncovered) { + KModule *km = executor.kmodule; + + sys::Path module(objectFilename); + if (!sys::Path(objectFilename).isAbsolute()) { + sys::Path current = sys::Path::GetCurrentDirectory(); + current.appendComponent(objectFilename); + if (current.exists()) + objectFilename = current.c_str(); + } + + if (OutputIStats) + theStatisticManager->useIndexedStats(km->infos->getMaxID()); + + for (std::vector<KFunction*>::iterator it = km->functions.begin(), + ie = km->functions.end(); it != ie; ++it) { + KFunction *kf = *it; + kf->trackCoverage = 1; + + for (unsigned i=0; i<kf->numInstructions; ++i) { + KInstruction *ki = kf->instructions[i]; + + if (OutputIStats) { + unsigned id = ki->info->id; + theStatisticManager->setIndex(id); + if (kf->trackCoverage && instructionIsCoverable(ki->inst)) + ++stats::uncoveredInstructions; + } + + if (kf->trackCoverage) { + if (BranchInst *bi = dyn_cast<BranchInst>(ki->inst)) + if (!bi->isUnconditional()) + numBranches++; + } + } + } + + if (OutputStats) { + statsFile = executor.interpreterHandler->openOutputFile("run.stats"); + assert(statsFile && "unable to open statistics trace file"); + writeStatsHeader(); + writeStatsLine(); + + executor.addTimer(new WriteStatsTimer(this), StatsWriteInterval); + + if (updateMinDistToUncovered) + executor.addTimer(new UpdateReachableTimer(this), UncoveredUpdateInterval); + } + + if (OutputIStats) { + istatsFile = executor.interpreterHandler->openOutputFile("run.istats"); + assert(istatsFile && "unable to open istats file"); + + executor.addTimer(new WriteIStatsTimer(this), IStatsWriteInterval); + } +} + +StatsTracker::~StatsTracker() { + if (statsFile) + delete statsFile; + if (istatsFile) + delete istatsFile; +} + +void StatsTracker::done() { + if (statsFile) + writeStatsLine(); + if (OutputIStats) + writeIStats(); +} + +void StatsTracker::stepInstruction(ExecutionState &es) { + if (OutputIStats) { + if (TrackInstructionTime) { + static sys::TimeValue lastNowTime(0,0),lastUserTime(0,0); + + if (lastUserTime.seconds()==0 && lastUserTime.nanoseconds()==0) { + sys::TimeValue sys(0,0); + sys::Process::GetTimeUsage(lastNowTime,lastUserTime,sys); + } else { + sys::TimeValue now(0,0),user(0,0),sys(0,0); + sys::Process::GetTimeUsage(now,user,sys); + sys::TimeValue delta = user - lastUserTime; + sys::TimeValue deltaNow = now - lastNowTime; + stats::instructionTime += delta.usec(); + stats::instructionRealTime += deltaNow.usec(); + lastUserTime = user; + lastNowTime = now; + } + } + + Instruction *inst = es.pc->inst; + const InstructionInfo &ii = *es.pc->info; + StackFrame &sf = es.stack.back(); + theStatisticManager->setIndex(ii.id); + if (UseCallPaths) + theStatisticManager->setContext(&sf.callPathNode->statistics); + + if (es.instsSinceCovNew) + 
++es.instsSinceCovNew; + + if (sf.kf->trackCoverage && instructionIsCoverable(inst)) { + if (!theStatisticManager->getIndexedValue(stats::coveredInstructions, ii.id)) { + // Checking for actual stoppoints avoids inconsistencies due + // to line number propogation. + if (isa<DbgStopPointInst>(inst)) + es.coveredLines[&ii.file].insert(ii.line); + es.coveredNew = true; + es.instsSinceCovNew = 1; + ++stats::coveredInstructions; + stats::uncoveredInstructions += (uint64_t)-1; + } + } + } +} + +/// + +/* Should be called _after_ the es->pushFrame() */ +void StatsTracker::framePushed(ExecutionState &es, StackFrame *parentFrame) { + if (OutputIStats) { + StackFrame &sf = es.stack.back(); + + if (UseCallPaths) { + CallPathNode *parent = parentFrame ? parentFrame->callPathNode : 0; + CallPathNode *cp = callPathManager.getCallPath(parent, + sf.caller ? sf.caller->inst : 0, + sf.kf->function); + sf.callPathNode = cp; + cp->count++; + } + + if (updateMinDistToUncovered) { + uint64_t minDistAtRA = 0; + if (parentFrame) + minDistAtRA = parentFrame->minDistToUncoveredOnReturn; + + sf.minDistToUncoveredOnReturn = sf.caller ? + computeMinDistToUncovered(sf.caller, minDistAtRA) : 0; + } + } +} + +/* Should be called _after_ the es->popFrame() */ +void StatsTracker::framePopped(ExecutionState &es) { + // XXX remove me? +} + + +void StatsTracker::markBranchVisited(ExecutionState *visitedTrue, + ExecutionState *visitedFalse) { + if (OutputIStats) { + unsigned id = theStatisticManager->getIndex(); + uint64_t hasTrue = theStatisticManager->getIndexedValue(stats::trueBranches, id); + uint64_t hasFalse = theStatisticManager->getIndexedValue(stats::falseBranches, id); + if (visitedTrue && !hasTrue) { + visitedTrue->coveredNew = true; + visitedTrue->instsSinceCovNew = 1; + ++stats::trueBranches; + if (hasFalse) { ++fullBranches; --partialBranches; } + else ++partialBranches; + hasTrue = 1; + } + if (visitedFalse && !hasFalse) { + visitedFalse->coveredNew = true; + visitedFalse->instsSinceCovNew = 1; + ++stats::falseBranches; + if (hasTrue) { ++fullBranches; --partialBranches; } + else ++partialBranches; + } + } +} + +void StatsTracker::writeStatsHeader() { + *statsFile << "('Instructions'," + << "'FullBranches'," + << "'PartialBranches'," + << "'NumBranches'," + << "'UserTime'," + << "'NumStates'," + << "'MallocUsage'," + << "'NumQueries'," + << "'NumQueryConstructs'," + << "'NumObjects'," + << "'WallTime'," + << "'CoveredInstructions'," + << "'UncoveredInstructions'," + << "'QueryTime'," + << "'SolverTime'," + << "'CexCacheTime'," + << "'ForkTime'," + << "'ResolveTime'," + << ")\n"; + statsFile->flush(); +} + +double StatsTracker::elapsed() { + return util::getWallTime() - startWallTime; +} + +void StatsTracker::writeStatsLine() { + *statsFile << "(" << stats::instructions + << "," << fullBranches + << "," << partialBranches + << "," << numBranches + << "," << util::getUserTime() + << "," << executor.states.size() + << "," << sys::Process::GetTotalMemoryUsage() + << "," << stats::queries + << "," << stats::queryConstructs + << "," << 0 // was numObjects + << "," << elapsed() + << "," << stats::coveredInstructions + << "," << stats::uncoveredInstructions + << "," << stats::queryTime / 1000000. + << "," << stats::solverTime / 1000000. + << "," << stats::cexCacheTime / 1000000. + << "," << stats::forkTime / 1000000. + << "," << stats::resolveTime / 1000000. 
+ << ")\n"; + statsFile->flush(); +} + +void StatsTracker::updateStateStatistics(uint64_t addend) { + for (std::set<ExecutionState*>::iterator it = executor.states.begin(), + ie = executor.states.end(); it != ie; ++it) { + ExecutionState &state = **it; + const InstructionInfo &ii = *state.pc->info; + theStatisticManager->incrementIndexedValue(stats::states, ii.id, addend); + if (UseCallPaths) + state.stack.back().callPathNode->statistics.incrementValue(stats::states, addend); + } +} + +void StatsTracker::writeIStats() { + Module *m = executor.kmodule->module; + uint64_t istatsMask = 0; + std::ostream &of = *istatsFile; + + of.seekp(0, std::ios::end); + unsigned istatsSize = of.tellp(); + of.seekp(0); + + of << "version: 1\n"; + of << "creator: klee\n"; + of << "pid: " << sys::Process::GetCurrentUserId() << "\n"; + of << "cmd: " << m->getModuleIdentifier() << "\n\n"; + of << "\n"; + + StatisticManager &sm = *theStatisticManager; + unsigned nStats = sm.getNumStatistics(); + + // Max is 13, sadly + istatsMask |= 1<<sm.getStatisticID("Queries"); + istatsMask |= 1<<sm.getStatisticID("QueriesValid"); + istatsMask |= 1<<sm.getStatisticID("QueriesInvalid"); + istatsMask |= 1<<sm.getStatisticID("QueryTime"); + istatsMask |= 1<<sm.getStatisticID("ResolveTime"); + istatsMask |= 1<<sm.getStatisticID("Instructions"); + istatsMask |= 1<<sm.getStatisticID("InstructionTimes"); + istatsMask |= 1<<sm.getStatisticID("InstructionRealTimes"); + istatsMask |= 1<<sm.getStatisticID("Forks"); + istatsMask |= 1<<sm.getStatisticID("CoveredInstructions"); + istatsMask |= 1<<sm.getStatisticID("UncoveredInstructions"); + istatsMask |= 1<<sm.getStatisticID("States"); + istatsMask |= 1<<sm.getStatisticID("MinDistToUncovered"); + + of << "positions: instr line\n"; + + for (unsigned i=0; i<nStats; i++) { + if (istatsMask & (1<<i)) { + Statistic &s = sm.getStatistic(i); + of << "event: " << s.getShortName() << " : " + << s.getName() << "\n"; + } + } + + of << "events: "; + for (unsigned i=0; i<nStats; i++) { + if (istatsMask & (1<<i)) + of << sm.getStatistic(i).getShortName() << " "; + } + of << "\n"; + + // set state counts, decremented after we process so that we don't + // have to zero all records each time. 
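+  // Illustrative sketch of the scheme described above (no change to the
+  // logic): every live state's current pc gets +1 in the indexed States
+  // counter before the records are emitted, and the same amount is taken
+  // back afterwards, so nothing has to be zeroed between dumps:
+  //
+  //   updateStateStatistics(1);             // credit every state's pc
+  //   ...emit one record per instruction...
+  //   updateStateStatistics((uint64_t)-1);  // undo via wrap-around add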
+  if (istatsMask & (1<<stats::states.getID()))
+    updateStateStatistics(1);
+
+  std::string sourceFile = "";
+
+  CallSiteSummaryTable callSiteStats;
+  if (UseCallPaths)
+    callPathManager.getSummaryStatistics(callSiteStats);
+
+  of << "ob=" << objectFilename << "\n";
+
+  for (Module::iterator fnIt = m->begin(), fn_ie = m->end();
+       fnIt != fn_ie; ++fnIt) {
+    if (!fnIt->isDeclaration()) {
+      of << "fn=" << fnIt->getName() << "\n";
+      for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end();
+           bbIt != bb_ie; ++bbIt) {
+        for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end();
+             it != ie; ++it) {
+          Instruction *instr = &*it;
+          const InstructionInfo &ii = executor.kmodule->infos->getInfo(instr);
+          unsigned index = ii.id;
+          if (ii.file!=sourceFile) {
+            of << "fl=" << ii.file << "\n";
+            sourceFile = ii.file;
+          }
+          of << ii.assemblyLine << " ";
+          of << ii.line << " ";
+          for (unsigned i=0; i<nStats; i++)
+            if (istatsMask&(1<<i))
+              of << sm.getIndexedValue(sm.getStatistic(i), index) << " ";
+          of << "\n";
+
+          if (UseCallPaths &&
+              (isa<CallInst>(instr) || isa<InvokeInst>(instr))) {
+            CallSiteSummaryTable::iterator it = callSiteStats.find(instr);
+            if (it!=callSiteStats.end()) {
+              for (std::map<llvm::Function*, CallSiteInfo>::iterator
+                     fit = it->second.begin(), fie = it->second.end();
+                   fit != fie; ++fit) {
+                Function *f = fit->first;
+                CallSiteInfo &csi = fit->second;
+                const InstructionInfo &fii =
+                  executor.kmodule->infos->getFunctionInfo(f);
+
+                if (fii.file!="" && fii.file!=sourceFile)
+                  of << "cfl=" << fii.file << "\n";
+                of << "cfn=" << f->getName() << "\n";
+                of << "calls=" << csi.count << " ";
+                of << fii.assemblyLine << " ";
+                of << fii.line << "\n";
+
+                of << ii.assemblyLine << " ";
+                of << ii.line << " ";
+                for (unsigned i=0; i<nStats; i++) {
+                  if (istatsMask&(1<<i)) {
+                    Statistic &s = sm.getStatistic(i);
+                    uint64_t value;
+
+                    // Hack, ignore things that don't make sense on
+                    // call paths.
+                    if (&s == &stats::uncoveredInstructions) {
+                      value = 0;
+                    } else {
+                      value = csi.statistics.getValue(s);
+                    }
+
+                    of << value << " ";
+                  }
+                }
+                of << "\n";
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (istatsMask & (1<<stats::states.getID()))
+    updateStateStatistics((uint64_t)-1);
+
+  // Clear the end of the file if necessary (no truncate op?).
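+  // Illustrative note, assuming istatsSize (captured at the top of this
+  // function) still holds the previous dump's length: std::ostream has no
+  // truncate operation, so a shorter dump pads the remainder of the old
+  // file with '\n'. For example, a 10-byte dump written over a 14-byte old
+  // file ends with four newline characters instead of leftover stale text.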
+  unsigned pos = of.tellp();
+  for (unsigned i=pos; i<istatsSize; ++i)
+    of << '\n';
+
+  of.flush();
+}
+
+///
+
+typedef std::map<Instruction*, std::vector<Function*> > calltargets_ty;
+
+static calltargets_ty callTargets;
+static std::map<Function*, std::vector<Instruction*> > functionCallers;
+static std::map<Function*, unsigned> functionShortestPath;
+
+static std::vector<Instruction*> getSuccs(Instruction *i) {
+  BasicBlock *bb = i->getParent();
+  std::vector<Instruction*> res;
+
+  if (i==bb->getTerminator()) {
+    for (succ_iterator it = succ_begin(bb), ie = succ_end(bb); it != ie; ++it)
+      res.push_back(it->begin());
+  } else {
+    res.push_back(++BasicBlock::iterator(i));
+  }
+
+  return res;
+}
+
+uint64_t klee::computeMinDistToUncovered(const KInstruction *ki,
+                                         uint64_t minDistAtRA) {
+  StatisticManager &sm = *theStatisticManager;
+  if (minDistAtRA==0) { // unreachable on return, best is local
+    return sm.getIndexedValue(stats::minDistToUncovered,
+                              ki->info->id);
+  } else {
+    uint64_t minDistLocal = sm.getIndexedValue(stats::minDistToUncovered,
+                                               ki->info->id);
+    uint64_t distToReturn = sm.getIndexedValue(stats::minDistToReturn,
+                                               ki->info->id);
+
+    if (distToReturn==0) { // return unreachable, best is local
+      return minDistLocal;
+    } else if (!minDistLocal) { // no local reachable
+      return distToReturn + minDistAtRA;
+    } else {
+      return std::min(minDistLocal, distToReturn + minDistAtRA);
+    }
+  }
+}
+
+void StatsTracker::computeReachableUncovered() {
+  KModule *km = executor.kmodule;
+  Module *m = km->module;
+  static bool init = true;
+  const InstructionInfoTable &infos = *km->infos;
+  StatisticManager &sm = *theStatisticManager;
+
+  if (init) {
+    init = false;
+
+    // Compute call targets. It would be nice to use alias information
+    // instead of assuming all indirect calls hit all escaping
+    // functions, eh?
+    for (Module::iterator fnIt = m->begin(), fn_ie = m->end();
+         fnIt != fn_ie; ++fnIt) {
+      for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end();
+           bbIt != bb_ie; ++bbIt) {
+        for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end();
+             it != ie; ++it) {
+          if (isa<CallInst>(it) || isa<InvokeInst>(it)) {
+            if (isa<InlineAsm>(it->getOperand(0))) {
+              // We can never call through here so assume no targets
+              // (which should be correct anyhow).
+              callTargets.insert(std::make_pair(it,
+                                                std::vector<Function*>()));
+            } else if (Function *target = getDirectCallTarget(it)) {
+              callTargets[it].push_back(target);
+            } else {
+              callTargets[it] =
+                std::vector<Function*>(km->escapingFunctions.begin(),
+                                       km->escapingFunctions.end());
+            }
+          }
+        }
+      }
+    }
+
+    // Compute function callers as reflexion of callTargets.
+    for (calltargets_ty::iterator it = callTargets.begin(),
+           ie = callTargets.end(); it != ie; ++it)
+      for (std::vector<Function*>::iterator fit = it->second.begin(),
+             fie = it->second.end(); fit != fie; ++fit)
+        functionCallers[*fit].push_back(it->first);
+
+    // Initialize minDistToReturn to shortest paths through
+    // functions. 0 is unreachable.
+    std::vector<Instruction *> instructions;
+    for (Module::iterator fnIt = m->begin(), fn_ie = m->end();
+         fnIt != fn_ie; ++fnIt) {
+      if (fnIt->isDeclaration()) {
+        if (fnIt->doesNotReturn()) {
+          functionShortestPath[fnIt] = 0;
+        } else {
+          functionShortestPath[fnIt] = 1; // whatever
+        }
+      } else {
+        functionShortestPath[fnIt] = 0;
+      }
+
+      // Not sure if I should bother to preorder here. XXX I should.
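+      // Illustrative seeding example (the values shown are what the loop
+      // below plus the later fixed point produce for straight-line code):
+      // return/unwind instructions start at distance 1, everything else at
+      // 0 ("no return known reachable yet"). For a block ending in ret:
+      //
+      //   %a = add ...   ; minDistToReturn converges to 3
+      //   %b = mul ...   ; minDistToReturn converges to 2
+      //   ret            ; minDistToReturn seeded to 1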
+ for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end(); + bbIt != bb_ie; ++bbIt) { + for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end(); + it != it; ++it) { + instructions.push_back(it); + unsigned id = infos.getInfo(it).id; + sm.setIndexedValue(stats::minDistToReturn, + id, + isa<ReturnInst>(it) || isa<UnwindInst>(it)); + } + } + } + + std::reverse(instructions.begin(), instructions.end()); + + // I'm so lazy it's not even worklisted. + bool changed; + do { + changed = false; + for (std::vector<Instruction*>::iterator it = instructions.begin(), + ie = instructions.end(); it != ie; ++it) { + Instruction *inst = *it; + unsigned bestThrough = 0; + + if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) { + std::vector<Function*> &targets = callTargets[inst]; + for (std::vector<Function*>::iterator fnIt = targets.begin(), + ie = targets.end(); fnIt != ie; ++fnIt) { + uint64_t dist = functionShortestPath[*fnIt]; + if (dist) { + dist = 1+dist; // count instruction itself + if (bestThrough==0 || dist<bestThrough) + bestThrough = dist; + } + } + } else { + bestThrough = 1; + } + + if (bestThrough) { + unsigned id = infos.getInfo(*it).id; + uint64_t best, cur = best = sm.getIndexedValue(stats::minDistToReturn, id); + std::vector<Instruction*> succs = getSuccs(*it); + for (std::vector<Instruction*>::iterator it2 = succs.begin(), + ie = succs.end(); it2 != ie; ++it2) { + uint64_t dist = sm.getIndexedValue(stats::minDistToReturn, + infos.getInfo(*it2).id); + if (dist) { + uint64_t val = bestThrough + dist; + if (best==0 || val<best) + best = val; + } + } + if (best != cur) { + sm.setIndexedValue(stats::minDistToReturn, id, best); + changed = true; + + // Update shortest path if this is the entry point. + Function *f = inst->getParent()->getParent(); + if (inst==f->begin()->begin()) + functionShortestPath[f] = best; + } + } + } + } while (changed); + } + + // compute minDistToUncovered, 0 is unreachable + std::vector<Instruction *> instructions; + for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); + fnIt != fn_ie; ++fnIt) { + // Not sure if I should bother to preorder here. + for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end(); + bbIt != bb_ie; ++bbIt) { + for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end(); + it != it; ++it) { + unsigned id = infos.getInfo(it).id; + instructions.push_back(&*it); + sm.setIndexedValue(stats::minDistToUncovered, + id, + sm.getIndexedValue(stats::uncoveredInstructions, id)); + } + } + } + + std::reverse(instructions.begin(), instructions.end()); + + // I'm so lazy it's not even worklisted. 
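+  // Rough sketch of the relaxation performed below (not a worklist, just
+  // repeated passes over the reversed instruction list until nothing
+  // changes):
+  //
+  //   md2u(i) = min( md2u(i),                        // 1 if i is uncovered
+  //                  through(i) + min over succs s of md2u(s),
+  //                  1 + md2u(entry of a defined callee) )
+  //
+  // where through(i) is 1 for ordinary instructions and the callee's
+  // shortest path to return for calls, and 0 everywhere means "unreachable".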
+ bool changed; + do { + changed = false; + for (std::vector<Instruction*>::iterator it = instructions.begin(), + ie = instructions.end(); it != ie; ++it) { + Instruction *inst = *it; + uint64_t best, cur = best = sm.getIndexedValue(stats::minDistToUncovered, + infos.getInfo(inst).id); + unsigned bestThrough = 0; + + if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) { + std::vector<Function*> &targets = callTargets[inst]; + for (std::vector<Function*>::iterator fnIt = targets.begin(), + ie = targets.end(); fnIt != ie; ++fnIt) { + uint64_t dist = functionShortestPath[*fnIt]; + if (dist) { + dist = 1+dist; // count instruction itself + if (bestThrough==0 || dist<bestThrough) + bestThrough = dist; + } + + if (!(*fnIt)->isDeclaration()) { + uint64_t calleeDist = sm.getIndexedValue(stats::minDistToUncovered, + infos.getFunctionInfo(*fnIt).id); + if (calleeDist) { + calleeDist = 1+calleeDist; // count instruction itself + if (best==0 || calleeDist<best) + best = calleeDist; + } + } + } + } else { + bestThrough = 1; + } + + if (bestThrough) { + std::vector<Instruction*> succs = getSuccs(inst); + for (std::vector<Instruction*>::iterator it2 = succs.begin(), + ie = succs.end(); it2 != ie; ++it2) { + uint64_t dist = sm.getIndexedValue(stats::minDistToUncovered, + infos.getInfo(*it2).id); + if (dist) { + uint64_t val = bestThrough + dist; + if (best==0 || val<best) + best = val; + } + } + } + + if (best != cur) { + sm.setIndexedValue(stats::minDistToUncovered, + infos.getInfo(inst).id, + best); + changed = true; + } + } + } while (changed); + + for (std::set<ExecutionState*>::iterator it = executor.states.begin(), + ie = executor.states.end(); it != ie; ++it) { + ExecutionState *es = *it; + uint64_t currentFrameMinDist = 0; + for (ExecutionState::stack_ty::iterator sfIt = es->stack.begin(), + sf_ie = es->stack.end(); sfIt != sf_ie; ++sfIt) { + ExecutionState::stack_ty::iterator next = sfIt + 1; + KInstIterator kii; + + if (next==es->stack.end()) { + kii = es->pc; + } else { + kii = next->caller; + ++kii; + } + + sfIt->minDistToUncoveredOnReturn = currentFrameMinDist; + + currentFrameMinDist = computeMinDistToUncovered(kii, currentFrameMinDist); + } + } +} diff --git a/lib/Core/StatsTracker.h b/lib/Core/StatsTracker.h new file mode 100644 index 00000000..9d22b389 --- /dev/null +++ b/lib/Core/StatsTracker.h @@ -0,0 +1,93 @@ +//===-- StatsTracker.h ------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_STATSTRACKER_H +#define KLEE_STATSTRACKER_H + +#include "CallPathManager.h" + +#include <iostream> +#include <set> + +namespace llvm { + class BranchInst; + class Function; + class Instruction; +} + +namespace klee { + class ExecutionState; + class Executor; + class InstructionInfoTable; + class InterpreterHandler; + class KInstruction; + class StackFrame; + + class StatsTracker { + friend class WriteStatsTimer; + friend class WriteIStatsTimer; + + Executor &executor; + std::string objectFilename; + + std::ostream *statsFile, *istatsFile; + double startWallTime; + + unsigned numBranches; + unsigned fullBranches, partialBranches; + + CallPathManager callPathManager; + + bool updateMinDistToUncovered; + + public: + static bool useStatistics(); + + private: + void updateStateStatistics(uint64_t addend); + void writeStatsHeader(); + void writeStatsLine(); + void writeIStats(); + + public: + StatsTracker(Executor &_executor, std::string _objectFilename, + bool _updateMinDistToUncovered); + ~StatsTracker(); + + // called after a new StackFrame has been pushed (for callpath tracing) + void framePushed(ExecutionState &es, StackFrame *parentFrame); + + // called after a StackFrame has been popped + void framePopped(ExecutionState &es); + + // called when some side of a branch has been visited. it is + // imperative that this be called when the statistics index is at + // the index for the branch itself. + void markBranchVisited(ExecutionState *visitedTrue, + ExecutionState *visitedFalse); + + // called when execution is done and stats files should be flushed + void done(); + + // process stats for a single instruction step, es is the state + // about to be stepped + void stepInstruction(ExecutionState &es); + + /// Return time in seconds since execution start. + double elapsed(); + + void computeReachableUncovered(); + }; + + uint64_t computeMinDistToUncovered(const KInstruction *ki, + uint64_t minDistAtRA); + +} + +#endif diff --git a/lib/Core/TimingSolver.cpp b/lib/Core/TimingSolver.cpp new file mode 100644 index 00000000..70e42836 --- /dev/null +++ b/lib/Core/TimingSolver.cpp @@ -0,0 +1,147 @@ +//===-- TimingSolver.cpp --------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "TimingSolver.h" + +#include "klee/ExecutionState.h" +#include "klee/Solver.h" +#include "klee/Statistics.h" + +#include "CoreStats.h" + +#include "llvm/System/Process.h" + +using namespace klee; +using namespace llvm; + +/***/ + +bool TimingSolver::evaluate(const ExecutionState& state, ref<Expr> expr, + Solver::Validity &result) { + // Fast path, to avoid timer and OS overhead. + if (expr.isConstant()) { + result = expr.getConstantValue() ? 
Solver::True : Solver::False; + return true; + } + + sys::TimeValue now(0,0),user(0,0),delta(0,0),sys(0,0); + sys::Process::GetTimeUsage(now,user,sys); + + if (simplifyExprs) + expr = state.constraints.simplifyExpr(expr); + + bool success = solver->evaluate(Query(state.constraints, expr), result); + + sys::Process::GetTimeUsage(delta,user,sys); + delta -= now; + stats::solverTime += delta.usec(); + state.queryCost += delta.usec()/1000000.; + + return success; +} + +bool TimingSolver::mustBeTrue(const ExecutionState& state, ref<Expr> expr, + bool &result) { + // Fast path, to avoid timer and OS overhead. + if (expr.isConstant()) { + result = expr.getConstantValue() ? true : false; + return true; + } + + sys::TimeValue now(0,0),user(0,0),delta(0,0),sys(0,0); + sys::Process::GetTimeUsage(now,user,sys); + + if (simplifyExprs) + expr = state.constraints.simplifyExpr(expr); + + bool success = solver->mustBeTrue(Query(state.constraints, expr), result); + + sys::Process::GetTimeUsage(delta,user,sys); + delta -= now; + stats::solverTime += delta.usec(); + state.queryCost += delta.usec()/1000000.; + + return success; +} + +bool TimingSolver::mustBeFalse(const ExecutionState& state, ref<Expr> expr, + bool &result) { + return mustBeTrue(state, Expr::createNot(expr), result); +} + +bool TimingSolver::mayBeTrue(const ExecutionState& state, ref<Expr> expr, + bool &result) { + bool res; + if (!mustBeFalse(state, expr, res)) + return false; + result = !res; + return true; +} + +bool TimingSolver::mayBeFalse(const ExecutionState& state, ref<Expr> expr, + bool &result) { + bool res; + if (!mustBeTrue(state, expr, res)) + return false; + result = !res; + return true; +} + +bool TimingSolver::getValue(const ExecutionState& state, ref<Expr> expr, + ref<Expr> &result) { + // Fast path, to avoid timer and OS overhead. + if (expr.isConstant()) { + result = expr; + return true; + } + + sys::TimeValue now(0,0),user(0,0),delta(0,0),sys(0,0); + sys::Process::GetTimeUsage(now,user,sys); + + if (simplifyExprs) + expr = state.constraints.simplifyExpr(expr); + + bool success = solver->getValue(Query(state.constraints, expr), result); + + sys::Process::GetTimeUsage(delta,user,sys); + delta -= now; + stats::solverTime += delta.usec(); + state.queryCost += delta.usec()/1000000.; + + return success; +} + +bool +TimingSolver::getInitialValues(const ExecutionState& state, + const std::vector<const Array*> + &objects, + std::vector< std::vector<unsigned char> > + &result) { + if (objects.empty()) + return true; + + sys::TimeValue now(0,0),user(0,0),delta(0,0),sys(0,0); + sys::Process::GetTimeUsage(now,user,sys); + + bool success = solver->getInitialValues(Query(state.constraints, + ref<Expr>(0, Expr::Bool)), + objects, result); + + sys::Process::GetTimeUsage(delta,user,sys); + delta -= now; + stats::solverTime += delta.usec(); + state.queryCost += delta.usec()/1000000.; + + return success; +} + +std::pair< ref<Expr>, ref<Expr> > +TimingSolver::getRange(const ExecutionState& state, ref<Expr> expr) { + return solver->getRange(Query(state.constraints, expr)); +} diff --git a/lib/Core/TimingSolver.h b/lib/Core/TimingSolver.h new file mode 100644 index 00000000..875216d9 --- /dev/null +++ b/lib/Core/TimingSolver.h @@ -0,0 +1,70 @@ +//===-- TimingSolver.h ------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_TIMINGSOLVER_H +#define KLEE_TIMINGSOLVER_H + +#include "klee/Expr.h" +#include "klee/Solver.h" + +#include <vector> + +namespace klee { + class ExecutionState; + class Solver; + class STPSolver; + + /// TimingSolver - A simple class which wraps a solver and handles + /// tracking the statistics that we care about. + class TimingSolver { + public: + Solver *solver; + STPSolver *stpSolver; + bool simplifyExprs; + + public: + /// TimingSolver - Construct a new timing solver. + /// + /// \param _simplifyExprs - Whether expressions should be + /// simplified (via the constraint manager interface) prior to + /// querying. + TimingSolver(Solver *_solver, STPSolver *_stpSolver, + bool _simplifyExprs = true) + : solver(_solver), stpSolver(_stpSolver), simplifyExprs(_simplifyExprs) {} + ~TimingSolver() { + delete solver; + } + + void setTimeout(double t) { + stpSolver->setTimeout(t); + } + + bool evaluate(const ExecutionState&, ref<Expr>, Solver::Validity &result); + + bool mustBeTrue(const ExecutionState&, ref<Expr>, bool &result); + + bool mustBeFalse(const ExecutionState&, ref<Expr>, bool &result); + + bool mayBeTrue(const ExecutionState&, ref<Expr>, bool &result); + + bool mayBeFalse(const ExecutionState&, ref<Expr>, bool &result); + + bool getValue(const ExecutionState &, ref<Expr> expr, ref<Expr> &result); + + bool getInitialValues(const ExecutionState&, + const std::vector<const Array*> &objects, + std::vector< std::vector<unsigned char> > &result); + + virtual std::pair< ref<Expr>, ref<Expr> > + getRange(const ExecutionState&, ref<Expr> query); + }; + +} + +#endif diff --git a/lib/Core/UserSearcher.cpp b/lib/Core/UserSearcher.cpp new file mode 100644 index 00000000..1aff9e5e --- /dev/null +++ b/lib/Core/UserSearcher.cpp @@ -0,0 +1,175 @@ +//===-- UserSearcher.cpp --------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Common.h" + +#include "UserSearcher.h" + +#include "Searcher.h" +#include "Executor.h" + +#include "llvm/Support/CommandLine.h" + +using namespace llvm; +using namespace klee; + +namespace { + cl::opt<bool> + UseRandomSearch("use-random-search"); + + cl::opt<bool> + UseInterleavedRS("use-interleaved-RS"); + + cl::opt<bool> + UseInterleavedNURS("use-interleaved-NURS"); + + cl::opt<bool> + UseInterleavedMD2UNURS("use-interleaved-MD2U-NURS"); + + cl::opt<bool> + UseInterleavedInstCountNURS("use-interleaved-icnt-NURS"); + + cl::opt<bool> + UseInterleavedCPInstCountNURS("use-interleaved-cpicnt-NURS"); + + cl::opt<bool> + UseInterleavedQueryCostNURS("use-interleaved-query-cost-NURS"); + + cl::opt<bool> + UseInterleavedCovNewNURS("use-interleaved-covnew-NURS"); + + cl::opt<bool> + UseNonUniformRandomSearch("use-non-uniform-random-search"); + + cl::opt<bool> + UseRandomPathSearch("use-random-path"); + + cl::opt<WeightedRandomSearcher::WeightType> + WeightType("weight-type", cl::desc("Set the weight type for --use-non-uniform-random-search"), + cl::values(clEnumValN(WeightedRandomSearcher::Depth, "none", "use (2^depth)"), + clEnumValN(WeightedRandomSearcher::InstCount, "icnt", "use current pc exec count"), + clEnumValN(WeightedRandomSearcher::CPInstCount, "cpicnt", "use current pc exec count"), + clEnumValN(WeightedRandomSearcher::QueryCost, "query-cost", "use query cost"), + clEnumValN(WeightedRandomSearcher::MinDistToUncovered, "md2u", "use min dist to uncovered"), + clEnumValN(WeightedRandomSearcher::CoveringNew, "covnew", "use min dist to uncovered + coveringNew flag"), + clEnumValEnd)); + + cl::opt<bool> + UseMerge("use-merge", + cl::desc("Enable support for klee_merge() (experimental)")); + + cl::opt<bool> + UseBumpMerge("use-bump-merge", + cl::desc("Enable support for klee_merge() (extra experimental)")); + + cl::opt<bool> + UseIterativeDeepeningTimeSearch("use-iterative-deepening-time-search", + cl::desc("(experimental)")); + + cl::opt<bool> + UseBatchingSearch("use-batching-search", + cl::desc("Use batching searcher (keep running selected state for N instructions/time, see --batch-instructions and --batch-time")); + + cl::opt<unsigned> + BatchInstructions("batch-instructions", + cl::desc("Number of instructions to batch when using --use-batching-search"), + cl::init(10000)); + + cl::opt<double> + BatchTime("batch-time", + cl::desc("Amount of time to batch when using --use-batching-search"), + cl::init(5.0)); +} + +bool klee::userSearcherRequiresMD2U() { + return (WeightType==WeightedRandomSearcher::MinDistToUncovered || + WeightType==WeightedRandomSearcher::CoveringNew || + UseInterleavedMD2UNURS || + UseInterleavedCovNewNURS || + UseInterleavedInstCountNURS || + UseInterleavedCPInstCountNURS || + UseInterleavedQueryCostNURS); +} + +// FIXME: Remove. 
+bool klee::userSearcherRequiresBranchSequences() { + return false; +} + +Searcher *klee::constructUserSearcher(Executor &executor) { + Searcher *searcher = 0; + + if (UseRandomPathSearch) { + searcher = new RandomPathSearcher(executor); + } else if (UseNonUniformRandomSearch) { + searcher = new WeightedRandomSearcher(executor, WeightType); + } else if (UseRandomSearch) { + searcher = new RandomSearcher(); + } else { + searcher = new DFSSearcher(); + } + + if (UseInterleavedNURS || UseInterleavedMD2UNURS || UseInterleavedRS || + UseInterleavedCovNewNURS || UseInterleavedInstCountNURS || + UseInterleavedCPInstCountNURS || UseInterleavedQueryCostNURS) { + std::vector<Searcher *> s; + s.push_back(searcher); + + if (UseInterleavedNURS) + s.push_back(new WeightedRandomSearcher(executor, + WeightedRandomSearcher::Depth)); + if (UseInterleavedMD2UNURS) + s.push_back(new WeightedRandomSearcher(executor, + WeightedRandomSearcher::MinDistToUncovered)); + + if (UseInterleavedCovNewNURS) + s.push_back(new WeightedRandomSearcher(executor, + WeightedRandomSearcher::CoveringNew)); + + if (UseInterleavedInstCountNURS) + s.push_back(new WeightedRandomSearcher(executor, + WeightedRandomSearcher::InstCount)); + + if (UseInterleavedCPInstCountNURS) + s.push_back(new WeightedRandomSearcher(executor, + WeightedRandomSearcher::CPInstCount)); + + if (UseInterleavedQueryCostNURS) + s.push_back(new WeightedRandomSearcher(executor, + WeightedRandomSearcher::QueryCost)); + + if (UseInterleavedRS) + s.push_back(new RandomSearcher()); + + searcher = new InterleavedSearcher(s); + } + + if (UseBatchingSearch) { + searcher = new BatchingSearcher(searcher, BatchTime, BatchInstructions); + } + + if (UseMerge) { + assert(!UseBumpMerge); + searcher = new MergingSearcher(executor, searcher); + } else if (UseBumpMerge) { + searcher = new BumpMergingSearcher(executor, searcher); + } + + if (UseIterativeDeepeningTimeSearch) { + searcher = new IterativeDeepeningTimeSearcher(searcher); + } + + std::ostream &os = executor.getHandler().getInfoStream(); + + os << "BEGIN searcher description\n"; + searcher->printName(os); + os << "END searcher description\n"; + + return searcher; +} diff --git a/lib/Core/UserSearcher.h b/lib/Core/UserSearcher.h new file mode 100644 index 00000000..9571bf5b --- /dev/null +++ b/lib/Core/UserSearcher.h @@ -0,0 +1,25 @@ +//===-- UserSearcher.h ------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_USERSEARCHER_H +#define KLEE_USERSEARCHER_H + +namespace klee { + class Executor; + class Searcher; + + // XXX gross, should be on demand? + bool userSearcherRequiresMD2U(); + + bool userSearcherRequiresBranchSequences(); + + Searcher *constructUserSearcher(Executor &executor); +} + +#endif diff --git a/lib/Expr/Constraints.cpp b/lib/Expr/Constraints.cpp new file mode 100644 index 00000000..e9c376f4 --- /dev/null +++ b/lib/Expr/Constraints.cpp @@ -0,0 +1,155 @@ +//===-- Constraints.cpp ---------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "klee/Constraints.h" + +#include "klee/util/ExprPPrinter.h" +#include "klee/util/ExprVisitor.h" + +#include <iostream> +#include <map> + +using namespace klee; + +class ExprReplaceVisitor : public ExprVisitor { +private: + ref<Expr> src, dst; + +public: + ExprReplaceVisitor(ref<Expr> _src, ref<Expr> _dst) : src(_src), dst(_dst) {} + + Action visitExpr(const Expr &e) { + if (e == *src.get()) { + return Action::changeTo(dst); + } else { + return Action::doChildren(); + } + } + + Action visitExprPost(const Expr &e) { + if (e == *src.get()) { + return Action::changeTo(dst); + } else { + return Action::doChildren(); + } + } +}; + +class ExprReplaceVisitor2 : public ExprVisitor { +private: + const std::map< ref<Expr>, ref<Expr> > &replacements; + +public: + ExprReplaceVisitor2(const std::map< ref<Expr>, ref<Expr> > &_replacements) + : ExprVisitor(true), + replacements(_replacements) {} + + Action visitExprPost(const Expr &e) { + std::map< ref<Expr>, ref<Expr> >::const_iterator it = + replacements.find(ref<Expr>((Expr*) &e)); + if (it!=replacements.end()) { + return Action::changeTo(it->second); + } else { + return Action::doChildren(); + } + } +}; + +bool ConstraintManager::rewriteConstraints(ExprVisitor &visitor) { + ConstraintManager::constraints_ty old; + bool changed = false; + + constraints.swap(old); + for (ConstraintManager::constraints_ty::iterator + it = old.begin(), ie = old.end(); it != ie; ++it) { + ref<Expr> &ce = *it; + ref<Expr> e = visitor.visit(ce); + + if (e!=ce) { + addConstraintInternal(e); // enable further reductions + changed = true; + } else { + constraints.push_back(ce); + } + } + + return changed; +} + +void ConstraintManager::simplifyForValidConstraint(ref<Expr> e) { + // XXX +} + +ref<Expr> ConstraintManager::simplifyExpr(ref<Expr> e) const { + if (e.isConstant()) + return e; + + std::map< ref<Expr>, ref<Expr> > equalities; + + for (ConstraintManager::constraints_ty::const_iterator + it = constraints.begin(), ie = constraints.end(); it != ie; ++it) { + if (const EqExpr *ee = dyn_ref_cast<EqExpr>(*it)) { + if (ee->left.isConstant()) { + equalities.insert(std::make_pair(ee->right, + ee->left)); + } else { + equalities.insert(std::make_pair(*it, + ref<Expr>(1,Expr::Bool))); + } + } else { + equalities.insert(std::make_pair(*it, + ref<Expr>(1,Expr::Bool))); + } + } + + return ExprReplaceVisitor2(equalities).visit(e); +} + +void ConstraintManager::addConstraintInternal(ref<Expr> e) { + // rewrite any known equalities + + // XXX should profile the effects of this and the overhead. + // traversing the constraints looking for equalities is hardly the + // slowest thing we do, but it is probably nicer to have a + // ConstraintSet ADT which efficiently remembers obvious patterns + // (byte-constant comparison). 
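+  // Illustrative example of the rewriting the Eq case below triggers: if
+  // the set already holds (x + 1 <u 10) and we now add (5 == x), the
+  // ExprReplaceVisitor substitutes 5 for x in the old constraint, which then
+  // folds to a constant true and is dropped, leaving only (5 == x).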
+ + switch (e.getKind()) { + case Expr::Constant: + assert(e.getConstantValue() && "attempt to add invalid (false) constraint"); + break; + + // split to enable finer grained independence and other optimizations + case Expr::And: { + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + addConstraintInternal(be->left); + addConstraintInternal(be->right); + break; + } + + case Expr::Eq: { + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + if (be->left.isConstant()) { + ExprReplaceVisitor visitor(be->right, be->left); + rewriteConstraints(visitor); + } + constraints.push_back(e); + break; + } + + default: + constraints.push_back(e); + break; + } +} + +void ConstraintManager::addConstraint(ref<Expr> e) { + e = simplifyExpr(e); + addConstraintInternal(e); +} diff --git a/lib/Expr/Expr.cpp b/lib/Expr/Expr.cpp new file mode 100644 index 00000000..55b9a0a4 --- /dev/null +++ b/lib/Expr/Expr.cpp @@ -0,0 +1,1122 @@ +//===-- Expr.cpp ----------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Expr.h" + + +#include "klee/Machine.h" +// FIXME: This shouldn't be here. +//#include "klee/Memory.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Streams.h" +// FIXME: We shouldn't need this once fast constant support moves into +// Core. If we need to do arithmetic, we probably want to use APInt. +#include "klee/Internal/Support/IntEvaluation.h" + +#include "klee/util/ExprPPrinter.h" + +using namespace klee; +using namespace llvm; + +namespace { + cl::opt<bool> + ConstArrayOpt("const-array-opt", + cl::init(false), + cl::desc("Enable various optimizations involving all-constant arrays.")); +} + +/***/ + +unsigned Expr::count = 0; + +ref<Expr> Expr::createTempRead(const Array *array, Expr::Width w) { + UpdateList ul(array, true, 0); + + switch (w) { + case Expr::Bool: + return ZExtExpr::create(ReadExpr::create(ul, + ref<Expr>(0,kMachinePointerType)), + Expr::Bool); + case Expr::Int8: + return ReadExpr::create(ul, + ref<Expr>(0,kMachinePointerType)); + case Expr::Int16: + return ConcatExpr::create(ReadExpr::create(ul, + ref<Expr>(1,kMachinePointerType)), + ReadExpr::create(ul, + ref<Expr>(0,kMachinePointerType))); + case Expr::Int32: + return ConcatExpr::create4(ReadExpr::create(ul, + ref<Expr>(3,kMachinePointerType)), + ReadExpr::create(ul, + ref<Expr>(2,kMachinePointerType)), + ReadExpr::create(ul, + ref<Expr>(1,kMachinePointerType)), + ReadExpr::create(ul, + ref<Expr>(0,kMachinePointerType))); + case Expr::Int64: + return ConcatExpr::create8(ReadExpr::create(ul, + ref<Expr>(7,kMachinePointerType)), + ReadExpr::create(ul, + ref<Expr>(6,kMachinePointerType)), + ReadExpr::create(ul, + ref<Expr>(5,kMachinePointerType)), + ReadExpr::create(ul, + ref<Expr>(4,kMachinePointerType)), + ReadExpr::create(ul, + ref<Expr>(3,kMachinePointerType)), + ReadExpr::create(ul, + ref<Expr>(2,kMachinePointerType)), + ReadExpr::create(ul, + ref<Expr>(1,kMachinePointerType)), + ReadExpr::create(ul, + ref<Expr>(0,kMachinePointerType))); + default: assert(0 && "invalid width"); + } +} + +// returns 0 if b is structurally equal to *this +int Expr::compare(const Expr &b) const { + if (this == &b) return 0; + + Kind ak = getKind(), bk = b.getKind(); + if (ak!=bk) + return (ak < bk) ? 
-1 : 1; + + if (hashValue != b.hashValue) + return (hashValue < b.hashValue) ? -1 : 1; + + if (int res = compareContents(b)) + return res; + + unsigned aN = getNumKids(); + for (unsigned i=0; i<aN; i++) + if (int res = getKid(i).compare(b.getKid(i))) + return res; + + return 0; +} + +void Expr::printKind(std::ostream &os, Kind k) { + switch(k) { +#define X(C) case C: os << #C; break + X(Constant); + X(NotOptimized); + X(Read); + X(Select); + X(Concat); + X(Extract); + X(ZExt); + X(SExt); + X(Add); + X(Sub); + X(Mul); + X(UDiv); + X(SDiv); + X(URem); + X(SRem); + X(And); + X(Or); + X(Xor); + X(Shl); + X(LShr); + X(AShr); + X(Eq); + X(Ne); + X(Ult); + X(Ule); + X(Ugt); + X(Uge); + X(Slt); + X(Sle); + X(Sgt); + X(Sge); +#undef X + default: + assert(0 && "invalid kind"); + } +} + +//////// +// +// Simple hash functions for various kinds of Exprs +// +/////// + +unsigned Expr::computeHash() { + unsigned res = getKind() * Expr::MAGIC_HASH_CONSTANT; + + int n = getNumKids(); + for (int i = 0; i < n; i++) { + res <<= 1; + res ^= getKid(i).hash() * Expr::MAGIC_HASH_CONSTANT; + } + + hashValue = res; + return hashValue; +} + +unsigned ConstantExpr::computeHash() { + hashValue = Expr::hashConstant(asUInt64, width); + return hashValue; +} + +unsigned CastExpr::computeHash() { + unsigned res = getWidth() * Expr::MAGIC_HASH_CONSTANT; + hashValue = res ^ src.hash() * Expr::MAGIC_HASH_CONSTANT; + return hashValue; +} + +unsigned ExtractExpr::computeHash() { + unsigned res = offset * Expr::MAGIC_HASH_CONSTANT; + res ^= getWidth() * Expr::MAGIC_HASH_CONSTANT; + hashValue = res ^ expr.hash() * Expr::MAGIC_HASH_CONSTANT; + return hashValue; +} + +unsigned ReadExpr::computeHash() { + unsigned res = index.hash() * Expr::MAGIC_HASH_CONSTANT; + res ^= updates.hash(); + hashValue = res; + return hashValue; +} + +uint64_t Expr::getConstantValue() const { + assert(getKind() == Constant); + return static_cast<const ConstantExpr*>(this)->asUInt64; +} + +ref<Expr> Expr::createFromKind(Kind k, std::vector<CreateArg> args) { + unsigned numArgs = args.size(); + + switch(k) { + case NotOptimized: + assert(numArgs == 1 && args[0].isExpr() && + "invalid args array for given opcode"); + return NotOptimizedExpr::create(args[0].expr); + + case Select: + assert(numArgs == 3 && args[0].isExpr() && + args[1].isExpr() && args[2].isExpr() && + "invalid args array for Select opcode"); + return SelectExpr::create(args[0].expr, + args[1].expr, + args[2].expr); + + case Concat: { + assert(numArgs == 2 && args[0].isExpr() && args[1].isExpr() && + "invalid args array for Concat opcode"); + + return ConcatExpr::create(args[0].expr, args[1].expr); + } + +#define CAST_EXPR_CASE(T) \ + case T: \ + assert(numArgs == 2 && \ + args[0].isExpr() && args[1].isWidth() && \ + "invalid args array for given opcode"); \ + return T ## Expr::create(args[0].expr, args[1].width); \ + +#define BINARY_EXPR_CASE(T) \ + case T: \ + assert(numArgs == 2 && \ + args[0].isExpr() && args[1].isExpr() && \ + "invalid args array for given opcode"); \ + return T ## Expr::create(args[0].expr, args[1].expr); \ + + CAST_EXPR_CASE(ZExt); + CAST_EXPR_CASE(SExt); + + BINARY_EXPR_CASE(Add); + BINARY_EXPR_CASE(Sub); + BINARY_EXPR_CASE(Mul); + BINARY_EXPR_CASE(UDiv); + BINARY_EXPR_CASE(SDiv); + BINARY_EXPR_CASE(URem); + BINARY_EXPR_CASE(SRem); + BINARY_EXPR_CASE(And); + BINARY_EXPR_CASE(Or); + BINARY_EXPR_CASE(Xor); + BINARY_EXPR_CASE(Shl); + BINARY_EXPR_CASE(LShr); + BINARY_EXPR_CASE(AShr); + + BINARY_EXPR_CASE(Eq); + BINARY_EXPR_CASE(Ne); + BINARY_EXPR_CASE(Ult); + 
BINARY_EXPR_CASE(Ule); + BINARY_EXPR_CASE(Ugt); + BINARY_EXPR_CASE(Uge); + BINARY_EXPR_CASE(Slt); + BINARY_EXPR_CASE(Sle); + BINARY_EXPR_CASE(Sgt); + BINARY_EXPR_CASE(Sge); + + case Constant: + case Extract: + case Read: + default: + assert(0 && "invalid kind"); + } + +} + + +void Expr::printWidth(std::ostream &os, Width width) { + switch(width) { + case Expr::Bool: os << "Expr::Bool"; break; + case Expr::Int8: os << "Expr::Int8"; break; + case Expr::Int16: os << "Expr::Int16"; break; + case Expr::Int32: os << "Expr::Int32"; break; + case Expr::Int64: os << "Expr::Int64"; break; + default: os << "<invalid type: " << (unsigned) width << ">"; + } +} + +Expr::Width Expr::getWidthForLLVMType(const llvm::Type *t) { + switch (t->getTypeID()) { + case llvm::Type::IntegerTyID: { + Width w = cast<IntegerType>(t)->getBitWidth(); + + // should remove this limitation soon + if (w == 1 || w == 8 || w == 16 || w == 32 || w == 64) + return w; + else { + assert(0 && "XXX arbitrary bit widths unsupported"); + abort(); + } + } + case llvm::Type::FloatTyID: return Expr::Int32; + case llvm::Type::DoubleTyID: return Expr::Int64; + case llvm::Type::X86_FP80TyID: return Expr::Int64; // XXX: needs to be fixed + case llvm::Type::PointerTyID: return kMachinePointerType; + default: + cerr << "non-primitive type argument to Expr::getTypeForLLVMType()\n"; + abort(); + } +} + +ref<Expr> Expr::createImplies(ref<Expr> hyp, ref<Expr> conc) { + return OrExpr::create(Expr::createNot(hyp), conc); +} + +ref<Expr> Expr::createIsZero(ref<Expr> e) { + return EqExpr::create(e, ConstantExpr::create(0, e.getWidth())); +} + +ref<Expr> Expr::createCoerceToPointerType(ref<Expr> e) { + return ZExtExpr::create(e, kMachinePointerType); +} + +ref<Expr> Expr::createNot(ref<Expr> e) { + return createIsZero(e); +} + +ref<Expr> Expr::createPointer(uint64_t v) { + return ConstantExpr::create(v, kMachinePointerType); +} + +Expr* Expr::createConstant(uint64_t val, Width w) { + Expr *r = new ConstantExpr(val, w); + r->computeHash(); + return r; +} + +void Expr::print(std::ostream &os) const { + const ref<Expr> tmp((Expr*)this); + ExprPPrinter::printOne(os, "", tmp); +} + +/***/ + +ref<ConstantExpr> ConstantExpr::fromMemory(void *address, Width width) { + switch (width) { + case Expr::Bool: return ConstantExpr::create(*(( uint8_t*) address), width); + case Expr::Int8: return ConstantExpr::create(*(( uint8_t*) address), width); + case Expr::Int16: return ConstantExpr::create(*((uint16_t*) address), width); + case Expr::Int32: return ConstantExpr::create(*((uint32_t*) address), width); + case Expr::Int64: return ConstantExpr::create(*((uint64_t*) address), width); + default: assert(0 && "invalid type"); + } +} + +void ConstantExpr::toMemory(void *address) { + switch (width) { + case Expr::Bool: *(( uint8_t*) address) = asUInt64; break; + case Expr::Int8: *(( uint8_t*) address) = asUInt64; break; + case Expr::Int16: *((uint16_t*) address) = asUInt64; break; + case Expr::Int32: *((uint32_t*) address) = asUInt64; break; + case Expr::Int64: *((uint64_t*) address) = asUInt64; break; + default: assert(0 && "invalid type"); + } +} + +/***/ + +ref<Expr> NotOptimizedExpr::create(ref<Expr> src) { + return NotOptimizedExpr::alloc(src); +} + +ref<Expr> ReadExpr::create(const UpdateList &ul, ref<Expr> index) { + // rollback index when possible... + + // XXX this doesn't really belong here... 
there are basically two + // cases, one is rebuild, where we want to optimistically try various + // optimizations when the index has changed, and the other is + // initial creation, where we expect the ObjectState to have constructed + // a smart UpdateList so it is not worth rescanning. + + const UpdateNode *un = ul.head; + for (; un; un=un->next) { + ref<Expr> cond = EqExpr::create(index, un->index); + + if (cond.isConstant()) { + if (cond.getConstantValue()) + return un->value; + } else { + break; + } + } + + return ReadExpr::alloc(ul, index); +} + +int ReadExpr::compareContents(const Expr &b) const { + return updates.compare(static_cast<const ReadExpr&>(b).updates); +} + +ref<Expr> SelectExpr::create(ref<Expr> c, ref<Expr> t, ref<Expr> f) { + Expr::Width kt = t.getWidth(); + + assert(c.getWidth()==Bool && "type mismatch"); + assert(kt==f.getWidth() && "type mismatch"); + + if (c.isConstant()) { + return c.getConstantValue() ? t : f; + } else if (t==f) { + return t; + } else if (kt==Expr::Bool) { // c ? t : f <=> (c and t) or (not c and f) + if (t.isConstant()) { + if (t.getConstantValue()) { + return OrExpr::create(c, f); + } else { + return AndExpr::create(Expr::createNot(c), f); + } + } else if (f.isConstant()) { + if (f.getConstantValue()) { + return OrExpr::create(Expr::createNot(c), t); + } else { + return AndExpr::create(c, t); + } + } + } + + return SelectExpr::alloc(c, t, f); +} + +/***/ + + +ref<Expr> ConcatExpr::create(const ref<Expr> &l, const ref<Expr> &r) { + Expr::Width w = l.getWidth() + r.getWidth(); + + /* Constant folding */ + if (l.getKind() == Expr::Constant && r.getKind() == Expr::Constant) { + // XXX: should fix this constant limitation soon + assert(w <= 64 && "ConcatExpr::create(): don't support concats describing constants greater than 64 bits yet"); + + uint64_t res = (l.getConstantValue() << r.getWidth()) + r.getConstantValue(); + return ConstantExpr::create(res, w); + } + + // Merge contiguous Extracts + if (l.getKind() == Expr::Extract && r.getKind() == Expr::Extract) { + const ExtractExpr* ee_left = static_ref_cast<ExtractExpr>(l); + const ExtractExpr* ee_right = static_ref_cast<ExtractExpr>(r); + if (ee_left->expr == ee_right->expr && + ee_right->offset + ee_right->width == ee_left->offset) { + return ExtractExpr::create(ee_left->expr, ee_right->offset, w); + } + } + + return ConcatExpr::alloc(l, r); +} + +/// Shortcut to concat N kids. The chain returned is unbalanced to the right +ref<Expr> ConcatExpr::createN(unsigned n_kids, const ref<Expr> kids[]) { + assert(n_kids > 0); + if (n_kids == 1) + return kids[0]; + + ref<Expr> r = ConcatExpr::create(kids[n_kids-2], kids[n_kids-1]); + for (int i=n_kids-3; i>=0; i--) + r = ConcatExpr::create(kids[i], r); + return r; +} + +/// Shortcut to concat 4 kids. The chain returned is unbalanced to the right +ref<Expr> ConcatExpr::create4(const ref<Expr> &kid1, const ref<Expr> &kid2, + const ref<Expr> &kid3, const ref<Expr> &kid4) { + return ConcatExpr::create(kid1, ConcatExpr::create(kid2, ConcatExpr::create(kid3, kid4))); +} + +/// Shortcut to concat 8 kids. 
The chain returned is unbalanced to the right +ref<Expr> ConcatExpr::create8(const ref<Expr> &kid1, const ref<Expr> &kid2, + const ref<Expr> &kid3, const ref<Expr> &kid4, + const ref<Expr> &kid5, const ref<Expr> &kid6, + const ref<Expr> &kid7, const ref<Expr> &kid8) { + return ConcatExpr::create(kid1, ConcatExpr::create(kid2, ConcatExpr::create(kid3, + ConcatExpr::create(kid4, ConcatExpr::create4(kid5, kid6, kid7, kid8))))); +} + +/***/ + +ref<Expr> ExtractExpr::create(ref<Expr> expr, unsigned off, Width w) { + unsigned kw = expr.getWidth(); + assert(w > 0 && off + w <= kw && "invalid extract"); + + if (w == kw) + return expr; + else if (expr.isConstant()) { + return ConstantExpr::create(ints::trunc(expr.getConstantValue() >> off, w, kw), w); + } + else + // Extract(Concat) + if (ConcatExpr *ce = dyn_ref_cast<ConcatExpr>(expr)) { + // if the extract skips the right side of the concat + if (off >= ce->getRight().getWidth()) + return ExtractExpr::create(ce->getLeft(), off - ce->getRight().getWidth(), w); + + // if the extract skips the left side of the concat + if (off + w <= ce->getRight().getWidth()) + return ExtractExpr::create(ce->getRight(), off, w); + + // E(C(x,y)) = C(E(x), E(y)) + return ConcatExpr::create(ExtractExpr::create(ce->getKid(0), 0, w - ce->getKid(1).getWidth() + off), + ExtractExpr::create(ce->getKid(1), off, ce->getKid(1).getWidth() - off)); + } + + return ExtractExpr::alloc(expr, off, w); +} + + +ref<Expr> ExtractExpr::createByteOff(ref<Expr> expr, unsigned offset, Width bits) { + return ExtractExpr::create(expr, 8*offset, bits); +} + +/***/ + +ref<Expr> ZExtExpr::create(const ref<Expr> &e, Width w) { + unsigned kBits = e.getWidth(); + if (w == kBits) { + return e; + } else if (w < kBits) { // trunc + return ExtractExpr::createByteOff(e, 0, w); + } else { + if (e.isConstant()) { + return ConstantExpr::create(ints::zext(e.getConstantValue(), w, kBits), + w); + } + + return ZExtExpr::alloc(e, w); + } +} + +ref<Expr> SExtExpr::create(const ref<Expr> &e, Width w) { + unsigned kBits = e.getWidth(); + if (w == kBits) { + return e; + } else if (w < kBits) { // trunc + return ExtractExpr::createByteOff(e, 0, w); + } else { + if (e.isConstant()) { + return ConstantExpr::create(ints::sext(e.getConstantValue(), w, kBits), + w); + } + + return SExtExpr::alloc(e, w); + } +} + +/***/ + +static ref<Expr> AndExpr_create(Expr *l, Expr *r); +static ref<Expr> XorExpr_create(Expr *l, Expr *r); + +static ref<Expr> EqExpr_createPartial(Expr *l, const ref<Expr> &cr); +static ref<Expr> AndExpr_createPartialR(const ref<Expr> &cl, Expr *r); +static ref<Expr> SubExpr_createPartialR(const ref<Expr> &cl, Expr *r); +static ref<Expr> XorExpr_createPartialR(const ref<Expr> &cl, Expr *r); + +static ref<Expr> AddExpr_createPartialR(const ref<Expr> &cl, Expr *r) { + assert(cl.isConstant() && "non-constant passed in place of constant"); + uint64_t value = cl.getConstantValue(); + Expr::Width type = cl.getWidth(); + + if (type==Expr::Bool) { + return XorExpr_createPartialR(cl, r); + } else if (!value) { + return r; + } else { + Expr::Kind rk = r->getKind(); + if (rk==Expr::Add && r->getKid(0).isConstant()) { // A + (B+c) == (A+B) + c + return AddExpr::create(AddExpr::create(cl, r->getKid(0)), + r->getKid(1)); + } else if (rk==Expr::Sub && r->getKid(0).isConstant()) { // A + (B-c) == (A+B) - c + return SubExpr::create(AddExpr::create(cl, r->getKid(0)), + r->getKid(1)); + } else { + return AddExpr::alloc(cl, r); + } + } +} +static ref<Expr> AddExpr_createPartial(Expr *l, const ref<Expr> &cr) { + return 
AddExpr_createPartialR(cr, l); +} +static ref<Expr> AddExpr_create(Expr *l, Expr *r) { + Expr::Width type = l->getWidth(); + + if (type == Expr::Bool) { + return XorExpr_create(l, r); + } else { + Expr::Kind lk = l->getKind(), rk = r->getKind(); + if (lk==Expr::Add && l->getKid(0).isConstant()) { // (k+a)+b = k+(a+b) + return AddExpr::create(l->getKid(0), + AddExpr::create(l->getKid(1), r)); + } else if (lk==Expr::Sub && l->getKid(0).isConstant()) { // (k-a)+b = k+(b-a) + return AddExpr::create(l->getKid(0), + SubExpr::create(r, l->getKid(1))); + } else if (rk==Expr::Add && r->getKid(0).isConstant()) { // a + (k+b) = k+(a+b) + return AddExpr::create(r->getKid(0), + AddExpr::create(l, r->getKid(1))); + } else if (rk==Expr::Sub && r->getKid(0).isConstant()) { // a + (k-b) = k+(a-b) + return AddExpr::create(r->getKid(0), + SubExpr::create(l, r->getKid(1))); + } else { + return AddExpr::alloc(l, r); + } + } +} + +static ref<Expr> SubExpr_createPartialR(const ref<Expr> &cl, Expr *r) { + assert(cl.isConstant() && "non-constant passed in place of constant"); + Expr::Width type = cl.getWidth(); + + if (type==Expr::Bool) { + return XorExpr_createPartialR(cl, r); + } else { + Expr::Kind rk = r->getKind(); + if (rk==Expr::Add && r->getKid(0).isConstant()) { // A - (B+c) == (A-B) - c + return SubExpr::create(SubExpr::create(cl, r->getKid(0)), + r->getKid(1)); + } else if (rk==Expr::Sub && r->getKid(0).isConstant()) { // A - (B-c) == (A-B) + c + return AddExpr::create(SubExpr::create(cl, r->getKid(0)), + r->getKid(1)); + } else { + return SubExpr::alloc(cl, r); + } + } +} +static ref<Expr> SubExpr_createPartial(Expr *l, const ref<Expr> &cr) { + assert(cr.isConstant() && "non-constant passed in place of constant"); + uint64_t value = cr.getConstantValue(); + Expr::Width width = cr.getWidth(); + uint64_t nvalue = ints::sub(0, value, width); + + return AddExpr_createPartial(l, ConstantExpr::create(nvalue, width)); +} +static ref<Expr> SubExpr_create(Expr *l, Expr *r) { + Expr::Width type = l->getWidth(); + + if (type == Expr::Bool) { + return XorExpr_create(l, r); + } else if (*l==*r) { + return ref<Expr>(0, type); + } else { + Expr::Kind lk = l->getKind(), rk = r->getKind(); + if (lk==Expr::Add && l->getKid(0).isConstant()) { // (k+a)-b = k+(a-b) + return AddExpr::create(l->getKid(0), + SubExpr::create(l->getKid(1), r)); + } else if (lk==Expr::Sub && l->getKid(0).isConstant()) { // (k-a)-b = k-(a+b) + return SubExpr::create(l->getKid(0), + AddExpr::create(l->getKid(1), r)); + } else if (rk==Expr::Add && r->getKid(0).isConstant()) { // a - (k+b) = (a-c) - k + return SubExpr::create(SubExpr::create(l, r->getKid(1)), + r->getKid(0)); + } else if (rk==Expr::Sub && r->getKid(0).isConstant()) { // a - (k-b) = (a+b) - k + return SubExpr::create(AddExpr::create(l, r->getKid(1)), + r->getKid(0)); + } else { + return SubExpr::alloc(l, r); + } + } +} + +static ref<Expr> MulExpr_createPartialR(const ref<Expr> &cl, Expr *r) { + assert(cl.isConstant() && "non-constant passed in place of constant"); + uint64_t value = cl.getConstantValue(); + Expr::Width type = cl.getWidth(); + + if (type == Expr::Bool) { + return AndExpr_createPartialR(cl, r); + } else if (value == 1) { + return r; + } else if (!value) { + return cl; + } else { + return MulExpr::alloc(cl, r); + } +} +static ref<Expr> MulExpr_createPartial(Expr *l, const ref<Expr> &cr) { + return MulExpr_createPartialR(cr, l); +} +static ref<Expr> MulExpr_create(Expr *l, Expr *r) { + Expr::Width type = l->getWidth(); + + if (type == Expr::Bool) { + return 
AndExpr::alloc(l, r); + } else { + return MulExpr::alloc(l, r); + } +} + +static ref<Expr> AndExpr_createPartial(Expr *l, const ref<Expr> &cr) { + assert(cr.isConstant() && "non-constant passed in place of constant"); + uint64_t value = cr.getConstantValue(); + Expr::Width width = cr.getWidth();; + + if (value==ints::sext(1, width, 1)) { + return l; + } else if (!value) { + return cr; + } else { + return AndExpr::alloc(l, cr); + } +} +static ref<Expr> AndExpr_createPartialR(const ref<Expr> &cl, Expr *r) { + return AndExpr_createPartial(r, cl); +} +static ref<Expr> AndExpr_create(Expr *l, Expr *r) { + return AndExpr::alloc(l, r); +} + +static ref<Expr> OrExpr_createPartial(Expr *l, const ref<Expr> &cr) { + assert(cr.isConstant() && "non-constant passed in place of constant"); + uint64_t value = cr.getConstantValue(); + Expr::Width width = cr.getWidth(); + + if (value == ints::sext(1, width, 1)) { + return cr; + } else if (!value) { + return l; + } else { + return OrExpr::alloc(l, cr); + } +} +static ref<Expr> OrExpr_createPartialR(const ref<Expr> &cl, Expr *r) { + return OrExpr_createPartial(r, cl); +} +static ref<Expr> OrExpr_create(Expr *l, Expr *r) { + return OrExpr::alloc(l, r); +} + +static ref<Expr> XorExpr_createPartialR(const ref<Expr> &cl, Expr *r) { + assert(cl.isConstant() && "non-constant passed in place of constant"); + uint64_t value = cl.getConstantValue(); + Expr::Width type = cl.getWidth(); + + if (type==Expr::Bool) { + if (value) { + return EqExpr_createPartial(r, ConstantExpr::create(0, Expr::Bool)); + } else { + return r; + } + } else if (!value) { + return r; + } else { + return XorExpr::alloc(cl, r); + } +} + +static ref<Expr> XorExpr_createPartial(Expr *l, const ref<Expr> &cr) { + return XorExpr_createPartialR(cr, l); +} +static ref<Expr> XorExpr_create(Expr *l, Expr *r) { + return XorExpr::alloc(l, r); +} + +static ref<Expr> UDivExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + if (l.getWidth() == Expr::Bool) { // r must be 1 + return l; + } else{ + return UDivExpr::alloc(l, r); + } +} + +static ref<Expr> SDivExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + if (l.getWidth() == Expr::Bool) { // r must be 1 + return l; + } else{ + return SDivExpr::alloc(l, r); + } +} + +static ref<Expr> URemExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + if (l.getWidth() == Expr::Bool) { // r must be 1 + return ConstantExpr::create(0, Expr::Bool); + } else{ + return URemExpr::alloc(l, r); + } +} + +static ref<Expr> SRemExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + if (l.getWidth() == Expr::Bool) { // r must be 1 + return ConstantExpr::create(0, Expr::Bool); + } else{ + return SRemExpr::alloc(l, r); + } +} + +static ref<Expr> ShlExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + if (l.getWidth() == Expr::Bool) { // l & !r + return AndExpr::create(l, Expr::createNot(r)); + } else{ + return ShlExpr::alloc(l, r); + } +} + +static ref<Expr> LShrExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + if (l.getWidth() == Expr::Bool) { // l & !r + return AndExpr::create(l, Expr::createNot(r)); + } else{ + return LShrExpr::alloc(l, r); + } +} + +static ref<Expr> AShrExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + if (l.getWidth() == Expr::Bool) { // l + return l; + } else{ + return AShrExpr::alloc(l, r); + } +} + +#define BCREATE_R(_e_op, _op, partialL, partialR) \ +ref<Expr> _e_op ::create(const ref<Expr> &l, const ref<Expr> &r) { \ + assert(l.getWidth()==r.getWidth() && "type mismatch"); \ + if (l.isConstant()) { \ + if (r.isConstant()) { \ + 
Expr::Width width = l.getWidth(); \ + uint64_t val = ints::_op(l.getConstantValue(), \ + r.getConstantValue(), width); \ + return ConstantExpr::create(val, width); \ + } else { \ + return _e_op ## _createPartialR(l, r.get()); \ + } \ + } else if (r.isConstant()) { \ + return _e_op ## _createPartial(l.get(), r); \ + } \ + return _e_op ## _create(l.get(), r.get()); \ +} + +#define BCREATE(_e_op, _op) \ +ref<Expr> _e_op ::create(const ref<Expr> &l, const ref<Expr> &r) { \ + assert(l.getWidth()==r.getWidth() && "type mismatch"); \ + if (l.isConstant()) { \ + if (r.isConstant()) { \ + Expr::Width width = l.getWidth(); \ + uint64_t val = ints::_op(l.getConstantValue(), \ + r.getConstantValue(), width); \ + return ConstantExpr::create(val, width); \ + } \ + } \ + return _e_op ## _create(l, r); \ +} + +BCREATE_R(AddExpr, add, AddExpr_createPartial, AddExpr_createPartialR) +BCREATE_R(SubExpr, sub, SubExpr_createPartial, SubExpr_createPartialR) +BCREATE_R(MulExpr, mul, MulExpr_createPartial, MulExpr_createPartialR) +BCREATE_R(AndExpr, land, AndExpr_createPartial, AndExpr_createPartialR) +BCREATE_R(OrExpr, lor, OrExpr_createPartial, OrExpr_createPartialR) +BCREATE_R(XorExpr, lxor, XorExpr_createPartial, XorExpr_createPartialR) +BCREATE(UDivExpr, udiv) +BCREATE(SDivExpr, sdiv) +BCREATE(URemExpr, urem) +BCREATE(SRemExpr, srem) +BCREATE(ShlExpr, shl) +BCREATE(LShrExpr, lshr) +BCREATE(AShrExpr, ashr) + +#define CMPCREATE(_e_op, _op) \ +ref<Expr> _e_op ::create(const ref<Expr> &l, const ref<Expr> &r) { \ + assert(l.getWidth()==r.getWidth() && "type mismatch"); \ + if (l.isConstant()) { \ + if (r.isConstant()) { \ + Expr::Width width = l.getWidth(); \ + uint64_t val = ints::_op(l.getConstantValue(), \ + r.getConstantValue(), width); \ + return ConstantExpr::create(val, Expr::Bool); \ + } \ + } \ + return _e_op ## _create(l, r); \ +} + +#define CMPCREATE_T(_e_op, _op, _reflexive_e_op, partialL, partialR) \ +ref<Expr> _e_op ::create(const ref<Expr> &l, const ref<Expr> &r) { \ + assert(l.getWidth()==r.getWidth() && "type mismatch"); \ + if (l.isConstant()) { \ + if (r.isConstant()) { \ + Expr::Width width = l.getWidth(); \ + uint64_t val = ints::_op(l.getConstantValue(), \ + r.getConstantValue(), width); \ + return ConstantExpr::create(val, Expr::Bool); \ + } else { \ + return partialR(l, r.get()); \ + } \ + } else if (r.isConstant()) { \ + return partialL(l.get(), r); \ + } else { \ + return _e_op ## _create(l.get(), r.get()); \ + } \ +} + + +static ref<Expr> EqExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + if (l == r) { + return ref<Expr>(1, Expr::Bool); + } else { + return EqExpr::alloc(l, r); + } +} + + +/// Tries to optimize EqExpr cl == rd, where cl is a ConstantExpr and +/// rd a ReadExpr. If rd is a read into an all-constant array, +/// returns a disjunction of equalities on the index. Otherwise, +/// returns the initial equality expression. 
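+/// Illustrative example (array contents are hypothetical): for a fully
+/// constant four-byte array A = {1, 3, 3, 7}, the query (3 == Read(A, i))
+/// can be rewritten as (i == 1) | (i == 2); if no element held the value,
+/// the result would simply be false.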
+static ref<Expr> TryConstArrayOpt(const ref<Expr> &cl, + ReadExpr *rd) { + assert(cl.isConstant() && "constant expression required"); + assert(rd->getKind() == Expr::Read && "read expression required"); + + uint64_t ct = cl.getConstantValue(); + ref<Expr> first_idx_match; + + // number of positions in the array that contain value ct + unsigned matches = 0; + + //llvm::cerr << "Size updates/root: " << rd->updates.getSize() << " / " << (rd->updates.root)->size << "\n"; + + // for now, just assume standard "flushing" of a concrete array, + // where the concrete array has one update for each index, in order + bool all_const = true; + if (rd->updates.getSize() == rd->updates.root->size) { + unsigned k = rd->updates.getSize(); + for (const UpdateNode *un = rd->updates.head; un; un = un->next) { + assert(k > 0); + k--; + + ref<Expr> idx = un->index; + ref<Expr> val = un->value; + if (!idx.isConstant() || !val.isConstant()) { + all_const = false; + //llvm::cerr << "Idx or val not constant\n"; + break; + } + else { + if (idx.getConstantValue() != k) { + all_const = false; + //llvm::cerr << "Wrong constant\n"; + break; + } + if (val.getConstantValue() == ct) { + matches++; + if (matches == 1) + first_idx_match = un->index; + } + } + } + } + else all_const = false; + + if (all_const && matches <= 100) { + // apply optimization + //llvm::cerr << "\n\n=== Applying const array optimization ===\n\n"; + + if (matches == 0) + return ref<Expr>(0, Expr::Bool); + + ref<Expr> res = EqExpr::create(first_idx_match, rd->index); + if (matches == 1) + return res; + + for (const UpdateNode *un = rd->updates.head; un; un = un->next) { + if (un->index != first_idx_match && un->value.getConstantValue() == ct) { + ref<Expr> curr_eq = EqExpr::create(un->index, rd->index); + res = OrExpr::create(curr_eq, res); + } + } + + return res; + } + + return EqExpr_create(cl, ref<Expr>(rd)); +} + + +static ref<Expr> EqExpr_createPartialR(const ref<Expr> &cl, Expr *r) { + assert(cl.isConstant() && "non-constant passed in place of constant"); + uint64_t value = cl.getConstantValue(); + Expr::Width width = cl.getWidth(); + + Expr::Kind rk = r->getKind(); + if (width == Expr::Bool) { + if (value) { + return r; + } else { + // 0 != ... 
+ + if (rk == Expr::Eq) { + const EqExpr *ree = static_ref_cast<EqExpr>(r); + + // eliminate double negation + if (ree->left.isConstant() && + ree->left.getWidth()==Expr::Bool) { + assert(!ree->left.getConstantValue()); + return ree->right; + } + } else if (rk == Expr::Or) { + const OrExpr *roe = static_ref_cast<OrExpr>(r); + + // transform not(or(a,b)) to and(not a, not b) + return AndExpr::create(Expr::createNot(roe->left), + Expr::createNot(roe->right)); + } + } + } else if (rk == Expr::SExt) { + // (sext(a,T)==c) == (a==c) + const SExtExpr *see = static_ref_cast<SExtExpr>(r); + Expr::Width fromBits = see->src.getWidth(); + uint64_t trunc = bits64::truncateToNBits(value, fromBits); + + // pathological check, make sure it is possible to + // sext to this value *from any value* + if (value == ints::sext(trunc, width, fromBits)) { + return EqExpr::create(see->src, ConstantExpr::create(trunc, fromBits)); + } else { + return ConstantExpr::create(0, Expr::Bool); + } + } else if (rk == Expr::ZExt) { + // (zext(a,T)==c) == (a==c) + const ZExtExpr *zee = static_ref_cast<ZExtExpr>(r); + Expr::Width fromBits = zee->src.getWidth(); + uint64_t trunc = bits64::truncateToNBits(value, fromBits); + + // pathological check, make sure it is possible to + // zext to this value *from any value* + if (value == ints::zext(trunc, width, fromBits)) { + return EqExpr::create(zee->src, ConstantExpr::create(trunc, fromBits)); + } else { + return ConstantExpr::create(0, Expr::Bool); + } + } else if (rk==Expr::Add) { + const AddExpr *ae = static_ref_cast<AddExpr>(r); + if (ae->left.isConstant()) { + // c0 = c1 + b => c0 - c1 = b + return EqExpr_createPartialR(SubExpr::create(cl, ae->left), + ae->right.get()); + } + } else if (rk==Expr::Sub) { + const SubExpr *se = static_ref_cast<SubExpr>(r); + if (se->left.isConstant()) { + // c0 = c1 - b => c1 - c0 = b + return EqExpr_createPartialR(SubExpr::create(se->left, cl), + se->right.get()); + } + } else if (rk == Expr::Read && ConstArrayOpt) { + return TryConstArrayOpt(cl, static_cast<ReadExpr*>(r)); + } + + return EqExpr_create(cl, r); +} + +static ref<Expr> EqExpr_createPartial(Expr *l, const ref<Expr> &cr) { + return EqExpr_createPartialR(cr, l); +} + +ref<Expr> NeExpr::create(const ref<Expr> &l, const ref<Expr> &r) { + return EqExpr::create(ConstantExpr::create(0, Expr::Bool), + EqExpr::create(l, r)); +} + +ref<Expr> UgtExpr::create(const ref<Expr> &l, const ref<Expr> &r) { + return UltExpr::create(r, l); +} +ref<Expr> UgeExpr::create(const ref<Expr> &l, const ref<Expr> &r) { + return UleExpr::create(r, l); +} + +ref<Expr> SgtExpr::create(const ref<Expr> &l, const ref<Expr> &r) { + return SltExpr::create(r, l); +} +ref<Expr> SgeExpr::create(const ref<Expr> &l, const ref<Expr> &r) { + return SleExpr::create(r, l); +} + +static ref<Expr> UltExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + Expr::Width t = l.getWidth(); + if (t == Expr::Bool) { // !l && r + return AndExpr::create(Expr::createNot(l), r); + } else { + if (r.isConstant()) { + uint64_t value = r.getConstantValue(); + if (value <= 8) { + ref<Expr> res(0,Expr::Bool); + for (unsigned i=0; i<value; i++) { + res = OrExpr::create(EqExpr::create(l, ref<Expr>(i,t)), res); + } + // llvm::cerr << l << "<" << r << " <=> " << res << "\n"; + return res; + } + } + return UltExpr::alloc(l, r); + } +} + +static ref<Expr> UleExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + if (l.getWidth() == Expr::Bool) { // !(l && !r) + return OrExpr::create(Expr::createNot(l), r); + } else { + return UleExpr::alloc(l, r); + } 
+} + +static ref<Expr> SltExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + if (l.getWidth() == Expr::Bool) { // l && !r + return AndExpr::create(l, Expr::createNot(r)); + } else { + return SltExpr::alloc(l, r); + } +} + +static ref<Expr> SleExpr_create(const ref<Expr> &l, const ref<Expr> &r) { + if (l.getWidth() == Expr::Bool) { // !(!l && r) + return OrExpr::create(l, Expr::createNot(r)); + } else { + return SleExpr::alloc(l, r); + } +} + +CMPCREATE_T(EqExpr, eq, EqExpr, EqExpr_createPartial, EqExpr_createPartialR) +CMPCREATE(UltExpr, ult) +CMPCREATE(UleExpr, ule) +CMPCREATE(SltExpr, slt) +CMPCREATE(SleExpr, sle) diff --git a/lib/Expr/ExprEvaluator.cpp b/lib/Expr/ExprEvaluator.cpp new file mode 100644 index 00000000..102387e1 --- /dev/null +++ b/lib/Expr/ExprEvaluator.cpp @@ -0,0 +1,74 @@ +//===-- ExprEvaluator.cpp -------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/util/ExprEvaluator.h" + +using namespace klee; + +ExprVisitor::Action ExprEvaluator::evalRead(const UpdateList &ul, + unsigned index) { + for (const UpdateNode *un=ul.head; un; un=un->next) { + ref<Expr> ui = visit(un->index); + + if (ui.isConstant()) { + if (ui.getConstantValue() == index) + return Action::changeTo(visit(un->value)); + } else { + // update index is unknown, so may or may not be index, we + // cannot guarantee value. we can rewrite to read at this + // version though (mostly for debugging). + + UpdateList fwd(ul.root, un, 0); + return Action::changeTo(ReadExpr::create(fwd, + ref<Expr>(index,Expr::Int32))); + } + } + + return Action::changeTo(getInitialValue(*ul.root, index)); +} + +ExprVisitor::Action ExprEvaluator::visitRead(const ReadExpr &re) { + ref<Expr> v = visit(re.index); + + if (v.isConstant()) { + return evalRead(re.updates, v.getConstantValue()); + } else { + return Action::doChildren(); + } +} + +// we need to check for div by zero during partial evaluation, +// if this occurs then simply ignore the 0 divisor and use the +// original expression. +ExprVisitor::Action ExprEvaluator::protectedDivOperation(const BinaryExpr &e) { + ref<Expr> kids[2] = { visit(e.left), + visit(e.right) }; + + if (kids[1].isConstant() && !kids[1].getConstantValue()) + kids[1] = e.right; + + if (kids[0]!=e.left || kids[1]!=e.right) { + return Action::changeTo(e.rebuild(kids)); + } else { + return Action::skipChildren(); + } +} + +ExprVisitor::Action ExprEvaluator::visitUDiv(const UDivExpr &e) { + return protectedDivOperation(e); +} +ExprVisitor::Action ExprEvaluator::visitSDiv(const SDivExpr &e) { + return protectedDivOperation(e); +} +ExprVisitor::Action ExprEvaluator::visitURem(const URemExpr &e) { + return protectedDivOperation(e); +} +ExprVisitor::Action ExprEvaluator::visitSRem(const SRemExpr &e) { + return protectedDivOperation(e); +} diff --git a/lib/Expr/ExprPPrinter.cpp b/lib/Expr/ExprPPrinter.cpp new file mode 100644 index 00000000..dc7f4f64 --- /dev/null +++ b/lib/Expr/ExprPPrinter.cpp @@ -0,0 +1,478 @@ +//===-- ExprPPrinter.cpp - ----------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "klee/util/ExprPPrinter.h" + +#include "klee/Constraints.h" + +#include "llvm/Support/CommandLine.h" + +#include <map> +#include <vector> +#include <iostream> +#include <sstream> +#include <iomanip> + +using namespace klee; + +namespace { + llvm::cl::opt<bool> + PCWidthAsArg("pc-width-as-arg", llvm::cl::init(true)); + + llvm::cl::opt<bool> + PCAllWidths("pc-all-widths", llvm::cl::init(false)); + + llvm::cl::opt<bool> + PCPrefixWidth("pc-prefix-width", llvm::cl::init(true)); + + llvm::cl::opt<bool> + PCMultibyteReads("pc-multibyte-reads", llvm::cl::init(true)); + + llvm::cl::opt<bool> + PCAllConstWidths("pc-all-const-widths", llvm::cl::init(false)); +} + +/// PrintContext - Helper class for storing extra information for +/// the pretty printer. +class PrintContext { +private: + std::ostream &os; + std::stringstream ss; + std::string newline; + +public: + /// Number of characters on the current line. + unsigned pos; + +public: + PrintContext(std::ostream &_os) : os(_os), newline("\n"), pos(0) {} + + void setNewline(const std::string &_newline) { + newline = _newline; + } + + void breakLine(unsigned indent=0) { + os << newline; + if (indent) + os << std::setw(indent) << ' '; + pos = indent; + } + + /// write - Output a string to the stream and update the + /// position. The stream should not have any newlines. + void write(const std::string &s) { + os << s; + pos += s.length(); + } + + template <typename T> + PrintContext &operator<<(T elt) { + ss.str(""); + ss << elt; + write(ss.str()); + return *this; + } +}; + +class PPrinter : public ExprPPrinter { + std::map<ref<Expr>, unsigned> bindings; + std::map<const UpdateNode*, unsigned> updateBindings; + std::set< ref<Expr> > couldPrint, shouldPrint; + std::set<const UpdateNode*> couldPrintUpdates, shouldPrintUpdates; + std::ostream &os; + unsigned counter; + unsigned updateCounter; + bool hasScan; + std::string newline; + + /// shouldPrintWidth - Predicate for whether this expression should + /// be printed with its width. + bool shouldPrintWidth(ref<Expr> e) { + if (PCAllWidths) + return true; + return e.getWidth() != Expr::Bool; + } + + bool isVerySimple(const ref<Expr> &e) { + return e.isConstant() || bindings.find(e)!=bindings.end(); + } + + bool isVerySimpleUpdate(const UpdateNode *un) { + return !un || updateBindings.find(un)!=updateBindings.end(); + } + + + // document me! 
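+  // isSimple - a sketch of the missing documentation: true if the expression
+  // is "very simple" (a constant or an already-bound node), a ReadExpr whose
+  // index and update-list head are very simple, or a node all of whose
+  // children are very simple. hasSimpleKids() uses it to decide whether an
+  // expression's arguments can stay on the current line when printing.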
+ bool isSimple(const ref<Expr> &e) { + if (isVerySimple(e)) { + return true; + } else if (const ReadExpr *re = dyn_ref_cast<ReadExpr>(e)) { + return isVerySimple(re->index) && isVerySimpleUpdate(re->updates.head); + } else { + Expr *ep = e.get(); + for (unsigned i=0; i<ep->getNumKids(); i++) + if (!isVerySimple(ep->getKid(i))) + return false; + return true; + } + } + + bool hasSimpleKids(const Expr *ep) { + for (unsigned i=0; i<ep->getNumKids(); i++) + if (!isSimple(ep->getKid(i))) + return false; + return true; + } + + void scanUpdate(const UpdateNode *un) { + if (un) { + if (couldPrintUpdates.insert(un).second) { + scanUpdate(un->next); + scan1(un->index); + scan1(un->value); + } else { + shouldPrintUpdates.insert(un); + } + } + } + + void scan1(const ref<Expr> &e) { + if (!e.isConstant()) { + if (couldPrint.insert(e).second) { + Expr *ep = e.get(); + for (unsigned i=0; i<ep->getNumKids(); i++) + scan1(ep->getKid(i)); + if (const ReadExpr *re = dyn_ref_cast<ReadExpr>(e)) + scanUpdate(re->updates.head); + } else { + shouldPrint.insert(e); + } + } + } + + void printUpdateList(const UpdateList &updates, PrintContext &PC) { + const UpdateNode *head = updates.head; + + // Special case empty list. + if (!head) { + if (updates.isRooted) { + PC << "arr" << updates.root->id; + } else { + PC << "[]"; + } + return; + } + + // FIXME: Explain this breaking policy. + bool openedList = false, nextShouldBreak = false; + unsigned outerIndent = PC.pos; + unsigned middleIndent = 0; + for (const UpdateNode *un = head; un; un = un->next) { + // We are done if we hit the cache. + std::map<const UpdateNode*, unsigned>::iterator it = + updateBindings.find(un); + if (it!=updateBindings.end()) { + if (openedList) + PC << "] @ "; + PC << "U" << it->second; + return; + } else if (!hasScan || shouldPrintUpdates.count(un)) { + if (openedList) + PC << "] @"; + if (un != head) + PC.breakLine(outerIndent); + PC << "U" << updateCounter << ":"; + updateBindings.insert(std::make_pair(un, updateCounter++)); + openedList = nextShouldBreak = false; + } + + if (!openedList) { + openedList = 1; + PC << '['; + middleIndent = PC.pos; + } else { + PC << ','; + printSeparator(PC, !nextShouldBreak, middleIndent); + } + //PC << "(="; + //unsigned innerIndent = PC.pos; + print(un->index, PC); + //printSeparator(PC, isSimple(un->index), innerIndent); + PC << "="; + print(un->value, PC); + //PC << ')'; + + nextShouldBreak = !(un->index.isConstant() && un->value.isConstant()); + } + + if (openedList) + PC << ']'; + + if (updates.isRooted) + PC << " @ arr" << updates.root->id; + } + + void printWidth(PrintContext &PC, ref<Expr> e) { + if (!shouldPrintWidth(e)) + return; + + if (PCWidthAsArg) { + PC << ' '; + if (PCPrefixWidth) + PC << 'w'; + } + + PC << e.getWidth(); + } + + /// hasOrderedReads - True iff all children are reads with + /// consecutive offsets according to the given \arg stride. + bool hasOrderedReads(const Expr *ep, int stride) { + const ReadExpr *base = dyn_ref_cast<ReadExpr>(ep->getKid(0)); + if (!base) + return false; + + // Get stride expr in proper index width. + Expr::Width idxWidth = base->index.getWidth(); + ref<Expr> strideExpr(stride, idxWidth), offset(0, idxWidth); + for (unsigned i=1; i<ep->getNumKids(); ++i) { + const ReadExpr *re = dyn_ref_cast<ReadExpr>(ep->getKid(i)); + if (!re) + return false; + + // Check if the index follows the stride. + // FIXME: How aggressive should this be simplified. 
The + // canonicalizing builder is probably the right choice, but this + // is yet another area where we would really prefer it to be + // global or else use static methods. + offset = AddExpr::create(offset, strideExpr); + if (SubExpr::create(re->index, base->index) != offset) + return false; + } + + return true; + } + + /// hasAllByteReads - True iff all children are byte level reads. + bool hasAllByteReads(const Expr *ep) { + for (unsigned i=0; i<ep->getNumKids(); ++i) { + const ReadExpr *re = dyn_ref_cast<ReadExpr>(ep->getKid(i)); + if (!re || re->getWidth() != Expr::Int8) + return false; + } + return true; + } + + void printRead(const ReadExpr *re, PrintContext &PC, unsigned indent) { + print(re->index, PC); + printSeparator(PC, isVerySimple(re->index), indent); + printUpdateList(re->updates, PC); + } + + void printExtract(const ExtractExpr *ee, PrintContext &PC, unsigned indent) { + PC << ee->offset << ' '; + print(ee->expr, PC); + } + + void printExpr(const Expr *ep, PrintContext &PC, unsigned indent, bool printConstWidth=false) { + bool simple = hasSimpleKids(ep); + + print(ep->getKid(0), PC); + for (unsigned i=1; i<ep->getNumKids(); i++) { + printSeparator(PC, simple, indent); + print(ep->getKid(i), PC, printConstWidth); + } + } + +public: + PPrinter(std::ostream &_os) : os(_os), newline("\n") { + reset(); + } + + void setNewline(const std::string &_newline) { + newline = _newline; + } + + void reset() { + counter = 0; + updateCounter = 0; + hasScan = false; + bindings.clear(); + updateBindings.clear(); + couldPrint.clear(); + shouldPrint.clear(); + couldPrintUpdates.clear(); + shouldPrintUpdates.clear(); + } + + void scan(const ref<Expr> &e) { + hasScan = true; + scan1(e); + } + + void print(const ref<Expr> &e, unsigned level=0) { + PrintContext PC(os); + PC.pos = level; + print(e, PC); + } + + void printConst(const ref<Expr> &e, PrintContext &PC, bool printWidth) { + assert(e.isConstant()); + + if (e.getWidth() == Expr::Bool) + PC << (e.getConstantValue() ? "true" : "false"); + else { + if (PCAllConstWidths) + printWidth = true; + + if (printWidth) + PC << "(w" << e.getWidth() << " "; + + PC << e.getConstantValue(); + + if (printWidth) + PC << ")"; + } + } + + void print(const ref<Expr> &e, PrintContext &PC, bool printConstWidth=false) { + if (e.isConstant()) + printConst(e, PC, printConstWidth); + else { + std::map<ref<Expr>, unsigned>::iterator it = bindings.find(e); + if (it!=bindings.end()) { + PC << 'N' << it->second; + } else { + if (!hasScan || shouldPrint.count(e)) { + PC << 'N' << counter << ':'; + bindings.insert(std::make_pair(e, counter++)); + } + + // Detect Not. + // FIXME: This should be in common code. + if (const EqExpr *ee = dyn_ref_cast<EqExpr>(e)) { + if (ee->left == ref<Expr>(false, Expr::Bool)) { + PC << "(Not"; + printWidth(PC, e); + PC << ' '; + // FIXME: This is a boom if right is a constant. + print(ee->right, PC); + PC << ')'; + return; + } + } + + // Detect multibyte reads. + // FIXME: Hrm. One problem with doing this is that we are + // masking the sharing of the indices which aren't + // visible. Need to think if this matters... probably not + // because if they are offset reads then its either constant, + // or they are (base + offset) and base will get printed with + // a declaration. + if (PCMultibyteReads && e.getKind() == Expr::Concat) { + const Expr *ep = e.get(); + if (hasAllByteReads(ep)) { + bool isMSB = hasOrderedReads(ep, 1); + if (isMSB || hasOrderedReads(ep, -1)) { + PC << "(Read" << (isMSB ? 
"MSB" : "LSB"); + printWidth(PC, e); + PC << ' '; + unsigned firstIdx = isMSB ? 0 : ep->getNumKids()-1; + printRead(static_ref_cast<ReadExpr>(ep->getKid(firstIdx)), + PC, PC.pos); + PC << ')'; + return; + } + } + } + + PC << '(' << e.getKind(); + printWidth(PC, e); + PC << ' '; + + // Indent at first argument and dispatch to appropriate print + // routine for exprs which require special handling. + unsigned indent = PC.pos; + if (const ReadExpr *re = dyn_ref_cast<ReadExpr>(e)) { + printRead(re, PC, indent); + } else if (const ExtractExpr *ee = dyn_ref_cast<ExtractExpr>(e)) { + printExtract(ee, PC, indent); + } else if (e.getKind() == Expr::Concat || e.getKind() == Expr::SExt) + printExpr(e.get(), PC, indent, true); + else + printExpr(e.get(), PC, indent); + PC << ")"; + } + } + } + + /* Public utility functions */ + + void printSeparator(PrintContext &PC, bool simple, unsigned indent) { + if (simple) { + PC << ' '; + } else { + PC.breakLine(indent); + } + } +}; + +ExprPPrinter *klee::ExprPPrinter::create(std::ostream &os) { + return new PPrinter(os); +} + +void ExprPPrinter::printOne(std::ostream &os, + const char *message, + const ref<Expr> &e) { + PPrinter p(os); + p.scan(e); + + // FIXME: Need to figure out what to do here. Probably print as a + // "forward declaration" with whatever syntax we pick for that. + PrintContext PC(os); + PC << message << ": "; + p.print(e, PC); + PC.breakLine(); +} + +void ExprPPrinter::printConstraints(std::ostream &os, + const ConstraintManager &constraints) { + printQuery(os, constraints, ref<Expr>(false, Expr::Bool)); +} + +void ExprPPrinter::printQuery(std::ostream &os, + const ConstraintManager &constraints, + const ref<Expr> &q) { + PPrinter p(os); + + for (ConstraintManager::const_iterator it = constraints.begin(), + ie = constraints.end(); it != ie; ++it) + p.scan(*it); + p.scan(q); + + PrintContext PC(os); + PC << "(query ["; + + // Ident at constraint list; + unsigned indent = PC.pos; + for (ConstraintManager::const_iterator it = constraints.begin(), + ie = constraints.end(); it != ie;) { + p.print(*it, PC); + ++it; + if (it != ie) + PC.breakLine(indent); + } + PC << ']'; + + p.printSeparator(PC, constraints.empty(), indent-1); + p.print(q, PC); + + PC << ')'; + PC.breakLine(); +} diff --git a/lib/Expr/ExprUtil.cpp b/lib/Expr/ExprUtil.cpp new file mode 100644 index 00000000..f74b519f --- /dev/null +++ b/lib/Expr/ExprUtil.cpp @@ -0,0 +1,127 @@ +//===-- ExprUtil.cpp ------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/util/ExprUtil.h" +#include "klee/util/ExprHashMap.h" + +#include "klee/Expr.h" + +#include "klee/util/ExprVisitor.h" + +#include <set> + +using namespace klee; + +void klee::findReads(ref<Expr> e, + bool visitUpdates, + std::vector< ref<ReadExpr> > &results) { + // Invariant: \forall_{i \in stack} !i.isConstant() && i \in visited + std::vector< ref<Expr> > stack; + ExprHashSet visited; + std::set<const UpdateNode *> updates; + + if (!e.isConstant()) { + visited.insert(e); + stack.push_back(e); + } + + while (!stack.empty()) { + ref<Expr> top = stack.back(); + stack.pop_back(); + + if (ReadExpr *re = dyn_ref_cast<ReadExpr>(top)) { + // We memoized so can just add to list without worrying about + // repeats. 
+ results.push_back(re); + + if (!re->index.isConstant() && + visited.insert(re->index).second) + stack.push_back(re->index); + + if (visitUpdates) { + // XXX this is probably suboptimal. We want to avoid a potential + // explosion traversing update lists which can be quite + // long. However, it seems silly to hash all of the update nodes + // especially since we memoize all the expr results anyway. So + // we take a simple approach of memoizing the results for the + // head, which often will be shared among multiple nodes. + if (updates.insert(re->updates.head).second) { + for (const UpdateNode *un=re->updates.head; un; un=un->next) { + if (!un->index.isConstant() && + visited.insert(un->index).second) + stack.push_back(un->index); + if (!un->value.isConstant() && + visited.insert(un->value).second) + stack.push_back(un->value); + } + } + } + } else if (!top.isConstant()) { + Expr *e = top.get(); + for (unsigned i=0; i<e->getNumKids(); i++) { + ref<Expr> k = e->getKid(i); + if (!k.isConstant() && + visited.insert(k).second) + stack.push_back(k); + } + } + } +} + +/// + +namespace klee { + +class SymbolicObjectFinder : public ExprVisitor { +protected: + Action visitRead(const ReadExpr &re) { + const UpdateList &ul = re.updates; + + // XXX should we memo better than what ExprVisitor is doing for us? + for (const UpdateNode *un=ul.head; un; un=un->next) { + visit(un->index); + visit(un->value); + } + + if (ul.isRooted) + if (results.insert(ul.root).second) + objects.push_back(ul.root); + + return Action::doChildren(); + } + +public: + std::set<const Array*> results; + std::vector<const Array*> &objects; + + SymbolicObjectFinder(std::vector<const Array*> &_objects) + : objects(_objects) {} +}; + +} + +template<typename InputIterator> +void klee::findSymbolicObjects(InputIterator begin, + InputIterator end, + std::vector<const Array*> &results) { + SymbolicObjectFinder of(results); + for (; begin!=end; ++begin) + of.visit(*begin); +} + +void klee::findSymbolicObjects(ref<Expr> e, + std::vector<const Array*> &results) { + findSymbolicObjects(&e, &e+1, results); +} + +typedef std::vector< ref<Expr> >::iterator A; +template void klee::findSymbolicObjects<A>(A, A, std::vector<const Array*> &); + +typedef std::set< ref<Expr> >::iterator B; +template void klee::findSymbolicObjects<B>(B, B, std::vector<const Array*> &); diff --git a/lib/Expr/ExprVisitor.cpp b/lib/Expr/ExprVisitor.cpp new file mode 100644 index 00000000..b15cdffa --- /dev/null +++ b/lib/Expr/ExprVisitor.cpp @@ -0,0 +1,253 @@ +//===-- ExprVisitor.cpp ---------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "klee/Expr.h" +#include "klee/util/ExprVisitor.h" + +#include "llvm/Support/CommandLine.h" + +namespace { + llvm::cl::opt<bool> + UseVisitorHash("use-visitor-hash", + llvm::cl::desc("Use hash-consing during expr visitation."), + llvm::cl::init(true)); +} + +using namespace klee; + +ref<Expr> ExprVisitor::visit(const ref<Expr> &e) { + if (!UseVisitorHash || e.isConstant()) { + return visitActual(e); + } else { + visited_ty::iterator it = visited.find(e); + + if (it!=visited.end()) { + return it->second; + } else { + ref<Expr> res = visitActual(e); + visited.insert(std::make_pair(e, res)); + return res; + } + } +} + +ref<Expr> ExprVisitor::visitActual(const ref<Expr> &e) { + if (e.isConstant()) { + return e; + } else { + Expr &ep = *e.get(); + + Action res = visitExpr(ep); + switch(res.kind) { + case Action::DoChildren: + // continue with normal action + break; + case Action::SkipChildren: + return e; + case Action::ChangeTo: + return res.argument; + } + + switch(ep.getKind()) { + case Expr::NotOptimized: res = visitNotOptimized(static_cast<NotOptimizedExpr&>(ep)); break; + case Expr::Read: res = visitRead(static_cast<ReadExpr&>(ep)); break; + case Expr::Select: res = visitSelect(static_cast<SelectExpr&>(ep)); break; + case Expr::Concat: res = visitConcat(static_cast<ConcatExpr&>(ep)); break; + case Expr::Extract: res = visitExtract(static_cast<ExtractExpr&>(ep)); break; + case Expr::ZExt: res = visitZExt(static_cast<ZExtExpr&>(ep)); break; + case Expr::SExt: res = visitSExt(static_cast<SExtExpr&>(ep)); break; + case Expr::Add: res = visitAdd(static_cast<AddExpr&>(ep)); break; + case Expr::Sub: res = visitSub(static_cast<SubExpr&>(ep)); break; + case Expr::Mul: res = visitMul(static_cast<MulExpr&>(ep)); break; + case Expr::UDiv: res = visitUDiv(static_cast<UDivExpr&>(ep)); break; + case Expr::SDiv: res = visitSDiv(static_cast<SDivExpr&>(ep)); break; + case Expr::URem: res = visitURem(static_cast<URemExpr&>(ep)); break; + case Expr::SRem: res = visitSRem(static_cast<SRemExpr&>(ep)); break; + case Expr::And: res = visitAnd(static_cast<AndExpr&>(ep)); break; + case Expr::Or: res = visitOr(static_cast<OrExpr&>(ep)); break; + case Expr::Xor: res = visitXor(static_cast<XorExpr&>(ep)); break; + case Expr::Shl: res = visitShl(static_cast<ShlExpr&>(ep)); break; + case Expr::LShr: res = visitLShr(static_cast<LShrExpr&>(ep)); break; + case Expr::AShr: res = visitAShr(static_cast<AShrExpr&>(ep)); break; + case Expr::Eq: res = visitEq(static_cast<EqExpr&>(ep)); break; + case Expr::Ne: res = visitNe(static_cast<NeExpr&>(ep)); break; + case Expr::Ult: res = visitUlt(static_cast<UltExpr&>(ep)); break; + case Expr::Ule: res = visitUle(static_cast<UleExpr&>(ep)); break; + case Expr::Ugt: res = visitUgt(static_cast<UgtExpr&>(ep)); break; + case Expr::Uge: res = visitUge(static_cast<UgeExpr&>(ep)); break; + case Expr::Slt: res = visitSlt(static_cast<SltExpr&>(ep)); break; + case Expr::Sle: res = visitSle(static_cast<SleExpr&>(ep)); break; + case Expr::Sgt: res = visitSgt(static_cast<SgtExpr&>(ep)); break; + case Expr::Sge: res = visitSge(static_cast<SgeExpr&>(ep)); break; + case Expr::Constant: + default: + assert(0 && "invalid expression kind"); + } + + switch(res.kind) { + case Action::DoChildren: { + bool rebuild = false; + ref<Expr> e(&ep), kids[8]; + unsigned count = ep.getNumKids(); + for (unsigned i=0; i<count; i++) { + ref<Expr> kid = ep.getKid(i); + kids[i] = visit(kid); + if (kids[i] != kid) + rebuild = 
true; + } + if (rebuild) { + e = ep.rebuild(kids); + if (recursive) + e = visit(e); + } + if (!e.isConstant()) { + res = visitExprPost(*e.get()); + if (res.kind==Action::ChangeTo) + e = res.argument; + } + return e; + } + case Action::SkipChildren: + return e; + case Action::ChangeTo: + return res.argument; + default: + assert(0 && "invalid kind"); + } + } +} + +ExprVisitor::Action ExprVisitor::visitExpr(const Expr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitExprPost(const Expr&) { + return Action::skipChildren(); +} + +ExprVisitor::Action ExprVisitor::visitNotOptimized(const NotOptimizedExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitRead(const ReadExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitSelect(const SelectExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitConcat(const ConcatExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitExtract(const ExtractExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitZExt(const ZExtExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitSExt(const SExtExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitAdd(const AddExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitSub(const SubExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitMul(const MulExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitUDiv(const UDivExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitSDiv(const SDivExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitURem(const URemExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitSRem(const SRemExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitAnd(const AndExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitOr(const OrExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitXor(const XorExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitShl(const ShlExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitLShr(const LShrExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitAShr(const AShrExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitEq(const EqExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitNe(const NeExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitUlt(const UltExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitUle(const UleExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitUgt(const UgtExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitUge(const UgeExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitSlt(const SltExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitSle(const SleExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitSgt(const SgtExpr&) { + return Action::doChildren(); +} + +ExprVisitor::Action ExprVisitor::visitSge(const SgeExpr&) { + return Action::doChildren(); +} + diff --git a/lib/Expr/Lexer.cpp 
b/lib/Expr/Lexer.cpp new file mode 100644 index 00000000..77e25f62 --- /dev/null +++ b/lib/Expr/Lexer.cpp @@ -0,0 +1,261 @@ +//===-- Lexer.cpp ---------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "expr/Lexer.h" + +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Streams.h" + +#include <iomanip> +#include <iostream> +#include <string.h> + +using namespace llvm; +using namespace klee; +using namespace klee::expr; + +/// + +const char *Token::getKindName() const { + switch (kind) { + default: + case Unknown: return "Unknown"; + case Arrow: return "Arrow"; + case At: return "At"; + case Colon: return "Colon"; + case Comma: return "Comma"; + case Comment: return "Comment"; + case EndOfFile: return "EndOfFile"; + case Equals: return "Equals"; + case Identifier: return "Identifier"; + case KWFalse: return "KWFalse"; + case KWQuery: return "KWQuery"; + case KWReserved: return "KWReserved"; + case KWTrue: return "KWTrue"; + case KWWidth: return "KWWidth"; + case LBrace: return "LBrace"; + case LParen: return "LParen"; + case LSquare: return "LSquare"; + case Number: return "Number"; + case RBrace: return "RBrace"; + case RParen: return "RParen"; + case RSquare: return "RSquare"; + case Semicolon: return "Semicolon"; + } +} + +void Token::dump() { + llvm::cerr << "(Token \"" << getKindName() << "\" " + << (void*) start << " " << length << " " + << line << " " << column << ")"; +} + +/// + +static inline bool isInternalIdentifierChar(int Char) { + return isalnum(Char) || Char == '_' || Char == '.'; +} + +Lexer::Lexer(const llvm::MemoryBuffer *MB) + : BufferPos(MB->getBufferStart()), BufferEnd(MB->getBufferEnd()), + LineNumber(1), ColumnNumber(0) { +} + +Lexer::~Lexer() { +} + +int Lexer::PeekNextChar() { + if (BufferPos == BufferEnd) + return -1; + return *BufferPos; +} + +int Lexer::GetNextChar() { + if (BufferPos == BufferEnd) + return -1; + + // Handle DOS/Mac newlines here, by stripping duplicates and by + // returning '\n' for both. 
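+  // For example: "\r\n" and "\n\r" are each consumed as a single '\n', and a
+  // bare '\r' (classic Mac) is also reported as '\n'.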
+ char Result = *BufferPos++; + if (Result == '\n' || Result == '\r') { + if (BufferPos != BufferEnd && *BufferPos == ('\n' + '\r' - Result)) + ++BufferPos; + Result = '\n'; + } + + if (Result == '\n') { + ++LineNumber; + ColumnNumber = 0; + } else { + ++ColumnNumber; + } + + return Result; +} + +Token &Lexer::SetTokenKind(Token &Result, Token::Kind k) { + Result.kind = k; + Result.length = BufferPos - Result.start; + return Result; +} + +static bool isReservedKW(const char *Str, unsigned N) { + unsigned i; + + // Check for i[0-9]+ + if (N>1 && Str[0] == 'i') { + for (i=1; i<N; ++i) + if (!isdigit(Str[i])) + break; + if (i==N) + return true; + } + + // Check for fp[0-9]+([.].*)?$ + if (N>3 && Str[0]=='f' && Str[1]=='p' && isdigit(Str[2])) { + for (i=3; i<N; ++i) + if (!isdigit(Str[i])) + break; + if (i==N || Str[i]=='.') + return true; + } + + return false; +} +static bool isWidthKW(const char *Str, unsigned N) { + if (N<2 || Str[0] != 'w') + return false; + for (unsigned i=1; i<N; ++i) + if (!isdigit(Str[i])) + return false; + return true; +} +Token &Lexer::SetIdentifierTokenKind(Token &Result) { + unsigned Length = BufferPos - Result.start; + switch (Length) { + case 3: + if (memcmp("def", Result.start, 3) == 0) + return SetTokenKind(Result, Token::KWReserved); + if (memcmp("var", Result.start, 3) == 0) + return SetTokenKind(Result, Token::KWReserved); + break; + + case 4: + if (memcmp("true", Result.start, 4) == 0) + return SetTokenKind(Result, Token::KWTrue); + break; + + case 5: + if (memcmp("array", Result.start, 5) == 0) + return SetTokenKind(Result, Token::KWReserved); + if (memcmp("false", Result.start, 5) == 0) + return SetTokenKind(Result, Token::KWFalse); + if (memcmp("query", Result.start, 5) == 0) + return SetTokenKind(Result, Token::KWQuery); + break; + + case 6: + if (memcmp("define", Result.start, 6) == 0) + return SetTokenKind(Result, Token::KWReserved); + break; + + case 7: + if (memcmp("declare", Result.start, 7) == 0) + return SetTokenKind(Result, Token::KWReserved); + break; + } + + if (isReservedKW(Result.start, Length)) + return SetTokenKind(Result, Token::KWReserved); + if (isWidthKW(Result.start, Length)) + return SetTokenKind(Result, Token::KWWidth); + + return SetTokenKind(Result, Token::Identifier); +} + +void Lexer::SkipToEndOfLine() { + for (;;) { + int Char = GetNextChar(); + if (Char == -1 || Char =='\n') + break; + } +} + +Token &Lexer::LexNumber(Token &Result) { + while (isalnum(PeekNextChar()) || PeekNextChar()=='_') + GetNextChar(); + return SetTokenKind(Result, Token::Number); +} + +Token &Lexer::LexIdentifier(Token &Result) { + while (isInternalIdentifierChar(PeekNextChar())) + GetNextChar(); + + // Recognize keywords specially. + return SetIdentifierTokenKind(Result); +} + +Token &Lexer::Lex(Token &Result) { + Result.kind = Token::Unknown; + Result.length = 0; + Result.start = BufferPos; + + // Skip whitespace. 
+ while (isspace(PeekNextChar())) + GetNextChar(); + + Result.start = BufferPos; + Result.line = LineNumber; + Result.column = ColumnNumber; + int Char = GetNextChar(); + switch (Char) { + case -1: return SetTokenKind(Result, Token::EndOfFile); + + case '(': return SetTokenKind(Result, Token::LParen); + case ')': return SetTokenKind(Result, Token::RParen); + case ',': return SetTokenKind(Result, Token::Comma); + case ':': return SetTokenKind(Result, Token::Colon); + case ';': return SetTokenKind(Result, Token::Semicolon); + case '=': return SetTokenKind(Result, Token::Equals); + case '@': return SetTokenKind(Result, Token::At); + case '[': return SetTokenKind(Result, Token::LSquare); + case ']': return SetTokenKind(Result, Token::RSquare); + case '{': return SetTokenKind(Result, Token::LBrace); + case '}': return SetTokenKind(Result, Token::RBrace); + + case '#': + SkipToEndOfLine(); + return SetTokenKind(Result, Token::Comment); + + case '+': { + if (isdigit(PeekNextChar())) + return LexNumber(Result); + else + return SetTokenKind(Result, Token::Unknown); + } + + case '-': { + int Next = PeekNextChar(); + if (Next == '>') + return GetNextChar(), SetTokenKind(Result, Token::Arrow); + else if (isdigit(Next)) + return LexNumber(Result); + else + return SetTokenKind(Result, Token::Unknown); + break; + } + + default: + if (isdigit(Char)) + return LexNumber(Result); + else if (isalpha(Char) || Char == '_') + return LexIdentifier(Result); + return SetTokenKind(Result, Token::Unknown); + } +} diff --git a/lib/Expr/Makefile b/lib/Expr/Makefile new file mode 100644 index 00000000..b80569b3 --- /dev/null +++ b/lib/Expr/Makefile @@ -0,0 +1,16 @@ +#===-- lib/Expr/Makefile -----------------------------------*- Makefile -*--===# +# +# The KLEE Symbolic Virtual Machine +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +LEVEL=../.. + +LIBRARYNAME=kleaverExpr +DONT_BUILD_RELINKED=1 +BUILD_ARCHIVE=1 + +include $(LEVEL)/Makefile.common diff --git a/lib/Expr/Parser.cpp b/lib/Expr/Parser.cpp new file mode 100644 index 00000000..f5708384 --- /dev/null +++ b/lib/Expr/Parser.cpp @@ -0,0 +1,1310 @@ +//===-- Parser.cpp --------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "expr/Parser.h" + +#include "expr/Lexer.h" + +#include "klee/Constraints.h" +#include "klee/Solver.h" +#include "klee/util/ExprPPrinter.h" + +#include "llvm/ADT/APInt.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Streams.h" + +#include <cassert> +#include <iostream> +#include <map> + +using namespace llvm; +using namespace klee; +using namespace klee::expr; + +namespace { + /// ParseResult - Represent a possibly invalid parse result. 
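+  /// A default-constructed ParseResult is invalid; constructing one from a
+  /// value marks it valid, and get() asserts validity before returning the
+  /// wrapped value.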
+ template<typename T> + struct ParseResult { + bool IsValid; + T Value; + + public: + ParseResult() : IsValid(false) {} + ParseResult(T _Value) : IsValid(true), Value(_Value) {} + ParseResult(bool _IsValid, T _Value) : IsValid(_IsValid), Value(_Value) {} + + bool isValid() { + return IsValid; + } + T get() { + assert(IsValid && "get() on invalid ParseResult!"); + return Value; + } + }; + + typedef ParseResult<Decl*> DeclResult; + typedef ParseResult<ExprHandle> ExprResult; + typedef ParseResult<Expr::Width> TypeResult; + typedef ParseResult<VersionHandle> VersionResult; + + /// NumberOrExprResult - Represent a number or expression. This is used to + /// wrap an expression production which may be a number, but for + /// which the type width is unknown. + class NumberOrExprResult { + Token AsNumber; + ExprResult AsExpr; + bool IsNumber; + + public: + NumberOrExprResult() : IsNumber(false) {} + explicit NumberOrExprResult(Token _AsNumber) : AsNumber(_AsNumber), + IsNumber(true) {} + explicit NumberOrExprResult(ExprResult _AsExpr) : AsExpr(_AsExpr), + IsNumber(false) {} + + bool isNumber() const { return IsNumber; } + const Token &getNumber() const { + assert(IsNumber && "Invalid accessor call."); + return AsNumber; + } + const ExprResult &getExpr() const { + assert(!IsNumber && "Invalid accessor call."); + return AsExpr; + } + }; + + /// ParserImpl - Parser implementation. + class ParserImpl : public Parser { + typedef std::map<const std::string, const Identifier*> IdentifierTabTy; + typedef std::map<const Identifier*, ExprHandle> ExprSymTabTy; + typedef std::map<const Identifier*, VersionHandle> VersionSymTabTy; + + const std::string Filename; + const MemoryBuffer *TheMemoryBuffer; + Lexer TheLexer; + unsigned MaxErrors; + unsigned NumErrors; + + // FIXME: Use LLVM symbol tables? + IdentifierTabTy IdentifierTab; + + std::map<const Identifier*, const ArrayDecl*> ArraySymTab; + ExprSymTabTy ExprSymTab; + VersionSymTabTy VersionSymTab; + + /// Tok - The currently lexed token. + Token Tok; + + /// ParenLevel - The current depth of matched '(' tokens. + unsigned ParenLevel; + /// SquareLevel - The current depth of matched '[' tokens. + unsigned SquareLevel; + + /* Core parsing functionality */ + + const Identifier *GetOrCreateIdentifier(const Token &Tok); + + void GetNextNonCommentToken() { + do { + TheLexer.Lex(Tok); + } while (Tok.kind == Token::Comment); + } + + /// ConsumeToken - Consume the current 'peek token' and lex the next one. + void ConsumeToken() { + assert(Tok.kind != Token::LParen && Tok.kind != Token::RParen); + GetNextNonCommentToken(); + } + + /// ConsumeExpectedToken - Check that the current token is of the + /// expected kind and consume it. + void ConsumeExpectedToken(Token::Kind k) { + assert(Tok.kind == k && "Unexpected token!"); + GetNextNonCommentToken(); + } + + void ConsumeLParen() { + ++ParenLevel; + ConsumeExpectedToken(Token::LParen); + } + + void ConsumeRParen() { + if (ParenLevel) // Cannot go below zero. + --ParenLevel; + ConsumeExpectedToken(Token::RParen); + } + + void ConsumeLSquare() { + ++SquareLevel; + ConsumeExpectedToken(Token::LSquare); + } + + void ConsumeRSquare() { + if (SquareLevel) // Cannot go below zero. 
+ --SquareLevel; + ConsumeExpectedToken(Token::RSquare); + } + + void ConsumeAnyToken() { + switch (Tok.kind) { + case Token::LParen: return ConsumeLParen(); + case Token::RParen: return ConsumeRParen(); + case Token::LSquare: return ConsumeLSquare(); + case Token::RSquare: return ConsumeRSquare(); + default: + return ConsumeToken(); + } + } + + /* Utility functions */ + + /// SkipUntilRParen - Scan forward to the next token following an + /// rparen at the given level, or EOF, whichever is first. + void SkipUntilRParen(unsigned Level) { + // FIXME: I keep wavering on whether it is an error to call this + // with the current token an rparen. In most cases this should + // have been handled differently (error reported, + // whatever). Audit & resolve. + assert(Level <= ParenLevel && + "Refusing to skip until rparen at higher level."); + while (Tok.kind != Token::EndOfFile) { + if (Tok.kind == Token::RParen && ParenLevel == Level) { + ConsumeRParen(); + break; + } + ConsumeAnyToken(); + } + } + + /// SkipUntilRParen - Scan forward until reaching an rparen token + /// at the current level (or EOF). + void SkipUntilRParen() { + SkipUntilRParen(ParenLevel); + } + + /// ExpectRParen - Utility method to close an sexp. This expects to + /// eat an rparen, and emits a diagnostic and skips to the next one + /// (or EOF) if it cannot. + void ExpectRParen(const char *Msg) { + if (Tok.kind == Token::EndOfFile) { + // FIXME: Combine with Msg + Error("expected ')' but found end-of-file.", Tok); + } else if (Tok.kind != Token::RParen) { + Error(Msg, Tok); + SkipUntilRParen(); + } else { + ConsumeRParen(); + } + } + + /// SkipUntilRSquare - Scan forward to the next token following an + /// rsquare at the given level, or EOF, whichever is first. + void SkipUntilRSquare(unsigned Level) { + // FIXME: I keep wavering on whether it is an error to call this + // with the current token an rparen. In most cases this should + // have been handled differently (error reported, + // whatever). Audit & resolve. + assert(Level <= ParenLevel && + "Refusing to skip until rparen at higher level."); + while (Tok.kind != Token::EndOfFile) { + if (Tok.kind == Token::RSquare && ParenLevel == Level) { + ConsumeRSquare(); + break; + } + ConsumeAnyToken(); + } + } + + /// SkipUntilRSquare - Scan forward until reaching an rsquare token + /// at the current level (or EOF). + void SkipUntilRSquare() { + SkipUntilRSquare(ParenLevel); + } + + /// ExpectRSquare - Utility method to close an array. This expects + /// to eat an rparen, and emits a diagnostic and skips to the next + /// one (or EOF) if it cannot. + void ExpectRSquare(const char *Msg) { + if (Tok.kind == Token::EndOfFile) { + // FIXME: Combine with Msg + Error("expected ']' but found end-of-file.", Tok); + } else if (Tok.kind != Token::RSquare) { + Error(Msg, Tok); + SkipUntilRSquare(); + } else { + ConsumeRSquare(); + } + } + + /*** Grammar productions ****/ + + /* Top level decls */ + + DeclResult ParseArrayDecl(); + DeclResult ParseExprVarDecl(); + DeclResult ParseVersionVarDecl(); + DeclResult ParseCommandDecl(); + + /* Commands */ + + DeclResult ParseQueryCommand(); + + /* Etc. 
*/ + + NumberOrExprResult ParseNumberOrExpr(); + + ExprResult ParseExpr(TypeResult ExpectedType); + ExprResult ParseParenExpr(TypeResult ExpectedType); + ExprResult ParseUnaryParenExpr(const Token &Name, + unsigned Kind, bool IsFixed, + Expr::Width ResTy); + ExprResult ParseBinaryParenExpr(const Token &Name, + unsigned Kind, bool IsFixed, + Expr::Width ResTy); + ExprResult ParseSelectParenExpr(const Token &Name, Expr::Width ResTy); + ExprResult ParseConcatParenExpr(const Token &Name, Expr::Width ResTy); + ExprResult ParseExtractParenExpr(const Token &Name, Expr::Width ResTy); + ExprResult ParseAnyReadParenExpr(const Token &Name, + unsigned Kind, + Expr::Width ResTy); + void ParseMatchedBinaryArgs(const Token &Name, + TypeResult ExpectType, + ExprResult &LHS, ExprResult &RHS); + ExprResult ParseNumber(Expr::Width Width); + ExprResult ParseNumberToken(Expr::Width Width, const Token &Tok); + + VersionResult ParseVersionSpecifier(); + VersionResult ParseVersion(); + + TypeResult ParseTypeSpecifier(); + + /*** Diagnostics ***/ + + void Error(const char *Message, const Token &At); + void Error(const char *Message) { Error(Message, Tok); } + + public: + ParserImpl(const std::string _Filename, + const MemoryBuffer *MB) : Filename(_Filename), + TheMemoryBuffer(MB), + TheLexer(MB), + MaxErrors(~0u), + NumErrors(0) {} + + /// Initialize - Initialize the parsing state. This must be called + /// prior to the start of parsing. + void Initialize() { + ParenLevel = SquareLevel = 0; + + ConsumeAnyToken(); + } + + /* Parser interface implementation */ + + virtual Decl *ParseTopLevelDecl(); + + virtual void SetMaxErrors(unsigned N) { + MaxErrors = N; + } + + virtual unsigned GetNumErrors() const { + return NumErrors; + } + }; +} + +const Identifier *ParserImpl::GetOrCreateIdentifier(const Token &Tok) { + // FIXME: Make not horribly inefficient please. + assert(Tok.kind == Token::Identifier && "Expected only identifier tokens."); + std::string Name(Tok.start, Tok.length); + IdentifierTabTy::iterator it = IdentifierTab.find(Name); + if (it != IdentifierTab.end()) + return it->second; + + Identifier *I = new Identifier(Name); + IdentifierTab.insert(std::make_pair(Name, I)); + + return I; +} + +Decl *ParserImpl::ParseTopLevelDecl() { + // Repeat until success or EOF. + while (Tok.kind != Token::EndOfFile) { + // Only handle commands for now. + if (Tok.kind == Token::LParen) { + DeclResult Res = ParseCommandDecl(); + if (Res.isValid()) + return Res.get(); + } else { + Error("expected '(' token."); + ConsumeAnyToken(); + } + } + + return 0; +} + +/// ParseCommandDecl - Parse a command declaration. The lexer should +/// be positioned at the opening '('. +/// +/// command = '(' name ... ')' +DeclResult ParserImpl::ParseCommandDecl() { + ConsumeLParen(); + + if (!Tok.isKeyword()) { + Error("malformed command."); + SkipUntilRParen(); + return DeclResult(); + } + + switch (Tok.kind) { + case Token::KWQuery: + return ParseQueryCommand(); + + default: + Error("malformed command (unexpected keyword)."); + SkipUntilRParen(); + return DeclResult(); + } +} + +/// ParseQueryCommand - Parse query command. The lexer should be +/// positioned at the 'query' keyword. +/// +/// 'query' expressions-list expression [expression-list [array-list]] +DeclResult ParserImpl::ParseQueryCommand() { + // FIXME: We need a command for this. Or something. 
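+  // Illustrative shape of the accepted input (assumed, not taken from a
+  // test case): (query [ constraint-expr* ] query-expr), e.g. (query [] false).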
+ ExprSymTab.clear(); + VersionSymTab.clear(); + + std::vector<ExprHandle> Constraints; + ConsumeExpectedToken(Token::KWQuery); + if (Tok.kind != Token::LSquare) { + Error("malformed query, expected constraint list."); + SkipUntilRParen(); + return DeclResult(); + } + + ConsumeExpectedToken(Token::LSquare); + // FIXME: Should avoid reading past unbalanced parens here. + while (Tok.kind != Token::RSquare) { + if (Tok.kind == Token::EndOfFile) { + Error("unexpected end of file."); + return new QueryCommand(Constraints.begin(), Constraints.end(), + ref<Expr>(false, Expr::Bool)); + } + + ExprResult Res = ParseExpr(TypeResult(Expr::Bool)); + if (Res.isValid()) + Constraints.push_back(Res.get()); + } + + ConsumeRSquare(); + + ExprResult Res = ParseExpr(TypeResult()); + if (!Res.isValid()) // Error emitted by ParseExpr. + Res = ExprResult(ref<Expr>(0, Expr::Bool)); + + ExpectRParen("unexpected argument to 'query'."); + return new QueryCommand(Constraints.begin(), Constraints.end(), + Res.get()); +} + +/// ParseNumberOrExpr - Parse an expression whose type cannot be +/// predicted. +NumberOrExprResult ParserImpl::ParseNumberOrExpr() { + if (Tok.kind == Token::Number){ + Token Num = Tok; + ConsumeToken(); + return NumberOrExprResult(Num); + } else { + return NumberOrExprResult(ParseExpr(TypeResult())); + } +} + +/// ParseExpr - Parse an expression with the given \arg +/// ExpectedType. \arg ExpectedType can be invalid if the type cannot +/// be inferred from the context. +/// +/// expr = false | true +/// expr = <constant> +/// expr = <identifier> +/// expr = [<identifier>:] paren-expr +ExprResult ParserImpl::ParseExpr(TypeResult ExpectedType) { + // FIXME: Is it right to need to do this here? + if (Tok.kind == Token::EndOfFile) { + Error("unexpected end of file."); + return ExprResult(); + } + + if (Tok.kind == Token::KWFalse || Tok.kind == Token::KWTrue) { + bool Value = Tok.kind == Token::KWTrue; + ConsumeToken(); + return ExprResult(ref<Expr>(Value, Expr::Bool)); + } + + if (Tok.kind == Token::Number) { + if (!ExpectedType.isValid()) { + Error("cannot infer type of number."); + ConsumeToken(); + return ExprResult(); + } + + return ParseNumber(ExpectedType.get()); + } + + const Identifier *Label = 0; + if (Tok.kind == Token::Identifier) { + Token LTok = Tok; + Label = GetOrCreateIdentifier(Tok); + ConsumeToken(); + + if (Tok.kind != Token::Colon) { + ExprSymTabTy::iterator it = ExprSymTab.find(Label); + + if (it == ExprSymTab.end()) { + Error("invalid expression label reference.", LTok); + return ExprResult(); + } + + return it->second; + } + + ConsumeToken(); + if (ExprSymTab.count(Label)) { + Error("duplicate expression label definition.", LTok); + Label = 0; + } + } + + Token Start = Tok; + ExprResult Res = ParseParenExpr(ExpectedType); + if (!Res.isValid()) { + // If we know the type, define the identifier just so we don't get + // use-of-undef errors. + // FIXME: Maybe we should let the symbol table map to invalid + // entries? + if (Label && ExpectedType.isValid()) + ExprSymTab.insert(std::make_pair(Label, + ref<Expr>(0, ExpectedType.get()))); + return Res; + } else if (ExpectedType.isValid()) { + // Type check result. + if (Res.get().getWidth() != ExpectedType.get()) { + // FIXME: Need more info, and range + Error("expression has incorrect type.", Start); + return ExprResult(); + } + } + + if (Label) + ExprSymTab.insert(std::make_pair(Label, Res.get())); + return Res; +} + +// Additional kinds for macro forms. 
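+// These macro kinds exist only during parsing; the parser expands them into
+// ordinary Expr nodes (Not/Neg via ParseUnaryParenExpr, the Read and Concat
+// forms via their dedicated handlers), so they never appear in a built
+// expression.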
+enum MacroKind { + eMacroKind_Not = Expr::LastKind + 1, // false == x + eMacroKind_Neg, // 0 - x + eMacroKind_ReadLSB, // Multibyte read + eMacroKind_ReadMSB, // Multibyte write + eMacroKind_Concat, // Magic concatenation syntax + eMacroKind_LastMacroKind = eMacroKind_ReadMSB +}; + +/// LookupExprInfo - Return information on the named token, if it is +/// recognized. +/// +/// \param Kind [out] - The Expr::Kind or MacroKind of the identifier. +/// \param IsFixed [out] - True if the given kinds result and +/// (expression) arguments are all of the same width. +/// \param NumArgs [out] - The number of expression arguments for this +/// kind. -1 indicates the kind is variadic or has non-expression +/// arguments. +/// \return True if the token is a valid kind or macro name. +static bool LookupExprInfo(const Token &Tok, unsigned &Kind, + bool &IsFixed, int &NumArgs) { +#define SetOK(kind, isfixed, numargs) (Kind=kind, IsFixed=isfixed,\ + NumArgs=numargs, true) + assert(Tok.kind == Token::Identifier && "Unexpected token."); + + switch (Tok.length) { + case 2: + if (memcmp(Tok.start, "Eq", 2) == 0) + return SetOK(Expr::Eq, false, 2); + if (memcmp(Tok.start, "Ne", 2) == 0) + return SetOK(Expr::Ne, false, 2); + + if (memcmp(Tok.start, "Or", 2) == 0) + return SetOK(Expr::Or, true, 2); + break; + + case 3: + if (memcmp(Tok.start, "Add", 3) == 0) + return SetOK(Expr::Add, true, 2); + if (memcmp(Tok.start, "Sub", 3) == 0) + return SetOK(Expr::Sub, true, 2); + if (memcmp(Tok.start, "Mul", 3) == 0) + return SetOK(Expr::Mul, true, 2); + + if (memcmp(Tok.start, "And", 3) == 0) + return SetOK(Expr::And, true, 2); + if (memcmp(Tok.start, "Shl", 3) == 0) + return SetOK(Expr::Shl, true, 2); + if (memcmp(Tok.start, "Xor", 3) == 0) + return SetOK(Expr::Xor, true, 2); + + if (memcmp(Tok.start, "Not", 3) == 0) + return SetOK(eMacroKind_Not, true, 1); + if (memcmp(Tok.start, "Neg", 3) == 0) + return SetOK(eMacroKind_Neg, true, 1); + if (memcmp(Tok.start, "Ult", 3) == 0) + return SetOK(Expr::Ult, false, 2); + if (memcmp(Tok.start, "Ule", 3) == 0) + return SetOK(Expr::Ule, false, 2); + if (memcmp(Tok.start, "Ugt", 3) == 0) + return SetOK(Expr::Ugt, false, 2); + if (memcmp(Tok.start, "Uge", 3) == 0) + return SetOK(Expr::Uge, false, 2); + if (memcmp(Tok.start, "Slt", 3) == 0) + return SetOK(Expr::Slt, false, 2); + if (memcmp(Tok.start, "Sle", 3) == 0) + return SetOK(Expr::Sle, false, 2); + if (memcmp(Tok.start, "Sgt", 3) == 0) + return SetOK(Expr::Sgt, false, 2); + if (memcmp(Tok.start, "Sge", 3) == 0) + return SetOK(Expr::Sge, false, 2); + break; + + case 4: + if (memcmp(Tok.start, "Read", 4) == 0) + return SetOK(Expr::Read, true, -1); + if (memcmp(Tok.start, "AShr", 4) == 0) + return SetOK(Expr::AShr, true, 2); + if (memcmp(Tok.start, "LShr", 4) == 0) + return SetOK(Expr::LShr, true, 2); + + if (memcmp(Tok.start, "UDiv", 4) == 0) + return SetOK(Expr::UDiv, true, 2); + if (memcmp(Tok.start, "SDiv", 4) == 0) + return SetOK(Expr::SDiv, true, 2); + if (memcmp(Tok.start, "URem", 4) == 0) + return SetOK(Expr::URem, true, 2); + if (memcmp(Tok.start, "SRem", 4) == 0) + return SetOK(Expr::SRem, true, 2); + + if (memcmp(Tok.start, "SExt", 4) == 0) + return SetOK(Expr::SExt, false, 1); + if (memcmp(Tok.start, "ZExt", 4) == 0) + return SetOK(Expr::ZExt, false, 1); + break; + + case 6: + if (memcmp(Tok.start, "Concat", 6) == 0) + return SetOK(eMacroKind_Concat, false, -1); + if (memcmp(Tok.start, "Select", 6) == 0) + return SetOK(Expr::Select, false, 3); + break; + + case 7: + if (memcmp(Tok.start, "Extract", 7) == 0) + return 
SetOK(Expr::Extract, false, -1); + if (memcmp(Tok.start, "ReadLSB", 7) == 0) + return SetOK(eMacroKind_ReadLSB, true, -1); + if (memcmp(Tok.start, "ReadMSB", 7) == 0) + return SetOK(eMacroKind_ReadMSB, true, -1); + break; + } + + return false; +#undef SetOK +} + +/// ParseParenExpr - Parse a parenthesized expression with the given +/// \arg ExpectedType. \arg ExpectedType can be invalid if the type +/// cannot be inferred from the context. +/// +/// paren-expr = '(' type number ')' +/// paren-expr = '(' identifier [type] expr+ ') +/// paren-expr = '(' ('Read' | 'ReadMSB' | 'ReadLSB') type expr update-list ')' +ExprResult ParserImpl::ParseParenExpr(TypeResult FIXME_UNUSED) { + if (Tok.kind != Token::LParen) { + Error("unexpected token."); + ConsumeAnyToken(); + return ExprResult(); + } + + ConsumeLParen(); + + // Check for coercion case (w32 11). + if (Tok.kind == Token::KWWidth) { + TypeResult ExpectedType = ParseTypeSpecifier(); + + if (Tok.kind != Token::Number) { + Error("coercion can only apply to a number."); + SkipUntilRParen(); + return ExprResult(); + } + + // Make sure this was a type specifier we support. + ExprResult Res; + if (ExpectedType.isValid()) + Res = ParseNumber(ExpectedType.get()); + else + ConsumeToken(); + + ExpectRParen("unexpected argument in coercion."); + return Res; + } + + if (Tok.kind != Token::Identifier) { + Error("unexpected token, expected expression."); + SkipUntilRParen(); + return ExprResult(); + } + + Token Name = Tok; + ConsumeToken(); + + // FIXME: Use invalid type (i.e. width==0)? + Token TypeTok = Tok; + bool HasType = TypeTok.kind == Token::KWWidth; + TypeResult Type = HasType ? ParseTypeSpecifier() : Expr::Bool; + + // FIXME: For now just skip to rparen on error. It might be nice + // to try and actually parse the child nodes though for error + // messages & better recovery? + if (!Type.isValid()) { + SkipUntilRParen(); + return ExprResult(); + } + Expr::Width ResTy = Type.get(); + + unsigned ExprKind; + bool IsFixed; + int NumArgs; + if (!LookupExprInfo(Name, ExprKind, IsFixed, NumArgs)) { + // FIXME: For now just skip to rparen on error. It might be nice + // to try and actually parse the child nodes though for error + // messages & better recovery? + Error("unknown expression kind.", Name); + SkipUntilRParen(); + return ExprResult(); + } + + // See if we have to parse this form specially. + if (NumArgs == -1) { + switch (ExprKind) { + case eMacroKind_Concat: + return ParseConcatParenExpr(Name, ResTy); + + case Expr::Extract: + return ParseExtractParenExpr(Name, ResTy); + + case eMacroKind_ReadLSB: + case eMacroKind_ReadMSB: + case Expr::Read: + return ParseAnyReadParenExpr(Name, ExprKind, ResTy); + + default: + Error("internal error, unimplemented special form.", Name); + SkipUntilRParen(); + return ExprResult(ref<Expr>(0, ResTy)); + } + } + + switch (NumArgs) { + case 1: + return ParseUnaryParenExpr(Name, ExprKind, IsFixed, ResTy); + case 2: + return ParseBinaryParenExpr(Name, ExprKind, IsFixed, ResTy); + case 3: + if (ExprKind == Expr::Select) + return ParseSelectParenExpr(Name, ResTy); + default: + assert(0 && "Invalid argument kind (number of args)."); + return ExprResult(); + } +} + +ExprResult ParserImpl::ParseUnaryParenExpr(const Token &Name, + unsigned Kind, bool IsFixed, + Expr::Width ResTy) { + if (Tok.kind == Token::RParen) { + Error("unexpected end of arguments.", Name); + ConsumeRParen(); + return ref<Expr>(0, ResTy); + } + + ExprResult Arg = ParseExpr(IsFixed ? 
ResTy : TypeResult()); + if (!Arg.isValid()) + Arg = ref<Expr>(0, ResTy); + + ExpectRParen("unexpected argument in unary expression."); + ExprHandle E = Arg.get(); + switch (Kind) { + case eMacroKind_Not: + return EqExpr::alloc(ref<Expr>(0, E.getWidth()), E); + case eMacroKind_Neg: + return SubExpr::alloc(ref<Expr>(0, E.getWidth()), E); + case Expr::SExt: + // FIXME: Type check arguments. + return SExtExpr::alloc(E, ResTy); + case Expr::ZExt: + // FIXME: Type check arguments. + return ZExtExpr::alloc(E, ResTy); + default: + Error("internal error, unhandled kind.", Name); + return ref<Expr>(0, ResTy); + } +} + +/// ParseMatchedBinaryArgs - Parse a pair of arguments who are +/// expected to be of the same type. Upon return, if both LHS and RHS +/// are valid then they are guaranteed to have the same type. +/// +/// Name - The name token of the expression, for diagnostics. +/// ExpectType - The expected type of the arguments, if known. +void ParserImpl::ParseMatchedBinaryArgs(const Token &Name, + TypeResult ExpectType, + ExprResult &LHS, ExprResult &RHS) { + if (Tok.kind == Token::RParen) { + Error("unexpected end of arguments.", Name); + ConsumeRParen(); + return; + } + + // Avoid NumberOrExprResult overhead and give more precise + // diagnostics when we know the type. + if (ExpectType.isValid()) { + LHS = ParseExpr(ExpectType); + if (Tok.kind == Token::RParen) { + Error("unexpected end of arguments.", Name); + ConsumeRParen(); + return; + } + RHS = ParseExpr(ExpectType); + } else { + NumberOrExprResult LHS_NOE = ParseNumberOrExpr(); + + if (Tok.kind == Token::RParen) { + Error("unexpected end of arguments.", Name); + ConsumeRParen(); + return; + } + + if (LHS_NOE.isNumber()) { + NumberOrExprResult RHS_NOE = ParseNumberOrExpr(); + + if (RHS_NOE.isNumber()) { + Error("ambiguous arguments to expression.", Name); + } else { + RHS = RHS_NOE.getExpr(); + if (RHS.isValid()) + LHS = ParseNumberToken(RHS.get().getWidth(), LHS_NOE.getNumber()); + } + } else { + LHS = LHS_NOE.getExpr(); + if (!LHS.isValid()) { + // FIXME: Should suppress ambiguity warnings here. + RHS = ParseExpr(TypeResult()); + } else { + RHS = ParseExpr(LHS.get().getWidth()); + } + } + } + + ExpectRParen("unexpected argument to expression."); +} + +ExprResult ParserImpl::ParseBinaryParenExpr(const Token &Name, + unsigned Kind, bool IsFixed, + Expr::Width ResTy) { + ExprResult LHS, RHS; + ParseMatchedBinaryArgs(Name, IsFixed ? 
TypeResult(ResTy) : TypeResult(),
+                         LHS, RHS);
+  if (!LHS.isValid() || !RHS.isValid())
+    return ref<Expr>(0, ResTy);
+
+  ref<Expr> LHS_E = LHS.get(), RHS_E = RHS.get();
+  assert(LHS_E.getWidth() == RHS_E.getWidth() && "Mismatched types!");
+
+  switch (Kind) {
+  case Expr::Add: return AddExpr::alloc(LHS_E, RHS_E);
+  case Expr::Sub: return SubExpr::alloc(LHS_E, RHS_E);
+  case Expr::Mul: return MulExpr::alloc(LHS_E, RHS_E);
+  case Expr::UDiv: return UDivExpr::alloc(LHS_E, RHS_E);
+  case Expr::SDiv: return SDivExpr::alloc(LHS_E, RHS_E);
+  case Expr::URem: return URemExpr::alloc(LHS_E, RHS_E);
+  case Expr::SRem: return SRemExpr::alloc(LHS_E, RHS_E);
+
+  case Expr::AShr: return AShrExpr::alloc(LHS_E, RHS_E);
+  case Expr::LShr: return LShrExpr::alloc(LHS_E, RHS_E);
+  case Expr::Shl: return ShlExpr::alloc(LHS_E, RHS_E);
+
+  case Expr::And: return AndExpr::alloc(LHS_E, RHS_E);
+  case Expr::Or: return OrExpr::alloc(LHS_E, RHS_E);
+  case Expr::Xor: return XorExpr::alloc(LHS_E, RHS_E);
+
+  case Expr::Eq: return EqExpr::alloc(LHS_E, RHS_E);
+  case Expr::Ne: return NeExpr::alloc(LHS_E, RHS_E);
+  case Expr::Ult: return UltExpr::alloc(LHS_E, RHS_E);
+  case Expr::Ule: return UleExpr::alloc(LHS_E, RHS_E);
+  case Expr::Ugt: return UgtExpr::alloc(LHS_E, RHS_E);
+  case Expr::Uge: return UgeExpr::alloc(LHS_E, RHS_E);
+  case Expr::Slt: return SltExpr::alloc(LHS_E, RHS_E);
+  case Expr::Sle: return SleExpr::alloc(LHS_E, RHS_E);
+  case Expr::Sgt: return SgtExpr::alloc(LHS_E, RHS_E);
+  case Expr::Sge: return SgeExpr::alloc(LHS_E, RHS_E);
+  default:
+    Error("FIXME: unhandled kind.", Name);
+    return ref<Expr>(0, ResTy);
+  }
+}
+
+ExprResult ParserImpl::ParseSelectParenExpr(const Token &Name,
+                                            Expr::Width ResTy) {
+  // FIXME: Why does this need to be here?
+  if (Tok.kind == Token::RParen) {
+    Error("unexpected end of arguments.", Name);
+    ConsumeRParen();
+    return ref<Expr>(0, ResTy);
+  }
+
+  ExprResult Cond = ParseExpr(Expr::Bool);
+  ExprResult LHS, RHS;
+  ParseMatchedBinaryArgs(Name, ResTy, LHS, RHS);
+  if (!Cond.isValid() || !LHS.isValid() || !RHS.isValid())
+    return ref<Expr>(0, ResTy);
+  return SelectExpr::alloc(Cond.get(), LHS.get(), RHS.get());
+}
+
+
+// need to decide if we want to allow n-ary Concat expressions in the
+// language
+ExprResult ParserImpl::ParseConcatParenExpr(const Token &Name,
+                                            Expr::Width ResTy) {
+  std::vector<ExprHandle> Kids;
+
+  unsigned Width = 0;
+  while (Tok.kind != Token::RParen) {
+    ExprResult E = ParseExpr(TypeResult());
+
+    // Skip to end of expr on error.
+    if (!E.isValid()) {
+      SkipUntilRParen();
+      return ref<Expr>(0, ResTy);
+    }
+
+    Kids.push_back(E.get());
+    Width += E.get().getWidth();
+  }
+
+  ConsumeRParen();
+
+  if (Width != ResTy) {
+    Error("concat does not match expected result size.");
+    return ref<Expr>(0, ResTy);
+  }
+
+  return ConcatExpr::createN(Kids.size(), &Kids[0]);
+}
+
+ExprResult ParserImpl::ParseExtractParenExpr(const Token &Name,
+                                             Expr::Width ResTy) {
+  // FIXME: Pull out parse constant integer expression.
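+  // extract-expr = '(' 'Extract' type offset-number child-expr ')'
+  // e.g. (Extract w8 2 x) takes the 8-bit slice of x starting at bit offset 2;
+  // the offset is parsed as a 32-bit constant and range-checked below.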
+ ExprResult OffsetExpr = ParseNumber(Expr::Int32); + ExprResult Child = ParseExpr(TypeResult()); + + ExpectRParen("unexpected argument to expression."); + + if (!OffsetExpr.isValid() || !Child.isValid()) + return ref<Expr>(0, ResTy); + + assert(OffsetExpr.get().isConstant() && "ParseNumber returned non-constant."); + unsigned Offset = (unsigned) OffsetExpr.get().getConstantValue(); + + if (Offset + ResTy > Child.get().getWidth()) { + Error("extract out-of-range of child expression.", Name); + return ref<Expr>(0, ResTy); + } + + return ExtractExpr::alloc(Child.get(), Offset, ResTy); +} + +ExprResult ParserImpl::ParseAnyReadParenExpr(const Token &Name, + unsigned Kind, + Expr::Width ResTy) { + NumberOrExprResult Index = ParseNumberOrExpr(); + VersionResult Array = ParseVersionSpecifier(); + ExpectRParen("unexpected argument in read expression."); + + if (!Array.isValid()) + return ref<Expr>(0, ResTy); + + // FIXME: Need generic way to get array width. Needs to work with + // anonymous arrays. + Expr::Width ArrayDomainType = Expr::Int32; + Expr::Width ArrayRangeType = Expr::Int8; + + // Coerce number to correct type. + ExprResult IndexExpr; + if (Index.isNumber()) + IndexExpr = ParseNumberToken(ArrayDomainType, Index.getNumber()); + else + IndexExpr = Index.getExpr(); + + if (!IndexExpr.isValid()) + return ref<Expr>(0, ResTy); + else if (IndexExpr.get().getWidth() != ArrayDomainType) { + Error("index width does not match array domain."); + return ref<Expr>(0, ResTy); + } + + // FIXME: Check range width. + + switch (Kind) { + default: + assert(0 && "Invalid kind."); + return ref<Expr>(0, ResTy); + case eMacroKind_ReadLSB: + case eMacroKind_ReadMSB: { + unsigned NumReads = ResTy / ArrayRangeType; + if (ResTy != NumReads*ArrayRangeType) { + Error("invalid ordered read (not multiple of range type).", Name); + return ref<Expr>(0, ResTy); + } + std::vector<ExprHandle> Kids; + Kids.reserve(NumReads); + ExprHandle Index = IndexExpr.get(); + for (unsigned i=0; i<NumReads; ++i) { + // FIXME: using folding here + ExprHandle OffsetIndex = AddExpr::create(IndexExpr.get(), + ref<Expr>(i, ArrayDomainType)); + Kids.push_back(ReadExpr::alloc(Array.get(), OffsetIndex)); + } + if (Kind == eMacroKind_ReadLSB) + std::reverse(Kids.begin(), Kids.end()); + return ConcatExpr::createN(NumReads, &Kids[0]); + } + case Expr::Read: + return ReadExpr::alloc(Array.get(), IndexExpr.get()); + } +} + +/// version-specifier = <identifier> +/// version-specifier = [<identifier>:] [ version ] +VersionResult ParserImpl::ParseVersionSpecifier() { + const Identifier *Label = 0; + if (Tok.kind == Token::Identifier) { + Token LTok = Tok; + Label = GetOrCreateIdentifier(Tok); + ConsumeToken(); + + // FIXME: hack: add array declarations and ditch this. + if (memcmp(Label->Name.c_str(), "arr", 3) == 0) { + // Declare or create array. + const ArrayDecl *&A = ArraySymTab[Label]; + if (!A) { + // Array = new ArrayDecl(Label, 0, 32, 8); + unsigned id = atoi(&Label->Name.c_str()[3]); + Array *root = new Array(0, id, 0); + // Create update list mapping of name -> array. 
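+        // Remember the root array under this label so that later references
+        // to the same "arrN" identifier resolve to one version.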
+ VersionSymTab.insert(std::make_pair(Label, + UpdateList(root, true, NULL))); + } + } + + if (Tok.kind != Token::Colon) { + VersionSymTabTy::iterator it = VersionSymTab.find(Label); + + if (it == VersionSymTab.end()) { + Error("invalid update list label reference.", LTok); + return VersionResult(false, + UpdateList(0, true, NULL)); + } + + return it->second; + } + + ConsumeToken(); + if (VersionSymTab.count(Label)) { + Error("duplicate update list label definition.", LTok); + Label = 0; + } + } + + Token Start = Tok; + VersionResult Res = ParseVersion(); + // Define update list to avoid use-of-undef errors. + if (!Res.isValid()) + Res = VersionResult(false, + UpdateList(0, true, NULL)); + + if (Label) + VersionSymTab.insert(std::make_pair(Label, Res.get())); + return Res; +} + +/// version - '[' update-list? ']' ['@' version-specifier] +/// update-list - empty +/// update-list - lhs '=' rhs [',' update-list] +VersionResult ParserImpl::ParseVersion() { + if (Tok.kind != Token::LSquare) + return VersionResult(false, UpdateList(0, false, NULL)); + + std::vector< std::pair<NumberOrExprResult, NumberOrExprResult> > Writes; + ConsumeLSquare(); + for (;;) { + // FIXME: Type check exprs. + + // FIXME: We need to do this (the above) anyway just to handle + // implicit constants correctly. + NumberOrExprResult LHS = ParseNumberOrExpr(); + + if (Tok.kind != Token::Equals) { + Error("expected '='.", Tok); + break; + } + + ConsumeToken(); + NumberOrExprResult RHS = ParseNumberOrExpr(); + + Writes.push_back(std::make_pair(LHS, RHS)); + + if (Tok.kind == Token::Comma) + ConsumeToken(); + else + break; + } + ExpectRSquare("expected close of update list"); + + VersionHandle Base(0, false, NULL); + + // Anonymous array case. + if (Tok.kind != Token::At) { + Array *root = new Array(0, 0, 0); + Base = UpdateList(root, false, NULL); + } else { + ConsumeToken(); + + VersionResult BaseRes = ParseVersionSpecifier(); + if (!BaseRes.isValid()) + return BaseRes; + + Base = BaseRes.get(); + } + + Expr::Width ArrayDomainType = Expr::Int32; + Expr::Width ArrayRangeType = Expr::Int8; + + for (std::vector< std::pair<NumberOrExprResult, NumberOrExprResult> >::reverse_iterator + it = Writes.rbegin(), ie = Writes.rend(); it != ie; ++it) { + ExprResult LHS, RHS; + // FIXME: This can be factored into common helper for coercing a + // NumberOrExpr into an Expr. + if (it->first.isNumber()) { + LHS = ParseNumberToken(ArrayDomainType, it->first.getNumber()); + } else { + LHS = it->first.getExpr(); + if (LHS.isValid() && LHS.get().getWidth() != ArrayDomainType) { + // FIXME: bad token location. We should maybe try and know the + // array up-front? + Error("invalid value in write index (doesn't match domain).", Tok); + LHS = ExprResult(); + } + } + + if (it->second.isNumber()) { + RHS = ParseNumberToken(ArrayRangeType, it->second.getNumber()); + } else { + RHS = it->second.getExpr(); + if (RHS.isValid() && RHS.get().getWidth() != ArrayRangeType) { + // FIXME: bad token location. We should maybe try and know the + // array up-front? + Error("invalid value in write assignment (doesn't match range).", Tok); + RHS = ExprResult(); + } + } + + if (LHS.isValid() && RHS.isValid()) + Base.extend(LHS.get(), RHS.get()); + } + + return Base; +} + +/// ParseNumber - Parse a number of the given type. +ExprResult ParserImpl::ParseNumber(Expr::Width Type) { + ExprResult Res = ParseNumberToken(Type, Tok); + ConsumeExpectedToken(Token::Number); + return Res; +} + +/// ParseNumberToken - Parse a number of the given type from the given +/// token. 
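+/// Accepts an optional leading '+' or '-', the radix prefixes 0b, 0o and 0x,
+/// and '_' as a digit separator, e.g. -10, 0x1f, 0b1010_0110.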
+ExprResult ParserImpl::ParseNumberToken(Expr::Width Type, const Token &Tok) { + const char *S = Tok.start; + unsigned N = Tok.length; + unsigned Radix = 10, RadixBits = 4; + bool HasMinus = false; + + // Detect +/- (a number token cannot have both). + if (S[0] == '+') { + ++S; + --N; + } else if (S[0] == '-') { + HasMinus = true; + ++S; + --N; + } + + // Detect 0[box]. + if ((Tok.length >= 2 && S[0] == '0') && + (S[1] == 'b' || S[1] == 'o' || S[1] == 'x')) { + if (S[1] == 'b') { + Radix = 2; + RadixBits = 1; + } else if (S[1] == 'o') { + Radix = 8; + RadixBits = 3; + } else { + Radix = 16; + RadixBits = 4; + } + S += 2; + N -= 2; + + // Diagnose 0[box] with no trailing digits. + if (!N) { + Error("invalid numeric token (no digits).", Tok); + return ref<Expr>(0, Type); + } + } + + // This is a simple but slow way to handle overflow. + APInt Val(std::max(64U, RadixBits * N), 0); + APInt RadixVal(Val.getBitWidth(), Radix); + APInt DigitVal(Val.getBitWidth(), 0); + for (unsigned i=0; i<N; ++i) { + unsigned Digit, Char = S[i]; + + if (Char == '_') + continue; + + if ('0' <= Char && Char <= '9') + Digit = Char - '0'; + else if ('a' <= Char && Char <= 'z') + Digit = Char - 'a' + 10; + else if ('A' <= Char && Char <= 'Z') + Digit = Char - 'A' + 10; + else { + Error("invalid character in numeric token.", Tok); + return ref<Expr>(0, Type); + } + + if (Digit >= Radix) { + Error("invalid character in numeric token (out of range).", Tok); + return ref<Expr>(0, Type); + } + + DigitVal = Digit; + Val = Val * RadixVal + DigitVal; + } + + // FIXME: Actually do the check for overflow. + if (HasMinus) + Val = -Val; + + return ExprResult(ref<Expr>(Val.trunc(Type).getZExtValue(), Type)); +} + +/// ParseTypeSpecifier - Parse a type specifier. +/// +/// type = w[0-9]+ +TypeResult ParserImpl::ParseTypeSpecifier() { + assert(Tok.kind == Token::KWWidth && "Unexpected token."); + + // FIXME: Need APInt technically. + Token TypeTok = Tok; + int width = atoi(std::string(Tok.start+1,Tok.length-1).c_str()); + ConsumeToken(); + + // FIXME: We should impose some sort of maximum just for sanity? + return TypeResult(width); +} + +void ParserImpl::Error(const char *Message, const Token &At) { + ++NumErrors; + if (MaxErrors && NumErrors >= MaxErrors) + return; + + llvm::cerr << Filename + << ":" << At.line << ":" << At.column + << ": error: " << Message << "\n"; + + // Skip carat diagnostics on EOF token. + if (At.kind == Token::EndOfFile) + return; + + // Simple caret style diagnostics. + const char *LineBegin = At.start, *LineEnd = At.start, + *BufferBegin = TheMemoryBuffer->getBufferStart(), + *BufferEnd = TheMemoryBuffer->getBufferEnd(); + + // Run line pointers forward and back. + while (LineBegin > BufferBegin && + LineBegin[-1] != '\r' && LineBegin[-1] != '\n') + --LineBegin; + while (LineEnd < BufferEnd && + LineEnd[0] != '\r' && LineEnd[0] != '\n') + ++LineEnd; + + // Show the line. + llvm::cerr << std::string(LineBegin, LineEnd) << "\n"; + + // Show the caret or squiggly, making sure to print back spaces the + // same. + for (const char *S=LineBegin; S != At.start; ++S) + llvm::cerr << (isspace(*S) ? *S : ' '); + if (At.length > 1) { + for (unsigned i=0; i<At.length; ++i) + llvm::cerr << '~'; + } else + llvm::cerr << '^'; + llvm::cerr << '\n'; +} + +// AST API +// FIXME: Move out of parser. + +Decl::Decl() {} + +void QueryCommand::dump() { + // FIXME: This is masking the difference between an actual query and + // a query decl. 
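+  // Pretty-print the constraint set followed by the query expression.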
+ ExprPPrinter::printQuery(std::cerr, + ConstraintManager(Constraints), + Query); +} + +// Public parser API + +Parser::Parser() { +} + +Parser::~Parser() { +} + +Parser *Parser::Create(const std::string Filename, + const MemoryBuffer *MB) { + ParserImpl *P = new ParserImpl(Filename, MB); + P->Initialize(); + return P; +} diff --git a/lib/Expr/Updates.cpp b/lib/Expr/Updates.cpp new file mode 100644 index 00000000..b2ceeaf1 --- /dev/null +++ b/lib/Expr/Updates.cpp @@ -0,0 +1,126 @@ +//===-- Updates.cpp -------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Expr.h" + +#include <cassert> + +using namespace klee; + +/// + +UpdateNode::UpdateNode(const UpdateNode *_next, + const ref<Expr> &_index, + const ref<Expr> &_value) + : refCount(0), + stpArray(0), + next(_next), + index(_index), + value(_value) { + assert(_value.getWidth() == Expr::Int8 && "Update value should be 8-bit wide."); + computeHash(); + if (next) { + ++next->refCount; + size = 1 + next->size; + } + else size = 1; +} + +extern "C" void vc_DeleteExpr(void*); + +UpdateNode::~UpdateNode() { + // XXX gross + if (stpArray) + ::vc_DeleteExpr(stpArray); +} + +int UpdateNode::compare(const UpdateNode &b) const { + if (int i = index.compare(b.index)) + return i; + return value.compare(b.value); +} + +unsigned UpdateNode::computeHash() { + hashValue = index.hash() ^ value.hash(); + if (next) + hashValue ^= next->hash(); + return hashValue; +} + +/// + +UpdateList::UpdateList(const Array *_root, bool _isRooted, + const UpdateNode *_head) + : root(_root), + head(_head), + isRooted(_isRooted) { + if (head) ++head->refCount; +} + +UpdateList::UpdateList(const UpdateList &b) + : root(b.root), + head(b.head), + isRooted(b.isRooted) { + if (head) ++head->refCount; +} + +UpdateList::~UpdateList() { + // We need to be careful and avoid recursion here. We do this in + // cooperation with the private dtor of UpdateNode which does not + // recursively free its tail. + while (head && --head->refCount==0) { + const UpdateNode *n = head->next; + delete head; + head = n; + } +} + +UpdateList &UpdateList::operator=(const UpdateList &b) { + if (b.head) ++b.head->refCount; + if (head && --head->refCount==0) delete head; + root = b.root; + head = b.head; + isRooted = b.isRooted; + return *this; +} + +void UpdateList::extend(const ref<Expr> &index, const ref<Expr> &value) { + if (head) --head->refCount; + head = new UpdateNode(head, index, value); + ++head->refCount; +} + +int UpdateList::compare(const UpdateList &b) const { + // use object id to increase determinism + if (root->id != b.root->id) + return root->id < b.root->id ? 
-1 : 1; + + if (getSize() < b.getSize()) return -1; + else if (getSize() > b.getSize()) return 1; + + // XXX build comparison into update, make fast + const UpdateNode *an=head, *bn=b.head; + for (; an && bn; an=an->next,bn=bn->next) { + if (an==bn) { // exploit shared list structure + return 0; + } else { + if (int res = an->compare(*bn)) + return res; + } + } + assert(!an && !bn); + return 0; +} + +unsigned UpdateList::hash() const { + unsigned res = root->id * Expr::MAGIC_HASH_CONSTANT; + if (head) + res ^= head->hash(); + return res; +} diff --git a/lib/Makefile b/lib/Makefile new file mode 100644 index 00000000..e12fef50 --- /dev/null +++ b/lib/Makefile @@ -0,0 +1,15 @@ +#===-- lib/Makefile ----------------------------------------*- Makefile -*--===# +# +# The KLEE Symbolic Virtual Machine +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +LEVEL=.. + +PARALLEL_DIRS=Basic Support Expr Solver Module Core + +include $(LEVEL)/Makefile.common + diff --git a/lib/Module/Checks.cpp b/lib/Module/Checks.cpp new file mode 100644 index 00000000..ca4eeb44 --- /dev/null +++ b/lib/Module/Checks.cpp @@ -0,0 +1,68 @@ +//===-- Checks.cpp --------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Passes.h" + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/InstrTypes.h" +#include "llvm/Instruction.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Target/TargetData.h" + +using namespace llvm; +using namespace klee; + +char DivCheckPass::ID; + +bool DivCheckPass::runOnModule(Module &M) { + Function *divZeroCheckFunction = 0; + + bool moduleChanged = false; + + for (Module::iterator f = M.begin(), fe = M.end(); f != fe; ++f) { + for (Function::iterator b = f->begin(), be = f->end(); b != be; ++b) { + for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) { + if (BinaryOperator* binOp = dyn_cast<BinaryOperator>(i)) { + // find all [s|u][div|mod] instructions + Instruction::BinaryOps opcode = binOp->getOpcode(); + if (opcode == Instruction::SDiv || opcode == Instruction::UDiv || + opcode == Instruction::SRem || opcode == Instruction::URem) { + + CastInst *denominator = + CastInst::CreateIntegerCast(i->getOperand(1), + (Type*)Type::Int64Ty, + false, /* sign doesn't matter */ + "int_cast_to_i64", + i); + + // Lazily bind the function to avoid always importing it. 
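+          // klee_div_zero_check(i64) is declared lazily below and called with
+          // the casted denominator just before each [s|u]div / [s|u]rem.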
+ if (!divZeroCheckFunction) { + Constant *fc = M.getOrInsertFunction("klee_div_zero_check", + Type::VoidTy, + Type::Int64Ty, NULL); + divZeroCheckFunction = cast<Function>(fc); + } + + CallInst::Create(divZeroCheckFunction, denominator, "", &*i); + moduleChanged = true; + } + } + } + } + } + return moduleChanged; +} diff --git a/lib/Module/InstructionInfoTable.cpp b/lib/Module/InstructionInfoTable.cpp new file mode 100644 index 00000000..82874406 --- /dev/null +++ b/lib/Module/InstructionInfoTable.cpp @@ -0,0 +1,196 @@ +//===-- InstructionInfoTable.cpp ------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Internal/Module/InstructionInfoTable.h" + +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Linker.h" +#include "llvm/Module.h" +#include "llvm/Assembly/AsmAnnotationWriter.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" + +#include <map> +#include <iostream> +#include <fstream> +#include <sstream> +#include <string> + +using namespace llvm; +using namespace klee; + +class InstructionToLineAnnotator : public llvm::AssemblyAnnotationWriter { +public: + void emitInstructionAnnot(const Instruction *i, llvm::raw_ostream &os) { + os << "%%%" << (uintptr_t) i; + } +}; + +static void buildInstructionToLineMap(Module *m, + std::map<const Instruction*, unsigned> &out) { + InstructionToLineAnnotator a; + std::ostringstream buffer; + m->print(buffer, &a); + std::string str = buffer.str(); + const char *s; + + unsigned line = 1; + for (s=str.c_str(); *s; s++) { + if (*s=='\n') { + line++; + if (s[1]=='%' && s[2]=='%' && s[3]=='%') { + s += 4; + char *end; + unsigned long long value = strtoull(s, &end, 10); + if (end!=s) { + out.insert(std::make_pair((const Instruction*) value, line)); + } + s = end; + } + } + } +} + +static std::string getDSPIPath(DbgStopPointInst *dspi) { + std::string dir, file; + bool res = GetConstantStringInfo(dspi->getDirectory(), dir); + assert(res && "GetConstantStringInfo failed"); + res = GetConstantStringInfo(dspi->getFileName(), file); + assert(res && "GetConstantStringInfo failed"); + if (dir.empty()) { + return file; + } else if (*dir.rbegin() == '/') { + return dir + file; + } else { + return dir + "/" + file; + } +} + +InstructionInfoTable::InstructionInfoTable(Module *m) + : dummyString(""), dummyInfo(0, dummyString, 0, 0) { + unsigned id = 0; + std::map<const Instruction*, unsigned> lineTable; + buildInstructionToLineMap(m, lineTable); + + for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); + fnIt != fn_ie; ++fnIt) { + const std::string *initialFile = &dummyString; + unsigned initialLine = 0; + + // It may be better to look for the closest stoppoint to the entry + // following the CFG, but it is not clear that it ever matters in + // practice. 
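+    // Use the first llvm.dbg.stoppoint in the function as the default source
+    // location for instructions that appear before any stoppoint.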
+ for (inst_iterator it = inst_begin(fnIt), ie = inst_end(fnIt); + it != ie; ++it) { + if (DbgStopPointInst *dspi = dyn_cast<DbgStopPointInst>(&*it)) { + initialFile = internString(getDSPIPath(dspi)); + initialLine = dspi->getLine(); + break; + } + } + + typedef std::map<BasicBlock*, std::pair<const std::string*,unsigned> > + sourceinfo_ty; + sourceinfo_ty sourceInfo; + for (llvm::Function::iterator bbIt = fnIt->begin(), bbie = fnIt->end(); + bbIt != bbie; ++bbIt) { + std::pair<sourceinfo_ty::iterator, bool> + res = sourceInfo.insert(std::make_pair(bbIt, + std::make_pair(initialFile, + initialLine))); + if (!res.second) + continue; + + std::vector<BasicBlock*> worklist; + worklist.push_back(bbIt); + + do { + BasicBlock *bb = worklist.back(); + worklist.pop_back(); + + sourceinfo_ty::iterator si = sourceInfo.find(bb); + assert(si != sourceInfo.end()); + const std::string *file = si->second.first; + unsigned line = si->second.second; + + for (BasicBlock::iterator it = bb->begin(), ie = bb->end(); + it != ie; ++it) { + Instruction *instr = it; + unsigned assemblyLine = 0; + std::map<const Instruction*, unsigned>::const_iterator ltit = + lineTable.find(instr); + if (ltit!=lineTable.end()) + assemblyLine = ltit->second; + if (DbgStopPointInst *dspi = dyn_cast<DbgStopPointInst>(instr)) { + file = internString(getDSPIPath(dspi)); + line = dspi->getLine(); + } + infos.insert(std::make_pair(instr, + InstructionInfo(id++, + *file, + line, + assemblyLine))); + } + + for (succ_iterator it = succ_begin(bb), ie = succ_end(bb); + it != ie; ++it) { + if (sourceInfo.insert(std::make_pair(*it, + std::make_pair(file, line))).second) + worklist.push_back(*it); + } + } while (!worklist.empty()); + } + } +} + +InstructionInfoTable::~InstructionInfoTable() { + for (std::set<const std::string *, ltstr>::iterator + it = internedStrings.begin(), ie = internedStrings.end(); + it != ie; ++it) + delete *it; +} + +const std::string *InstructionInfoTable::internString(std::string s) { + std::set<const std::string *, ltstr>::iterator it = internedStrings.find(&s); + if (it==internedStrings.end()) { + std::string *interned = new std::string(s); + internedStrings.insert(interned); + return interned; + } else { + return *it; + } +} + +unsigned InstructionInfoTable::getMaxID() const { + return infos.size(); +} + +const InstructionInfo & +InstructionInfoTable::getInfo(const Instruction *inst) const { + std::map<const llvm::Instruction*, InstructionInfo>::const_iterator it = + infos.find(inst); + if (it==infos.end()) { + return dummyInfo; + } else { + return it->second; + } +} + +const InstructionInfo & +InstructionInfoTable::getFunctionInfo(const Function *f) const { + if (f->isDeclaration()) { + return dummyInfo; + } else { + return getInfo(f->begin()->begin()); + } +} diff --git a/lib/Module/IntrinsicCleaner.cpp b/lib/Module/IntrinsicCleaner.cpp new file mode 100644 index 00000000..e59b7ff6 --- /dev/null +++ b/lib/Module/IntrinsicCleaner.cpp @@ -0,0 +1,119 @@ +//===-- IntrinsicCleaner.cpp ----------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Passes.h" + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/InstrTypes.h" +#include "llvm/Instruction.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Target/TargetData.h" + +using namespace llvm; + +namespace klee { + +char IntrinsicCleanerPass::ID; + +bool IntrinsicCleanerPass::runOnModule(Module &M) { + bool dirty = false; + for (Module::iterator f = M.begin(), fe = M.end(); f != fe; ++f) + for (Function::iterator b = f->begin(), be = f->end(); b != be; ++b) + dirty |= runOnBasicBlock(*b); + return dirty; +} + +bool IntrinsicCleanerPass::runOnBasicBlock(BasicBlock &b) { + bool dirty = false; + + for (BasicBlock::iterator i = b.begin(), ie = b.end(); i != ie;) { + IntrinsicInst *ii = dyn_cast<IntrinsicInst>(&*i); + // increment now since LowerIntrinsic deletion makes iterator invalid. + ++i; + if(ii) { + switch (ii->getIntrinsicID()) { + case Intrinsic::vastart: + case Intrinsic::vaend: + break; + + // Lower vacopy so that object resolution etc is handled by + // normal instructions. FIXME: This is broken for non-x86_32. + case Intrinsic::vacopy: { // (dst, src) -> *((i8**) dst) = *((i8**) src) + Value *dst = ii->getOperand(1); + Value *src = ii->getOperand(2); + Type *i8pp = PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty)); + Value *castedDst = CastInst::CreatePointerCast(dst, i8pp, "vacopy.cast.dst", ii); + Value *castedSrc = CastInst::CreatePointerCast(src, i8pp, "vacopy.cast.src", ii); + Value *load = new LoadInst(castedSrc, "vacopy.read", ii); + new StoreInst(load, castedDst, false, ii); + ii->removeFromParent(); + delete ii; + break; + } + + case Intrinsic::dbg_stoppoint: { + // We can remove this stoppoint if the next instruction is + // sure to be another stoppoint. This is nice for cleanliness + // but also important for switch statements where it can allow + // the targets to be joined. + bool erase = false; + if (isa<DbgStopPointInst>(i) || + isa<UnreachableInst>(i)) { + erase = true; + } else if (isa<BranchInst>(i) || + isa<SwitchInst>(i)) { + BasicBlock *bb = i->getParent(); + erase = true; + for (succ_iterator it=succ_begin(bb), ie=succ_end(bb); + it!=ie; ++it) { + if (!isa<DbgStopPointInst>(it->getFirstNonPHI())) { + erase = false; + break; + } + } + } + + if (erase) { + ii->eraseFromParent(); + dirty = true; + } + break; + } + + case Intrinsic::dbg_region_start: + case Intrinsic::dbg_region_end: + case Intrinsic::dbg_func_start: + case Intrinsic::dbg_declare: + // Remove these regardless of lower intrinsics flag. This can + // be removed once IntrinsicLowering is fixed to not have bad + // caches. + ii->eraseFromParent(); + dirty = true; + break; + + default: + if (LowerIntrinsics) + IL->LowerIntrinsicCall(ii); + dirty = true; + break; + } + } + } + + return dirty; +} +} diff --git a/lib/Module/KInstruction.cpp b/lib/Module/KInstruction.cpp new file mode 100644 index 00000000..799620c6 --- /dev/null +++ b/lib/Module/KInstruction.cpp @@ -0,0 +1,19 @@ +//===-- KInstruction.cpp --------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "klee/Internal/Module/KInstruction.h" + +using namespace llvm; +using namespace klee; + +/***/ + +KInstruction::~KInstruction() { + delete[] operands; +} diff --git a/lib/Module/KModule.cpp b/lib/Module/KModule.cpp new file mode 100644 index 00000000..5d88fbda --- /dev/null +++ b/lib/Module/KModule.cpp @@ -0,0 +1,506 @@ +//===-- KModule.cpp -------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// FIXME: This does not belong here. +#include "../Core/Common.h" + +#include "klee/Internal/Module/KModule.h" + +#include "Passes.h" + +#include "klee/Interpreter.h" +#include "klee/Internal/Module/Cell.h" +#include "klee/Internal/Module/KInstruction.h" +#include "klee/Internal/Module/InstructionInfoTable.h" +#include "klee/Internal/Support/ModuleUtil.h" + +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/ValueSymbolTable.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/System/Path.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" + +#include <sstream> + +using namespace llvm; +using namespace klee; + +namespace { + enum SwitchImplType { + eSwitchTypeSimple, + eSwitchTypeLLVM, + eSwitchTypeInternal + }; + + cl::list<std::string> + MergeAtExit("merge-at-exit"); + + cl::opt<bool> + NoTruncateSourceLines("no-truncate-source-lines", + cl::desc("Don't truncate long lines in the output source")); + + cl::opt<bool> + OutputSource("output-source", + cl::desc("Write the assembly for the final transformed source"), + cl::init(true)); + + cl::opt<bool> + OutputModule("output-module", + cl::desc("Write the bitcode for the final transformed module"), + cl::init(false)); + + cl::opt<SwitchImplType> + SwitchType("switch-type", cl::desc("Select the implementation of switch"), + cl::values(clEnumValN(eSwitchTypeSimple, "simple", + "lower to ordered branches"), + clEnumValN(eSwitchTypeLLVM, "llvm", + "lower using LLVM"), + clEnumValN(eSwitchTypeInternal, "internal", + "execute switch internally"), + clEnumValEnd), + cl::init(eSwitchTypeInternal)); + + cl::opt<bool> + DebugPrintEscapingFunctions("debug-print-escaping-functions", + cl::desc("Print functions whose address is taken.")); +} + +KModule::KModule(Module *_module) + : module(_module), + targetData(new TargetData(module)), + dbgStopPointFn(0), + kleeMergeFn(0), + infos(0), + constantTable(0) { +} + +KModule::~KModule() { + delete[] constantTable; + delete infos; + + for (std::vector<KFunction*>::iterator it = functions.begin(), + ie = functions.end(); it != ie; ++it) + delete *it; + + delete targetData; + delete module; +} + +/***/ + +namespace llvm { +extern void Optimize(Module*); +} + +// what a hack +static Function *getStubFunctionForCtorList(Module *m, + GlobalVariable *gv, + std::string name) { + assert(!gv->isDeclaration() && !gv->hasInternalLinkage() && + "do not support old LLVM style constructor/destructor lists"); + + std::vector<const Type*> nullary; + + Function *fn = Function::Create(FunctionType::get(Type::VoidTy, + nullary, false), + GlobalVariable::InternalLinkage, + name, + m); + BasicBlock *bb = BasicBlock::Create("entry", fn); + + // From lli: + // Should be an 
array of '{ int, void ()* }' structs. The first value is + // the init priority, which we ignore. + ConstantArray *arr = dyn_cast<ConstantArray>(gv->getInitializer()); + if (arr) { + for (unsigned i=0; i<arr->getNumOperands(); i++) { + ConstantStruct *cs = cast<ConstantStruct>(arr->getOperand(i)); + assert(cs->getNumOperands()==2 && "unexpected element in ctor initializer list"); + + Constant *fp = cs->getOperand(1); + if (!fp->isNullValue()) { + if (llvm::ConstantExpr *ce = dyn_cast<llvm::ConstantExpr>(fp)) + fp = ce->getOperand(0); + + if (Function *f = dyn_cast<Function>(fp)) { + CallInst::Create(f, "", bb); + } else { + assert(0 && "unable to get function pointer from ctor initializer list"); + } + } + } + } + + ReturnInst::Create(bb); + + return fn; +} + +static void injectStaticConstructorsAndDestructors(Module *m) { + GlobalVariable *ctors = m->getNamedGlobal("llvm.global_ctors"); + GlobalVariable *dtors = m->getNamedGlobal("llvm.global_dtors"); + + if (ctors || dtors) { + Function *mainFn = m->getFunction("main"); + assert(mainFn && "unable to find main function"); + + if (ctors) + CallInst::Create(getStubFunctionForCtorList(m, ctors, "klee.ctor_stub"), + "", mainFn->begin()->begin()); + if (dtors) { + Function *dtorStub = getStubFunctionForCtorList(m, dtors, "klee.dtor_stub"); + for (Function::iterator it = mainFn->begin(), ie = mainFn->end(); + it != ie; ++it) { + if (isa<ReturnInst>(it->getTerminator())) + CallInst::Create(dtorStub, "", it->getTerminator()); + } + } + } +} + +static void forceImport(Module *m, const char *name, const Type *retType, ...) { + // If module lacks an externally visible symbol for the name then we + // need to create one. We have to look in the symbol table because + // we want to check everything (global variables, functions, and + // aliases). 
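+  // The variadic tail is a NULL-terminated list of parameter types used to
+  // build the FunctionType for the inserted declaration.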
+ + Value *v = m->getValueSymbolTable().lookup(name); + GlobalValue *gv = dyn_cast_or_null<GlobalValue>(v); + + if (!gv || gv->hasInternalLinkage()) { + va_list ap; + + va_start(ap, retType); + std::vector<const Type *> argTypes; + while (const Type *t = va_arg(ap, const Type*)) + argTypes.push_back(t); + va_end(ap); + + m->getOrInsertFunction(name, FunctionType::get(retType, argTypes, false)); + } +} + +void KModule::prepare(const Interpreter::ModuleOptions &opts, + InterpreterHandler *ih) { + if (!MergeAtExit.empty()) { + Function *mergeFn = module->getFunction("klee_merge"); + if (!mergeFn) { + const llvm::FunctionType *Ty = + FunctionType::get(Type::VoidTy, std::vector<const Type*>(), false); + mergeFn = Function::Create(Ty, GlobalVariable::ExternalLinkage, + "klee_merge", + module); + } + + for (cl::list<std::string>::iterator it = MergeAtExit.begin(), + ie = MergeAtExit.end(); it != ie; ++it) { + std::string &name = *it; + Function *f = module->getFunction(name); + if (!f) { + klee_error("cannot insert merge-at-exit for: %s (cannot find)", + name.c_str()); + } else if (f->isDeclaration()) { + klee_error("cannot insert merge-at-exit for: %s (external)", + name.c_str()); + } + + BasicBlock *exit = BasicBlock::Create("exit", f); + PHINode *result = 0; + if (f->getReturnType() != Type::VoidTy) + result = PHINode::Create(f->getReturnType(), "retval", exit); + CallInst::Create(mergeFn, "", exit); + ReturnInst::Create(result, exit); + + llvm::cerr << "KLEE: adding klee_merge at exit of: " << name << "\n"; + for (llvm::Function::iterator bbit = f->begin(), bbie = f->end(); + bbit != bbie; ++bbit) { + if (&*bbit != exit) { + Instruction *i = bbit->getTerminator(); + if (i->getOpcode()==Instruction::Ret) { + if (result) { + result->addIncoming(i->getOperand(0), bbit); + } + i->eraseFromParent(); + BranchInst::Create(exit, bbit); + } + } + } + } + } + + // Inject checks prior to optimization... we also perform the + // invariant transformations that we will end up doing later so that + // optimize is seeing what is as close as possible to the final + // module. + PassManager pm; + pm.add(new RaiseAsmPass()); + if (opts.CheckDivZero) pm.add(new DivCheckPass()); + // FIXME: This false here is to work around a bug in + // IntrinsicLowering which caches values which may eventually be + // deleted (via RAUW). This can be removed once LLVM fixes this + // issue. + pm.add(new IntrinsicCleanerPass(*targetData, false)); + pm.run(*module); + + if (opts.Optimize) + Optimize(module); + + // Force importing functions required by intrinsic lowering. Kind of + // unfortunate clutter when we don't need them but we won't know + // that until after all linking and intrinsic lowering is + // done. After linking and passes we just try to manually trim these + // by name. We only add them if such a function doesn't exist to + // avoid creating stale uses. + + forceImport(module, "memcpy", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + targetData->getIntPtrType(), (Type*) 0); + forceImport(module, "memmove", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + targetData->getIntPtrType(), (Type*) 0); + forceImport(module, "memset", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + Type::Int32Ty, + targetData->getIntPtrType(), (Type*) 0); + + // FIXME: Missing force import for various math functions. 
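+  // The forceImport calls above pre-declare memcpy/memmove/memset returning
+  // i8*, with the target's pointer-sized integer as the length argument.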
+ + // FIXME: Find a way that we can test programs without requiring + // this to be linked in, it makes low level debugging much more + // annoying. + llvm::sys::Path path(opts.LibraryDir); + path.appendComponent("libintrinsic.bca"); + module = linkWithLibrary(module, path.c_str()); + + // Needs to happen after linking (since ctors/dtors can be modified) + // and optimization (since global optimization can rewrite lists). + injectStaticConstructorsAndDestructors(module); + + // Finally, run the passes that maintain invariants we expect during + // interpretation. We run the intrinsic cleaner just in case we + // linked in something with intrinsics but any external calls are + // going to be unresolved. We really need to handle the intrinsics + // directly I think? + PassManager pm3; + pm3.add(createCFGSimplificationPass()); + switch(SwitchType) { + case eSwitchTypeInternal: break; + case eSwitchTypeSimple: pm3.add(new LowerSwitchPass()); break; + case eSwitchTypeLLVM: pm3.add(createLowerSwitchPass()); break; + default: klee_error("invalid --switch-type"); + } + pm3.add(new IntrinsicCleanerPass(*targetData)); + pm3.add(new PhiCleanerPass()); + pm3.run(*module); + + // For cleanliness see if we can discard any of the functions we + // forced to import. + Function *f; + f = module->getFunction("memcpy"); + if (f && f->use_empty()) f->eraseFromParent(); + f = module->getFunction("memmove"); + if (f && f->use_empty()) f->eraseFromParent(); + f = module->getFunction("memset"); + if (f && f->use_empty()) f->eraseFromParent(); + + + // Write out the .ll assembly file. We truncate long lines to work + // around a kcachegrind parsing bug (it puts them on new lines), so + // that source browsing works. + if (OutputSource) { + std::ostream *os = ih->openOutputFile("assembly.ll"); + assert(os && os->good() && "unable to open source output"); + + // We have an option for this in case the user wants a .ll they + // can compile. 
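+    // Otherwise long lines are cut down to 254 characters and the output is
+    // flagged as truncated.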
+ if (NoTruncateSourceLines) { + *os << *module; + } else { + bool truncated = false; + std::stringstream buffer; + buffer << *module; + std::string string = buffer.str(); + const char *position = string.c_str(); + + for (;;) { + const char *end = index(position, '\n'); + if (!end) { + *os << position; + break; + } else { + unsigned count = (end - position) + 1; + if (count<255) { + os->write(position, count); + } else { + os->write(position, 254); + *os << "\n"; + truncated = true; + } + position = end+1; + } + } + } + + delete os; + } + + if (OutputModule) { + std::ostream *f = ih->openOutputFile("final.bc"); + WriteBitcodeToFile(module, *f); + delete f; + } + + dbgStopPointFn = module->getFunction("llvm.dbg.stoppoint"); + kleeMergeFn = module->getFunction("klee_merge"); + + /* Build shadow structures */ + + infos = new InstructionInfoTable(module); + + for (Module::iterator it = module->begin(), ie = module->end(); + it != ie; ++it) { + if (it->isDeclaration()) + continue; + + KFunction *kf = new KFunction(it, this); + + for (unsigned i=0; i<kf->numInstructions; ++i) { + KInstruction *ki = kf->instructions[i]; + ki->info = &infos->getInfo(ki->inst); + } + + functions.push_back(kf); + functionMap.insert(std::make_pair(it, kf)); + } + + /* Compute various interesting properties */ + + for (std::vector<KFunction*>::iterator it = functions.begin(), + ie = functions.end(); it != ie; ++it) { + KFunction *kf = *it; + if (functionEscapes(kf->function)) + escapingFunctions.insert(kf->function); + } + + if (DebugPrintEscapingFunctions && !escapingFunctions.empty()) { + llvm::cerr << "KLEE: escaping functions: ["; + for (std::set<Function*>::iterator it = escapingFunctions.begin(), + ie = escapingFunctions.end(); it != ie; ++it) { + llvm::cerr << (*it)->getName() << ", "; + } + llvm::cerr << "]\n"; + } +} + +KConstant* KModule::getKConstant(Constant *c) { + std::map<llvm::Constant*, KConstant*>::iterator it = constantMap.find(c); + if (it != constantMap.end()) + return it->second; + return NULL; +} + +unsigned KModule::getConstantID(Constant *c, KInstruction* ki) { + KConstant *kc = getKConstant(c); + if (kc) + return kc->id; + + unsigned id = constants.size(); + kc = new KConstant(c, id, ki); + constantMap.insert(std::make_pair(c, kc)); + constants.push_back(c); + return id; +} + +/***/ + +KConstant::KConstant(llvm::Constant* _ct, unsigned _id, KInstruction* _ki) { + ct = _ct; + id = _id; + ki = _ki; +} + +/***/ + +KFunction::KFunction(llvm::Function *_function, + KModule *km) + : function(_function), + numArgs(function->arg_size()), + numInstructions(0), + trackCoverage(true) { + for (llvm::Function::iterator bbit = function->begin(), + bbie = function->end(); bbit != bbie; ++bbit) { + BasicBlock *bb = bbit; + basicBlockEntry[bb] = numInstructions; + numInstructions += bb->size(); + } + + instructions = new KInstruction*[numInstructions]; + + std::map<Instruction*, unsigned> registerMap; + + // The first arg_size() registers are reserved for formals. 
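+  // Instruction results are numbered after the formals. Operand slots below
+  // hold a register or argument index when non-negative, -1 for basic blocks
+  // and inline asm, and -(constantID + 2) for constants.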
+ unsigned rnum = numArgs; + for (llvm::Function::iterator bbit = function->begin(), + bbie = function->end(); bbit != bbie; ++bbit) { + for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end(); + it != ie; ++it) + registerMap[it] = rnum++; + } + numRegisters = rnum; + + unsigned i = 0; + for (llvm::Function::iterator bbit = function->begin(), + bbie = function->end(); bbit != bbie; ++bbit) { + for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end(); + it != ie; ++it) { + KInstruction *ki; + + switch(it->getOpcode()) { + case Instruction::GetElementPtr: + ki = new KGEPInstruction(); break; + default: + ki = new KInstruction(); break; + } + + unsigned numOperands = it->getNumOperands(); + ki->inst = it; + ki->operands = new int[numOperands]; + ki->dest = registerMap[it]; + for (unsigned j=0; j<numOperands; j++) { + Value *v = it->getOperand(j); + + if (Instruction *inst = dyn_cast<Instruction>(v)) { + ki->operands[j] = registerMap[inst]; + } else if (Argument *a = dyn_cast<Argument>(v)) { + ki->operands[j] = a->getArgNo(); + } else if (isa<BasicBlock>(v) || isa<InlineAsm>(v)) { + ki->operands[j] = -1; + } else { + assert(isa<Constant>(v)); + Constant *c = cast<Constant>(v); + ki->operands[j] = -(km->getConstantID(c, ki) + 2); + } + } + + instructions[i++] = ki; + } + } +} + +KFunction::~KFunction() { + for (unsigned i=0; i<numInstructions; ++i) + delete instructions[i]; + delete[] instructions; +} diff --git a/lib/Module/LowerSwitch.cpp b/lib/Module/LowerSwitch.cpp new file mode 100644 index 00000000..a1b887f3 --- /dev/null +++ b/lib/Module/LowerSwitch.cpp @@ -0,0 +1,134 @@ +//===-- LowerSwitch.cpp - Eliminate Switch instructions -------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Derived from LowerSwitch.cpp in LLVM, heavily modified by piotrek +// to get rid of the binary search transform, as it was creating +// multiple paths through the program (i.e., extra paths that didn't +// exist in the original program). +// +//===----------------------------------------------------------------------===// + +#include "Passes.h" +#include <algorithm> + +using namespace llvm; + +namespace klee { + +char LowerSwitchPass::ID = 0; + +// The comparison function for sorting the switch case values in the vector. 
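+// Cases are ordered by signed comparison of their ConstantInt values.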
+struct SwitchCaseCmp { + bool operator () (const LowerSwitchPass::SwitchCase& C1, + const LowerSwitchPass::SwitchCase& C2) { + + const ConstantInt* CI1 = cast<const ConstantInt>(C1.value); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.value); + return CI1->getValue().slt(CI2->getValue()); + } +}; + +bool LowerSwitchPass::runOnFunction(Function &F) { + bool changed = false; + + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *cur = I++; // Advance over block so we don't traverse new blocks + + if (SwitchInst *SI = dyn_cast<SwitchInst>(cur->getTerminator())) { + changed = true; + processSwitchInst(SI); + } + } + + return changed; +} + +// switchConvert - Convert the switch statement into a linear scan +// through all the case values +void LowerSwitchPass::switchConvert(CaseItr begin, CaseItr end, + Value* value, BasicBlock* origBlock, + BasicBlock* defaultBlock) +{ + BasicBlock *curHead = defaultBlock; + Function *F = origBlock->getParent(); + + // iterate through all the cases, creating a new BasicBlock for each + for (CaseItr it = begin; it < end; ++it) { + BasicBlock *newBlock = BasicBlock::Create("NodeBlock"); + Function::iterator FI = origBlock; + F->getBasicBlockList().insert(++FI, newBlock); + + ICmpInst *cmpInst = new ICmpInst(ICmpInst::ICMP_EQ, + value, + it->value, + "Case Comparison"); + + newBlock->getInstList().push_back(cmpInst); + BranchInst::Create(it->block, curHead, cmpInst, newBlock); + + // If there were any PHI nodes in this successor, rewrite one entry + // from origBlock to come from newBlock. + for (BasicBlock::iterator bi = it->block->begin(); isa<PHINode>(bi); ++bi) { + PHINode* PN = cast<PHINode>(bi); + + int blockIndex = PN->getBasicBlockIndex(origBlock); + assert(blockIndex != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)blockIndex, newBlock); + } + + curHead = newBlock; + } + + // Branch to our shiny new if-then stuff... + BranchInst::Create(curHead, origBlock); +} + +// processSwitchInst - Replace the specified switch instruction with a sequence +// of chained if-then instructions. +// +void LowerSwitchPass::processSwitchInst(SwitchInst *SI) { + BasicBlock *origBlock = SI->getParent(); + BasicBlock *defaultBlock = SI->getDefaultDest(); + Function *F = origBlock->getParent(); + Value *switchValue = SI->getOperand(0); + + // Create a new, empty default block so that the new hierarchy of + // if-then statements go to this and the PHI nodes are happy. + BasicBlock* newDefault = BasicBlock::Create("newDefault"); + + F->getBasicBlockList().insert(defaultBlock, newDefault); + BranchInst::Create(defaultBlock, newDefault); + + // If there is an entry in any PHI nodes for the default edge, make sure + // to update them as well. + for (BasicBlock::iterator I = defaultBlock->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + int BlockIdx = PN->getBasicBlockIndex(origBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, newDefault); + } + + CaseVector cases; + for (unsigned i = 1; i < SI->getNumSuccessors(); ++i) + cases.push_back(SwitchCase(SI->getSuccessorValue(i), + SI->getSuccessor(i))); + + // reverse cases, as switchConvert constructs a chain of + // basic blocks by appending to the front. 
if we reverse, + // the if comparisons will happen in the same order + // as the cases appear in the switch + std::reverse(cases.begin(), cases.end()); + + switchConvert(cases.begin(), cases.end(), switchValue, origBlock, newDefault); + + // We are now done with the switch instruction, so delete it + origBlock->getInstList().erase(SI); +} + +} diff --git a/lib/Module/Makefile b/lib/Module/Makefile new file mode 100755 index 00000000..bfd7c469 --- /dev/null +++ b/lib/Module/Makefile @@ -0,0 +1,16 @@ +#===-- lib/Module/Makefile ---------------------------------*- Makefile -*--===# +# +# The KLEE Symbolic Virtual Machine +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +LEVEL=../.. + +LIBRARYNAME=kleeModule +DONT_BUILD_RELINKED=1 +BUILD_ARCHIVE=1 + +include $(LEVEL)/Makefile.common diff --git a/lib/Module/ModuleUtil.cpp b/lib/Module/ModuleUtil.cpp new file mode 100644 index 00000000..d86b9d48 --- /dev/null +++ b/lib/Module/ModuleUtil.cpp @@ -0,0 +1,101 @@ +//===-- ModuleUtil.cpp ----------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Internal/Support/ModuleUtil.h" + +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Linker.h" +#include "llvm/Module.h" +#include "llvm/Assembly/AsmAnnotationWriter.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" + +#include <map> +#include <iostream> +#include <fstream> +#include <sstream> +#include <string> + +using namespace llvm; +using namespace klee; + +Module *klee::linkWithLibrary(Module *module, + const std::string &libraryName) { + try { + Linker linker("klee", module, false); + + llvm::sys::Path libraryPath(libraryName); + bool native = false; + + if (linker.LinkInFile(libraryPath, native)) { + assert(0 && "linking in library failed!"); + } + + return linker.releaseModule(); + } catch (...) { + assert(0 && "error during linking"); + } +} + +Function *klee::getDirectCallTarget(const Instruction *i) { + assert(isa<CallInst>(i) || isa<InvokeInst>(i)); + + Value *v = i->getOperand(0); + if (Function *f = dyn_cast<Function>(v)) { + return f; + } else if (llvm::ConstantExpr *ce = dyn_cast<llvm::ConstantExpr>(v)) { + if (ce->getOpcode()==Instruction::BitCast) + if (Function *f = dyn_cast<Function>(ce->getOperand(0))) + return f; + + // NOTE: This assert may fire, it isn't necessarily a problem and + // can be disabled, I just wanted to know when and if it happened. + assert(0 && "FIXME: Unresolved direct target for a constant expression."); + } + + return 0; +} + +static bool valueIsOnlyCalled(const Value *v) { + for (Value::use_const_iterator it = v->use_begin(), ie = v->use_end(); + it != ie; ++it) { + if (const Instruction *instr = dyn_cast<Instruction>(*it)) { + if (instr->getOpcode()==0) continue; // XXX function numbering inst + if (!isa<CallInst>(instr) && !isa<InvokeInst>(instr)) return false; + + // Make sure that the value is only the target of this call and + // not an argument. 
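+      // Operand 0 is the callee; v appearing in any later operand means its
+      // address escapes as data.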
+ for (unsigned i=1,e=instr->getNumOperands(); i!=e; ++i) + if (instr->getOperand(i)==v) + return false; + } else if (const llvm::ConstantExpr *ce = + dyn_cast<llvm::ConstantExpr>(*it)) { + if (ce->getOpcode()==Instruction::BitCast) + if (valueIsOnlyCalled(ce)) + continue; + return false; + } else if (const GlobalAlias *ga = dyn_cast<GlobalAlias>(*it)) { + // XXX what about v is bitcast of aliasee? + if (v==ga->getAliasee() && !valueIsOnlyCalled(ga)) + return false; + } else { + return false; + } + } + + return true; +} + +bool klee::functionEscapes(const Function *f) { + return !valueIsOnlyCalled(f); +} diff --git a/lib/Module/Optimize.cpp b/lib/Module/Optimize.cpp new file mode 100644 index 00000000..83e67292 --- /dev/null +++ b/lib/Module/Optimize.cpp @@ -0,0 +1,272 @@ +// FIXME: This file is a bastard child of opt.cpp and llvm-ld's +// Optimize.cpp. This stuff should live in common code. + + +//===- Optimize.cpp - Optimize a complete program -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements all optimization of the linked module for llvm-ld. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/System/DynamicLibrary.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/PassNameParser.h" +#include "llvm/Support/PluginLoader.h" +#include <iostream> +using namespace llvm; + +#if 0 +// Pass Name Options as generated by the PassNameParser +static cl::list<const PassInfo*, bool, PassNameParser> + OptimizationList(cl::desc("Optimizations available:")); +#endif + +// Don't verify at the end +static cl::opt<bool> DontVerify("disable-verify", cl::ReallyHidden); + +static cl::opt<bool> DisableInline("disable-inlining", + cl::desc("Do not run the inliner pass")); + +static cl::opt<bool> +DisableOptimizations("disable-opt", + cl::desc("Do not run any optimization passes")); + +static cl::opt<bool> DisableInternalize("disable-internalize", + cl::desc("Do not mark all symbols as internal")); + +static cl::opt<bool> VerifyEach("verify-each", + cl::desc("Verify intermediate results of all passes")); + +static cl::alias ExportDynamic("export-dynamic", + cl::aliasopt(DisableInternalize), + cl::desc("Alias for -disable-internalize")); + +static cl::opt<bool> Strip("strip-all", + cl::desc("Strip all symbol info from executable")); + +static cl::alias A0("s", cl::desc("Alias for --strip-all"), + cl::aliasopt(Strip)); + +static cl::opt<bool> StripDebug("strip-debug", + cl::desc("Strip debugger symbol info from executable")); + +static cl::alias A1("S", cl::desc("Alias for --strip-debug"), + cl::aliasopt(StripDebug)); + +// A utility function that adds a pass to the pass manager but will also add +// a verifier pass after if we're supposed to verify. +static inline void addPass(PassManager &PM, Pass *P) { + // Add the pass to the pass manager... + PM.add(P); + + // If we are verifying all of the intermediate steps, add the verifier... 
+ if (VerifyEach) + PM.add(createVerifierPass()); +} + +namespace llvm { + + +static void AddStandardCompilePasses(PassManager &PM) { + PM.add(createVerifierPass()); // Verify that input is correct + + addPass(PM, createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp + + // If the -strip-debug command line option was specified, do it. + if (StripDebug) + addPass(PM, createStripSymbolsPass(true)); + + if (DisableOptimizations) return; + + addPass(PM, createRaiseAllocationsPass()); // call %malloc -> malloc inst + addPass(PM, createCFGSimplificationPass()); // Clean up disgusting code + addPass(PM, createPromoteMemoryToRegisterPass());// Kill useless allocas + addPass(PM, createGlobalOptimizerPass()); // Optimize out global vars + addPass(PM, createGlobalDCEPass()); // Remove unused fns and globs + addPass(PM, createIPConstantPropagationPass());// IP Constant Propagation + addPass(PM, createDeadArgEliminationPass()); // Dead argument elimination + addPass(PM, createInstructionCombiningPass()); // Clean up after IPCP & DAE + addPass(PM, createCFGSimplificationPass()); // Clean up after IPCP & DAE + + addPass(PM, createPruneEHPass()); // Remove dead EH info + addPass(PM, createFunctionAttrsPass()); // Deduce function attrs + + if (!DisableInline) + addPass(PM, createFunctionInliningPass()); // Inline small functions + addPass(PM, createArgumentPromotionPass()); // Scalarize uninlined fn args + + addPass(PM, createSimplifyLibCallsPass()); // Library Call Optimizations + addPass(PM, createInstructionCombiningPass()); // Cleanup for scalarrepl. + addPass(PM, createJumpThreadingPass()); // Thread jumps. + addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs + addPass(PM, createScalarReplAggregatesPass()); // Break up aggregate allocas + addPass(PM, createInstructionCombiningPass()); // Combine silly seq's + addPass(PM, createCondPropagationPass()); // Propagate conditionals + + addPass(PM, createTailCallEliminationPass()); // Eliminate tail calls + addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs + addPass(PM, createReassociatePass()); // Reassociate expressions + addPass(PM, createLoopRotatePass()); + addPass(PM, createLICMPass()); // Hoist loop invariants + addPass(PM, createLoopUnswitchPass()); // Unswitch loops. + addPass(PM, createLoopIndexSplitPass()); // Index split loops. + // FIXME : Removing instcombine causes nestedloop regression. + addPass(PM, createInstructionCombiningPass()); + addPass(PM, createIndVarSimplifyPass()); // Canonicalize indvars + addPass(PM, createLoopDeletionPass()); // Delete dead loops + addPass(PM, createLoopUnrollPass()); // Unroll small loops + addPass(PM, createInstructionCombiningPass()); // Clean up after the unroller + addPass(PM, createGVNPass()); // Remove redundancies + addPass(PM, createMemCpyOptPass()); // Remove memcpy / form memset + addPass(PM, createSCCPPass()); // Constant prop with SCCP + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. 
+ addPass(PM, createInstructionCombiningPass()); + addPass(PM, createCondPropagationPass()); // Propagate conditionals + + addPass(PM, createDeadStoreEliminationPass()); // Delete dead stores + addPass(PM, createAggressiveDCEPass()); // Delete dead instructions + addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs + addPass(PM, createStripDeadPrototypesPass()); // Get rid of dead prototypes + addPass(PM, createDeadTypeEliminationPass()); // Eliminate dead types + addPass(PM, createConstantMergePass()); // Merge dup global constants +} + +/// Optimize - Perform link time optimizations. This will run the scalar +/// optimizations, any loaded plugin-optimization modules, and then the +/// inter-procedural optimizations if applicable. +void Optimize(Module* M) { + + // Instantiate the pass manager to organize the passes. + PassManager Passes; + + // If we're verifying, start off with a verification pass. + if (VerifyEach) + Passes.add(createVerifierPass()); + + // Add an appropriate TargetData instance for this module... + addPass(Passes, new TargetData(M)); + + // DWD - Run the opt standard pass list as well. + AddStandardCompilePasses(Passes); + + if (!DisableOptimizations) { + // Now that composite has been compiled, scan through the module, looking + // for a main function. If main is defined, mark all other functions + // internal. + if (!DisableInternalize) + addPass(Passes, createInternalizePass(true)); + + // Propagate constants at call sites into the functions they call. This + // opens opportunities for globalopt (and inlining) by substituting function + // pointers passed as arguments to direct uses of functions. + addPass(Passes, createIPSCCPPass()); + + // Now that we internalized some globals, see if we can hack on them! + addPass(Passes, createGlobalOptimizerPass()); + + // Linking modules together can lead to duplicated global constants, only + // keep one copy of each constant... + addPass(Passes, createConstantMergePass()); + + // Remove unused arguments from functions... + addPass(Passes, createDeadArgEliminationPass()); + + // Reduce the code after globalopt and ipsccp. Both can open up significant + // simplification opportunities, and both can propagate functions through + // function pointers. When this happens, we often have to resolve varargs + // calls, etc, so let instcombine do this. + addPass(Passes, createInstructionCombiningPass()); + + if (!DisableInline) + addPass(Passes, createFunctionInliningPass()); // Inline small functions + + addPass(Passes, createPruneEHPass()); // Remove dead EH info + addPass(Passes, createGlobalOptimizerPass()); // Optimize globals again. + addPass(Passes, createGlobalDCEPass()); // Remove dead functions + + // If we didn't decide to inline a function, check to see if we can + // transform it to pass arguments by value instead of by reference. + addPass(Passes, createArgumentPromotionPass()); + + // The IPO passes may leave cruft around. Clean up after them. + addPass(Passes, createInstructionCombiningPass()); + addPass(Passes, createJumpThreadingPass()); // Thread jumps. + addPass(Passes, createScalarReplAggregatesPass()); // Break up allocas + + // Run a few AA driven optimizations here and now, to cleanup the code. 
+ addPass(Passes, createFunctionAttrsPass()); // Add nocapture + addPass(Passes, createGlobalsModRefPass()); // IP alias analysis + + addPass(Passes, createLICMPass()); // Hoist loop invariants + addPass(Passes, createGVNPass()); // Remove redundancies + addPass(Passes, createMemCpyOptPass()); // Remove dead memcpy's + addPass(Passes, createDeadStoreEliminationPass()); // Nuke dead stores + + // Cleanup and simplify the code after the scalar optimizations. + addPass(Passes, createInstructionCombiningPass()); + + addPass(Passes, createJumpThreadingPass()); // Thread jumps. + addPass(Passes, createPromoteMemoryToRegisterPass()); // Cleanup jumpthread. + + // Delete basic blocks, which optimization passes may have killed... + addPass(Passes, createCFGSimplificationPass()); + + // Now that we have optimized the program, discard unreachable functions... + addPass(Passes, createGlobalDCEPass()); + } + + // If the -s or -S command line options were specified, strip the symbols out + // of the resulting program to make it smaller. -s and -S are GNU ld options + // that we are supporting; they alias -strip-all and -strip-debug. + if (Strip || StripDebug) + addPass(Passes, createStripSymbolsPass(StripDebug && !Strip)); + +#if 0 + // Create a new optimization pass for each one specified on the command line + std::auto_ptr<TargetMachine> target; + for (unsigned i = 0; i < OptimizationList.size(); ++i) { + const PassInfo *Opt = OptimizationList[i]; + if (Opt->getNormalCtor()) + addPass(Passes, Opt->getNormalCtor()()); + else + std::cerr << "llvm-ld: cannot create pass: " << Opt->getPassName() + << "\n"; + } +#endif + + // The user's passes may leave cruft around. Clean up after them them but + // only if we haven't got DisableOptimizations set + if (!DisableOptimizations) { + addPass(Passes, createInstructionCombiningPass()); + addPass(Passes, createCFGSimplificationPass()); + addPass(Passes, createAggressiveDCEPass()); + addPass(Passes, createGlobalDCEPass()); + } + + // Make sure everything is still good. + if (!DontVerify) + Passes.add(createVerifierPass()); + + // Run our queue of passes all at once now, efficiently. + Passes.run(*M); +} + +} diff --git a/lib/Module/Passes.h b/lib/Module/Passes.h new file mode 100644 index 00000000..23205f75 --- /dev/null +++ b/lib/Module/Passes.h @@ -0,0 +1,132 @@ +//===-- Passes.h ------------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_PASSES_H +#define KLEE_PASSES_H + +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/IntrinsicLowering.h" + +namespace llvm { + class Function; + class Instruction; + class Module; + class TargetData; + class Type; +} + +namespace klee { + + /// RaiseAsmPass - This pass raises some common occurences of inline + /// asm which are used by glibc into normal LLVM IR. 
+class RaiseAsmPass : public llvm::ModulePass { + static char ID; + + llvm::Function *getIntrinsic(llvm::Module &M, + unsigned IID, + const llvm::Type **Tys, + unsigned NumTys); + llvm::Function *getIntrinsic(llvm::Module &M, + unsigned IID, + const llvm::Type *Ty0) { + return getIntrinsic(M, IID, &Ty0, 1); + } + + bool runOnInstruction(llvm::Module &M, llvm::Instruction *I); + +public: + RaiseAsmPass() : llvm::ModulePass((intptr_t) &ID) {} + + virtual bool runOnModule(llvm::Module &M); +}; + + // This is a module pass because it can add and delete module + // variables (via intrinsic lowering). +class IntrinsicCleanerPass : public llvm::ModulePass { + static char ID; + llvm::IntrinsicLowering *IL; + bool LowerIntrinsics; + + bool runOnBasicBlock(llvm::BasicBlock &b); +public: + IntrinsicCleanerPass(const llvm::TargetData &TD, + bool LI=true) + : llvm::ModulePass((intptr_t) &ID), + IL(new llvm::IntrinsicLowering(TD)), + LowerIntrinsics(LI) {} + ~IntrinsicCleanerPass() { delete IL; } + + virtual bool runOnModule(llvm::Module &M); +}; + + // performs two transformations which make interpretation + // easier and faster. + // + // 1) Ensure that all the PHI nodes in a basic block have + // the incoming block list in the same order. Thus the + // incoming block index only needs to be computed once + // for each transfer. + // + // 2) Ensure that no PHI node result is used as an argument to + // a subsequent PHI node in the same basic block. This allows + // the transfer to execute the instructions in order instead + // of in two passes. +class PhiCleanerPass : public llvm::FunctionPass { + static char ID; + +public: + PhiCleanerPass() : llvm::FunctionPass((intptr_t) &ID) {} + + virtual bool runOnFunction(llvm::Function &f); +}; + +class DivCheckPass : public llvm::ModulePass { + static char ID; +public: + DivCheckPass(): ModulePass((intptr_t) &ID) {} + virtual bool runOnModule(llvm::Module &M); +}; + +/// LowerSwitchPass - Replace all SwitchInst instructions with chained branch +/// instructions. Note that this cannot be a BasicBlock pass because it +/// modifies the CFG! +class LowerSwitchPass : public llvm::FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + LowerSwitchPass() : FunctionPass((intptr_t) &ID) {} + + virtual bool runOnFunction(llvm::Function &F); + + struct SwitchCase { + llvm ::Constant *value; + llvm::BasicBlock *block; + + SwitchCase() : value(0), block(0) { } + SwitchCase(llvm::Constant *v, llvm::BasicBlock *b) : + value(v), block(b) { } + }; + + typedef std::vector<SwitchCase> CaseVector; + typedef std::vector<SwitchCase>::iterator CaseItr; + +private: + void processSwitchInst(llvm::SwitchInst *SI); + void switchConvert(CaseItr begin, + CaseItr end, + llvm::Value *value, + llvm::BasicBlock *origBlock, + llvm::BasicBlock *defaultBlock); +}; + +} + +#endif diff --git a/lib/Module/PhiCleaner.cpp b/lib/Module/PhiCleaner.cpp new file mode 100644 index 00000000..3d8d7867 --- /dev/null +++ b/lib/Module/PhiCleaner.cpp @@ -0,0 +1,83 @@ +//===-- PhiCleaner.cpp ----------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
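The two PhiCleanerPass transformations described above are easiest to see from the interpreter's side. The toy model below (assumed names, not LLVM's API, and not part of the commit) shows why a canonical incoming-block order pays off: once every phi in a block lists its predecessors in the same order, the index of the edge we arrived on is computed once per transfer instead of once per phi.

    #include <cassert>
    #include <vector>

    struct Phi {
      std::vector<int>  incomingBlock;   // identical order for every phi after cleanup
      std::vector<long> incomingValue;
    };

    // Evaluate all phis of a block, given the predecessor block we came from.
    static void transfer(const std::vector<Phi> &phis, int predBlock,
                         std::vector<long> &results) {
      results.clear();
      if (phis.empty()) return;
      size_t idx = 0;                    // single lookup shared by every phi
      while (idx < phis[0].incomingBlock.size() &&
             phis[0].incomingBlock[idx] != predBlock)
        ++idx;
      assert(idx < phis[0].incomingBlock.size() && "unknown predecessor");
      for (size_t i = 0; i < phis.size(); ++i)
        results.push_back(phis[i].incomingValue[idx]);
    }

The second transformation matters when a phi's incoming value is itself another phi of the same block: after cleanup the phis can be executed strictly in order, without first copying all of the old values aside.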
+// +//===----------------------------------------------------------------------===// + +#include "Passes.h" + +#include <set> + +using namespace llvm; + +char klee::PhiCleanerPass::ID = 0; + +bool klee::PhiCleanerPass::runOnFunction(Function &f) { + bool changed = false; + + for (Function::iterator b = f.begin(), be = f.end(); b != be; ++b) { + BasicBlock::iterator it = b->begin(); + + if (it->getOpcode() == Instruction::PHI) { + PHINode *reference = cast<PHINode>(it); + + std::set<Value*> phis; + phis.insert(reference); + + unsigned numBlocks = reference->getNumIncomingValues(); + for (++it; isa<PHINode>(*it); ++it) { + PHINode *pi = cast<PHINode>(it); + + assert(numBlocks == pi->getNumIncomingValues()); + + // see if it is out of order + unsigned i; + for (i=0; i<numBlocks; i++) + if (pi->getIncomingBlock(i) != reference->getIncomingBlock(i)) + break; + + if (i!=numBlocks) { + std::vector<Value*> values; + values.reserve(numBlocks); + for (unsigned i=0; i<numBlocks; i++) + values[i] = pi->getIncomingValueForBlock(reference->getIncomingBlock(i)); + for (unsigned i=0; i<numBlocks; i++) { + pi->setIncomingBlock(i, reference->getIncomingBlock(i)); + pi->setIncomingValue(i, values[i]); + } + changed = true; + } + + // see if it uses any previously defined phi nodes + for (i=0; i<numBlocks; i++) { + Value *value = pi->getIncomingValue(i); + + if (phis.find(value) != phis.end()) { + // fix by making a "move" at the end of the incoming block + // to a new temporary, which is thus known not to be a phi + // result. we could be somewhat more efficient about this + // by sharing temps and by reordering phi instructions so + // this isn't completely necessary, but in the end this is + // just a pathological case which does not occur very + // often. + Instruction *tmp = + new BitCastInst(value, + value->getType(), + value->getName() + ".phiclean", + pi->getIncomingBlock(i)->getTerminator()); + pi->setIncomingValue(i, tmp); + } + + changed = true; + } + + phis.insert(pi); + } + } + } + + return changed; +} diff --git a/lib/Module/RaiseAsm.cpp b/lib/Module/RaiseAsm.cpp new file mode 100644 index 00000000..67fbf8ae --- /dev/null +++ b/lib/Module/RaiseAsm.cpp @@ -0,0 +1,69 @@ +//===-- RaiseAsm.cpp ------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Passes.h" + +#include "llvm/InlineAsm.h" + +using namespace llvm; +using namespace klee; + +char RaiseAsmPass::ID = 0; + +Function *RaiseAsmPass::getIntrinsic(llvm::Module &M, + unsigned IID, + const Type **Tys, + unsigned NumTys) { + return Intrinsic::getDeclaration(&M, (llvm::Intrinsic::ID) IID, Tys, NumTys); +} + +// FIXME: This should just be implemented as a patch to +// X86TargetAsmInfo.cpp, then everyone will benefit. 
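The inline-asm strings matched in runOnInstruction below are x86 byte-swap idioms, which is why they are rewritten to the llvm.bswap intrinsic. As a reference point only (this snippet is not part of the commit), the 32-bit rorw/rorl sequence computes an ordinary byte swap:

    #include <cassert>
    #include <cstdint>

    static uint32_t bswap32(uint32_t x) {
      return (x >> 24) | ((x >> 8) & 0x0000FF00u) |
             ((x << 8) & 0x00FF0000u) | (x << 24);
    }

    int main() {
      assert(bswap32(0x12345678u) == 0x78563412u);  // same result llvm.bswap gives
      return 0;
    }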
+bool RaiseAsmPass::runOnInstruction(Module &M, Instruction *I) { + if (CallInst *ci = dyn_cast<CallInst>(I)) { + if (InlineAsm *ia = dyn_cast<InlineAsm>(ci->getCalledValue())) { + const std::string &as = ia->getAsmString(); + const std::string &cs = ia->getConstraintString(); + const llvm::Type *T = ci->getType(); + + // bswaps + if (ci->getNumOperands() == 2 && + T == ci->getOperand(1)->getType() && + ((T == llvm::Type::Int16Ty && + as == "rorw $$8, ${0:w}" && + cs == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") || + (T == llvm::Type::Int32Ty && + as == "rorw $$8, ${0:w};rorl $$16, $0;rorw $$8, ${0:w}" && + cs == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"))) { + llvm::Value *Arg0 = ci->getOperand(1); + Function *F = getIntrinsic(M, Intrinsic::bswap, Arg0->getType()); + ci->setOperand(0, F); + return true; + } + } + } + + return false; +} + +bool RaiseAsmPass::runOnModule(Module &M) { + bool changed = false; + + for (Module::iterator fi = M.begin(), fe = M.end(); fi != fe; ++fi) { + for (Function::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi) { + for (BasicBlock::iterator ii = bi->begin(), ie = bi->end(); ii != ie;) { + Instruction *i = ii; + ++ii; + changed |= runOnInstruction(M, i); + } + } + } + + return changed; +} diff --git a/lib/README.txt b/lib/README.txt new file mode 100644 index 00000000..1750d900 --- /dev/null +++ b/lib/README.txt @@ -0,0 +1,18 @@ +The klee and kleaver code is organized as follows: + +lib/Basic - Low level support for both klee and kleaver which should + be independent of LLVM. + +lib/Support - Higher level support, but only used by klee. This can + use LLVM facilities. + +lib/Expr - The core kleaver expression library. + +lib/Solver - The kleaver solver library. + +lib/Module - klee facilities for working with LLVM modules, including + the shadow module/instruction structures we use during + execution. + +lib/Core - The core symbolic virtual machine. + diff --git a/lib/Solver/CachingSolver.cpp b/lib/Solver/CachingSolver.cpp new file mode 100644 index 00000000..517e133b --- /dev/null +++ b/lib/Solver/CachingSolver.cpp @@ -0,0 +1,241 @@ +//===-- CachingSolver.cpp - Caching expression solver ---------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + + +#include "klee/Solver.h" + +#include "klee/Constraints.h" +#include "klee/Expr.h" +#include "klee/IncompleteSolver.h" +#include "klee/SolverImpl.h" + +#include "SolverStats.h" + +#include <tr1/unordered_map> + +using namespace klee; + +class CachingSolver : public SolverImpl { +private: + ref<Expr> canonicalizeQuery(ref<Expr> originalQuery, + bool &negationUsed); + + void cacheInsert(const Query& query, + IncompleteSolver::PartialValidity result); + + bool cacheLookup(const Query& query, + IncompleteSolver::PartialValidity &result); + + struct CacheEntry { + CacheEntry(const ConstraintManager &c, ref<Expr> q) + : constraints(c), query(q) {} + + CacheEntry(const CacheEntry &ce) + : constraints(ce.constraints), query(ce.query) {} + + ConstraintManager constraints; + ref<Expr> query; + + bool operator==(const CacheEntry &b) const { + return constraints==b.constraints && *query.get()==*b.query.get(); + } + }; + + struct CacheEntryHash { + unsigned operator()(const CacheEntry &ce) const { + unsigned result = ce.query.hash(); + + for (ConstraintManager::constraint_iterator it = ce.constraints.begin(); + it != ce.constraints.end(); ++it) + result ^= it->hash(); + + return result; + } + }; + + typedef std::tr1::unordered_map<CacheEntry, + IncompleteSolver::PartialValidity, + CacheEntryHash> cache_map; + + Solver *solver; + cache_map cache; + +public: + CachingSolver(Solver *s) : solver(s) {} + ~CachingSolver() { cache.clear(); delete solver; } + + bool computeValidity(const Query&, Solver::Validity &result); + bool computeTruth(const Query&, bool &isValid); + bool computeValue(const Query& query, ref<Expr> &result) { + return solver->impl->computeValue(query, result); + } + bool computeInitialValues(const Query& query, + const std::vector<const Array*> &objects, + std::vector< std::vector<unsigned char> > &values, + bool &hasSolution) { + return solver->impl->computeInitialValues(query, objects, values, + hasSolution); + } +}; + +/** @returns the canonical version of the given query. The reference + negationUsed is set to true if the original query was negated in + the canonicalization process. */ +ref<Expr> CachingSolver::canonicalizeQuery(ref<Expr> originalQuery, + bool &negationUsed) { + ref<Expr> negatedQuery = Expr::createNot(originalQuery); + + // select the "smaller" query to the be canonical representation + if (originalQuery.compare(negatedQuery) < 0) { + negationUsed = false; + return originalQuery; + } else { + negationUsed = true; + return negatedQuery; + } +} + +/** @returns true on a cache hit, false of a cache miss. Reference + value result only valid on a cache hit. */ +bool CachingSolver::cacheLookup(const Query& query, + IncompleteSolver::PartialValidity &result) { + bool negationUsed; + ref<Expr> canonicalQuery = canonicalizeQuery(query.expr, negationUsed); + + CacheEntry ce(query.constraints, canonicalQuery); + cache_map::iterator it = cache.find(ce); + + if (it != cache.end()) { + result = (negationUsed ? + IncompleteSolver::negatePartialValidity(it->second) : + it->second); + return true; + } + + return false; +} + +/// Inserts the given query, result pair into the cache. +void CachingSolver::cacheInsert(const Query& query, + IncompleteSolver::PartialValidity result) { + bool negationUsed; + ref<Expr> canonicalQuery = canonicalizeQuery(query.expr, negationUsed); + + CacheEntry ce(query.constraints, canonicalQuery); + IncompleteSolver::PartialValidity cachedResult = + (negationUsed ? 
IncompleteSolver::negatePartialValidity(result) : result); + + cache.insert(std::make_pair(ce, cachedResult)); +} + +bool CachingSolver::computeValidity(const Query& query, + Solver::Validity &result) { + IncompleteSolver::PartialValidity cachedResult; + bool tmp, cacheHit = cacheLookup(query, cachedResult); + + if (cacheHit) { + ++stats::queryCacheHits; + + switch(cachedResult) { + case IncompleteSolver::MustBeTrue: + result = Solver::True; + return true; + case IncompleteSolver::MustBeFalse: + result = Solver::False; + return true; + case IncompleteSolver::TrueOrFalse: + result = Solver::Unknown; + return true; + case IncompleteSolver::MayBeTrue: { + if (!solver->impl->computeTruth(query, tmp)) + return false; + if (tmp) { + cacheInsert(query, IncompleteSolver::MustBeTrue); + result = Solver::True; + return true; + } else { + cacheInsert(query, IncompleteSolver::TrueOrFalse); + result = Solver::Unknown; + return true; + } + } + case IncompleteSolver::MayBeFalse: { + if (!solver->impl->computeTruth(query.negateExpr(), tmp)) + return false; + if (tmp) { + cacheInsert(query, IncompleteSolver::MustBeFalse); + result = Solver::False; + return true; + } else { + cacheInsert(query, IncompleteSolver::TrueOrFalse); + result = Solver::Unknown; + return true; + } + } + default: assert(0 && "unreachable"); + } + } + + ++stats::queryCacheMisses; + + if (!solver->impl->computeValidity(query, result)) + return false; + + switch (result) { + case Solver::True: + cachedResult = IncompleteSolver::MustBeTrue; break; + case Solver::False: + cachedResult = IncompleteSolver::MustBeFalse; break; + default: + cachedResult = IncompleteSolver::TrueOrFalse; break; + } + + cacheInsert(query, cachedResult); + return true; +} + +bool CachingSolver::computeTruth(const Query& query, + bool &isValid) { + IncompleteSolver::PartialValidity cachedResult; + bool cacheHit = cacheLookup(query, cachedResult); + + // a cached result of MayBeTrue forces us to check whether + // a False assignment exists. + if (cacheHit && cachedResult != IncompleteSolver::MayBeTrue) { + ++stats::queryCacheHits; + isValid = (cachedResult == IncompleteSolver::MustBeTrue); + return true; + } + + ++stats::queryCacheMisses; + + // cache miss: query solver + if (!solver->impl->computeTruth(query, isValid)) + return false; + + if (isValid) { + cachedResult = IncompleteSolver::MustBeTrue; + } else if (cacheHit) { + // We know a true assignment exists, and query isn't valid, so + // must be TrueOrFalse. + assert(cachedResult == IncompleteSolver::MayBeTrue); + cachedResult = IncompleteSolver::TrueOrFalse; + } else { + cachedResult = IncompleteSolver::MayBeFalse; + } + + cacheInsert(query, cachedResult); + return true; +} + +/// + +Solver *klee::createCachingSolver(Solver *_solver) { + return new Solver(new CachingSolver(_solver)); +} diff --git a/lib/Solver/CexCachingSolver.cpp b/lib/Solver/CexCachingSolver.cpp new file mode 100644 index 00000000..79bc985d --- /dev/null +++ b/lib/Solver/CexCachingSolver.cpp @@ -0,0 +1,313 @@ +//===-- CexCachingSolver.cpp ----------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
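A toy model of the canonicalization used by CachingSolver above may help: a query and its negation are mapped to one cache key, and the stored answer is flipped whenever the negated form was the one looked up. Everything below (string "expressions", the negateStr helper) is an illustration of the idea only, not KLEE's Expr or Query API:

    #include <map>
    #include <string>

    enum Validity { MustBeTrue, MustBeFalse, Unknown };

    static Validity negate(Validity v) {
      if (v == MustBeTrue)  return MustBeFalse;
      if (v == MustBeFalse) return MustBeTrue;
      return Unknown;
    }

    // Structural negation with !!q == q, mimicking Expr::createNot simplification.
    static std::string negateStr(const std::string &q) {
      if (q.size() > 2 && q.compare(0, 2, "!(") == 0 && q[q.size()-1] == ')')
        return q.substr(2, q.size() - 3);
      return "!(" + q + ")";
    }

    static std::string canonicalize(const std::string &q, bool &negated) {
      std::string notQ = negateStr(q);
      negated = notQ < q;              // keep the "smaller" of q and !q as the key
      return negated ? notQ : q;
    }

    bool cachedValidity(const std::map<std::string, Validity> &cache,
                        const std::string &q, Validity &out) {
      bool negated;
      std::map<std::string, Validity>::const_iterator it =
        cache.find(canonicalize(q, negated));
      if (it == cache.end()) return false;
      out = negated ? negate(it->second) : it->second;   // undo the negation
      return true;
    }

With this scheme a single entry can answer both a query and its negation.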
+// +//===----------------------------------------------------------------------===// + +#include "klee/Solver.h" + +#include "klee/Constraints.h" +#include "klee/Expr.h" +#include "klee/SolverImpl.h" +#include "klee/TimerStatIncrementer.h" +#include "klee/util/Assignment.h" +#include "klee/util/ExprUtil.h" +#include "klee/util/ExprVisitor.h" +#include "klee/Internal/ADT/MapOfSets.h" + +#include "SolverStats.h" + +#include "llvm/Support/CommandLine.h" + +using namespace klee; +using namespace llvm; + +namespace { + cl::opt<bool> + DebugCexCacheCheckBinding("debug-cex-cache-check-binding"); + + cl::opt<bool> + CexCacheTryAll("cex-cache-try-all", + cl::desc("try substituting all counterexamples before asking STP"), + cl::init(false)); + + cl::opt<bool> + CexCacheExperimental("cex-cache-exp", cl::init(false)); + +} + +/// + +typedef std::set< ref<Expr> > KeyType; + +struct AssignmentLessThan { + bool operator()(const Assignment *a, const Assignment *b) { + return a->bindings < b->bindings; + } +}; + + +class CexCachingSolver : public SolverImpl { + typedef std::set<Assignment*, AssignmentLessThan> assignmentsTable_ty; + + Solver *solver; + + MapOfSets<ref<Expr>, Assignment*> cache; + // memo table + assignmentsTable_ty assignmentsTable; + + bool searchForAssignment(KeyType &key, + Assignment *&result); + + bool lookupAssignment(const Query& query, Assignment *&result); + + bool getAssignment(const Query& query, Assignment *&result); + +public: + CexCachingSolver(Solver *_solver) : solver(_solver) {} + ~CexCachingSolver(); + + bool computeTruth(const Query&, bool &isValid); + bool computeValidity(const Query&, Solver::Validity &result); + bool computeValue(const Query&, ref<Expr> &result); + bool computeInitialValues(const Query&, + const std::vector<const Array*> &objects, + std::vector< std::vector<unsigned char> > &values, + bool &hasSolution); +}; + +/// + +struct NullAssignment { + bool operator()(Assignment *a) const { return !a; } +}; + +struct NonNullAssignment { + bool operator()(Assignment *a) const { return a!=0; } +}; + +struct NullOrSatisfyingAssignment { + KeyType &key; + + NullOrSatisfyingAssignment(KeyType &_key) : key(_key) {} + + bool operator()(Assignment *a) const { + return !a || a->satisfies(key.begin(), key.end()); + } +}; + +bool CexCachingSolver::searchForAssignment(KeyType &key, Assignment *&result) { + Assignment * const *lookup = cache.lookup(key); + if (lookup) { + result = *lookup; + return true; + } + + if (CexCacheTryAll) { + Assignment **lookup = cache.findSuperset(key, NonNullAssignment()); + if (!lookup) lookup = cache.findSubset(key, NullAssignment()); + if (lookup) { + result = *lookup; + return true; + } + for (assignmentsTable_ty::iterator it = assignmentsTable.begin(), + ie = assignmentsTable.end(); it != ie; ++it) { + Assignment *a = *it; + if (a->satisfies(key.begin(), key.end())) { + result = a; + return true; + } + } + } else { + // XXX which order? 
one is sure to be better + Assignment **lookup = cache.findSuperset(key, NonNullAssignment()); + if (!lookup) lookup = cache.findSubset(key, NullOrSatisfyingAssignment(key)); + if (lookup) { + result = *lookup; + return true; + } + } + + return false; +} + +bool CexCachingSolver::lookupAssignment(const Query &query, + Assignment *&result) { + KeyType key(query.constraints.begin(), query.constraints.end()); + ref<Expr> neg = Expr::createNot(query.expr); + if (neg.isConstant()) { + if (!neg.getConstantValue()) { + result = (Assignment*) 0; + return true; + } + } else { + key.insert(neg); + } + + return searchForAssignment(key, result); +} + +bool CexCachingSolver::getAssignment(const Query& query, Assignment *&result) { + KeyType key(query.constraints.begin(), query.constraints.end()); + ref<Expr> neg = Expr::createNot(query.expr); + if (neg.isConstant()) { + if (!neg.getConstantValue()) { + result = (Assignment*) 0; + return true; + } + } else { + key.insert(neg); + } + + if (!searchForAssignment(key, result)) { + // need to solve + + std::vector<const Array*> objects; + findSymbolicObjects(key.begin(), key.end(), objects); + + std::vector< std::vector<unsigned char> > values; + bool hasSolution; + if (!solver->impl->computeInitialValues(query, objects, values, + hasSolution)) + return false; + + Assignment *binding; + if (hasSolution) { + binding = new Assignment(objects, values); + + // memoization + std::pair<assignmentsTable_ty::iterator, bool> + res = assignmentsTable.insert(binding); + if (!res.second) { + delete binding; + binding = *res.first; + } + + if (DebugCexCacheCheckBinding) + assert(binding->satisfies(key.begin(), key.end())); + } else { + binding = (Assignment*) 0; + } + + result = binding; + cache.insert(key, binding); + } + + return true; +} + +/// + +CexCachingSolver::~CexCachingSolver() { + cache.clear(); + delete solver; + for (assignmentsTable_ty::iterator it = assignmentsTable.begin(), + ie = assignmentsTable.end(); it != ie; ++it) + delete *it; +} + +bool CexCachingSolver::computeValidity(const Query& query, + Solver::Validity &result) { + TimerStatIncrementer t(stats::cexCacheTime); + Assignment *a; + if (!getAssignment(query.withFalse(), a)) + return false; + assert(a && "computeValidity() must have assignment"); + ref<Expr> q = a->evaluate(query.expr); + assert(q.isConstant() && "assignment evaluation did not result in constant"); + + if (q.getConstantValue()) { + if (!getAssignment(query, a)) + return false; + result = !a ? Solver::True : Solver::Unknown; + } else { + if (!getAssignment(query.negateExpr(), a)) + return false; + result = !a ? Solver::False : Solver::Unknown; + } + + return true; +} + +bool CexCachingSolver::computeTruth(const Query& query, + bool &isValid) { + TimerStatIncrementer t(stats::cexCacheTime); + + // There is a small amount of redundancy here. We only need to know + // truth and do not really need to compute an assignment. This means + // that we could check the cache to see if we already know that + // state ^ query has no assignment. In that case, by the validity of + // state, we know that state ^ !query must have an assignment, and + // so query cannot be true (valid). This does get hits, but doesn't + // really seem to be worth the overhead. 
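The two MapOfSets probes used in searchForAssignment above rest on a simple inclusion argument: an assignment that satisfies a superset of the constraints also satisfies any subset, and if some subset is already known unsatisfiable then the full set is too. A toy linear-scan version of the same reasoning (sets of strings standing in for constraint sets; this is not the MapOfSets API and not part of the commit):

    #include <algorithm>
    #include <map>
    #include <set>
    #include <string>

    typedef std::set<std::string>            Key;         // a constraint set
    typedef std::map<std::string, int>       Assignment;  // variable -> value
    typedef std::map<Key, const Assignment*> Cache;       // 0 means "unsatisfiable"

    // Returns true if the cache already decides the query; result is a
    // satisfying assignment, or 0 when the key is known unsatisfiable.
    bool lookupByInclusion(const Cache &cache, const Key &key,
                           const Assignment *&result) {
      for (Cache::const_iterator it = cache.begin(), ie = cache.end();
           it != ie; ++it) {
        bool cachedIsSuperset = std::includes(it->first.begin(), it->first.end(),
                                              key.begin(), key.end());
        bool cachedIsSubset   = std::includes(key.begin(), key.end(),
                                              it->first.begin(), it->first.end());
        if (cachedIsSuperset && it->second) { result = it->second; return true; }
        if (cachedIsSubset && !it->second)  { result = 0;          return true; }
      }
      return false;
    }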
+ + if (CexCacheExperimental) { + Assignment *a; + if (lookupAssignment(query.negateExpr(), a) && !a) + return false; + } + + Assignment *a; + if (!getAssignment(query, a)) + return false; + + isValid = !a; + + return true; +} + +bool CexCachingSolver::computeValue(const Query& query, + ref<Expr> &result) { + TimerStatIncrementer t(stats::cexCacheTime); + + Assignment *a; + if (!getAssignment(query.withFalse(), a)) + return false; + assert(a && "computeValue() must have assignment"); + result = a->evaluate(query.expr); + assert(result.isConstant() && + "assignment evaluation did not result in constant"); + return true; +} + +bool +CexCachingSolver::computeInitialValues(const Query& query, + const std::vector<const Array*> + &objects, + std::vector< std::vector<unsigned char> > + &values, + bool &hasSolution) { + TimerStatIncrementer t(stats::cexCacheTime); + Assignment *a; + if (!getAssignment(query, a)) + return false; + hasSolution = !!a; + + if (!a) + return true; + + // FIXME: We should use smarter assignment for result so we don't + // need redundant copy. + values = std::vector< std::vector<unsigned char> >(objects.size()); + for (unsigned i=0; i < objects.size(); ++i) { + const Array *os = objects[i]; + Assignment::bindings_ty::iterator it = a->bindings.find(os); + + if (it == a->bindings.end()) { + values[i] = std::vector<unsigned char>(os->size, 0); + } else { + values[i] = it->second; + } + } + + return true; +} + +/// + +Solver *klee::createCexCachingSolver(Solver *_solver) { + return new Solver(new CexCachingSolver(_solver)); +} diff --git a/lib/Solver/ConstantDivision.cpp b/lib/Solver/ConstantDivision.cpp new file mode 100644 index 00000000..c8f8f3d5 --- /dev/null +++ b/lib/Solver/ConstantDivision.cpp @@ -0,0 +1,146 @@ +//===-- ConstantDivision.cpp ----------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ConstantDivision.h" + +#include "klee/util/Bits.h" + +#include <algorithm> +#include <cassert> + +namespace klee { + +/* Macros and functions which define the basic bit-level operations + * needed to implement quick division operations. + * + * Based on Hacker's Delight (2003) by Henry S. Warren, Jr. 
+ */ + +/* 32 -- number of bits in the integer type on this architecture */ + +/* 2^32 -- NUM_BITS=32 requires 64 bits to represent this unsigned value */ +#define TWO_TO_THE_32_U64 (1ULL << 32) + +/* 2e31 -- NUM_BITS=32 requires 64 bits to represent this signed value */ +#define TWO_TO_THE_31_S64 (1LL << 31) + +/* ABS(x) -- positive x */ +#define ABS(x) ( ((x)>0)?x:-(x) ) /* fails if x is the min value of its type */ + +/* XSIGN(x) -- -1 if x<0 and 0 otherwise */ +#define XSIGN(x) ( (x) >> 31 ) + +/* LOG2_CEIL(x) -- logarithm base 2 of x, rounded up */ +#define LOG2_CEIL(x) ( 32 - ldz(x - 1) ) + +/* ones(x) -- counts the number of bits in x with the value 1 */ +static uint32_t ones( register uint32_t x ) { + x -= ((x >> 1) & 0x55555555); + x = (((x >> 2) & 0x33333333) + (x & 0x33333333)); + x = (((x >> 4) + x) & 0x0f0f0f0f); + x += (x >> 8); + x += (x >> 16); + + return( x & 0x0000003f ); +} + +/* ldz(x) -- counts the number of leading zeroes in a 32-bit word */ +static uint32_t ldz( register uint32_t x ) { + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + + return 32 - ones(x); +} + +/* exp_base_2(n) -- 2^n computed as an integer */ +static uint32_t exp_base_2( register int32_t n ) { + register uint32_t x = ~n & (n - 32); + x = x >> 31; + return( x << n ); +} + +// A simple algorithm: Iterate over all contiguous regions of 1 bits +// in x starting with the lowest bits. +// +// For a particular range where x is 1 for bits [low,high) then: +// 1) if the range is just one bit, simple add it +// 2) if the range is more than one bit, replace with an add +// of the high bit and a subtract of the low bit. we apply +// one useful optimization: if we were going to add the bit +// below the one we wish to subtract, we simply change that +// add to a subtract instead of subtracting the low bit itself. +// Obviously we must take care when high==64. +void ComputeMultConstants64(uint64_t multiplicand, + uint64_t &add, uint64_t &sub) { + uint64_t x = multiplicand; + add = sub = 0; + + while (x) { + // Determine rightmost contiguous region of 1s. + unsigned low = bits64::indexOfRightmostBit(x); + uint64_t lowbit = 1LL << low; + uint64_t p = x + lowbit; + uint64_t q = bits64::isolateRightmostBit(p); + unsigned high = q ? bits64::indexOfSingleBit(q) : 64; + + if (high==low+1) { // Just one bit... + add |= lowbit; + } else { + // Rewrite as +(1<<high) - (1<<low). + + // Optimize +(1<<x) - (1<<(x+1)) to -(1<<x). + if (low && (add & (lowbit>>1))) { + add ^= lowbit>>1; + sub ^= lowbit>>1; + } else { + sub |= lowbit; + } + + if (high!=64) + add |= 1LL << high; + } + + x = p ^ q; + } + + assert(multiplicand == add - sub); +} + +void ComputeUDivConstants32(uint32_t d, uint32_t &mprime, uint32_t &sh1, + uint32_t &sh2) { + int32_t l = LOG2_CEIL( d ); /* signed so l-1 => -1 when l=0 (see sh2) */ + uint32_t mid = exp_base_2(l) - d; + + mprime = (TWO_TO_THE_32_U64 * mid / d) + 1; + sh1 = std::min( l, 1 ); + sh2 = std::max( l-1, 0 ); +} + +void ComputeSDivConstants32(int32_t d, int32_t &mprime, int32_t &dsign, + int32_t &shpost ) { + uint64_t abs_d = ABS( (int64_t)d ); /* use 64-bits in case d is INT32_MIN */ + + /* LOG2_CEIL works on 32-bits, so we cast abs_d. The only possible value + * outside the 32-bit rep. is 2^31. This is special cased to save computer + * time since 64-bit routines would be overkill. 
*/ + int32_t l = std::max( 1U, LOG2_CEIL((uint32_t)abs_d) ); + if( abs_d == TWO_TO_THE_31_S64 ) l = 31; + + uint32_t mid = exp_base_2( l - 1 ); + uint64_t m = TWO_TO_THE_32_U64 * mid / abs_d + 1ULL; + + mprime = m - TWO_TO_THE_32_U64; /* implicit cast to 32-bits signed */ + dsign = XSIGN( d ); + shpost = l - 1; +} + +} diff --git a/lib/Solver/ConstantDivision.h b/lib/Solver/ConstantDivision.h new file mode 100644 index 00000000..9e3e9c95 --- /dev/null +++ b/lib/Solver/ConstantDivision.h @@ -0,0 +1,51 @@ +//===-- ConstantDivision.h --------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef __UTIL_CONSTANTDIVISION_H__ +#define __UTIL_CONSTANTDIVISION_H__ + +#include <stdint.h> + +namespace klee { + +/// ComputeMultConstants64 - Compute add and sub such that add-sub==x, +/// while attempting to minimize the number of bits in add and sub +/// combined. +void ComputeMultConstants64(uint64_t x, uint64_t &add_out, + uint64_t &sub_out); + +/// Compute the constants to perform a quicker equivalent of a division of some +/// 32-bit unsigned integer n by a known constant d (also a 32-bit unsigned +/// integer). The constants to compute n/d without explicit division will be +/// stored in mprime, sh1, and sh2 (unsigned 32-bit integers). +/// +/// @param d - denominator (divisor) +/// +/// @param [out] mprime +/// @param [out] sh1 +/// @param [out] sh2 +void ComputeUDivConstants32(uint32_t d, uint32_t &mprime, uint32_t &sh1, + uint32_t &sh2); + +/// Compute the constants to perform a quicker equivalent of a division of some +/// 32-bit signed integer n by a known constant d (also a 32-bit signed +/// integer). The constants to compute n/d without explicit division will be +/// stored in mprime, dsign, and shpost (signed 32-bit integers). +/// +/// @param d - denominator (divisor) +/// +/// @param [out] mprime +/// @param [out] dsign +/// @param [out] shpost +void ComputeSDivConstants32(int32_t d, int32_t &mprime, int32_t &dsign, + int32_t &shpost); + +} + +#endif diff --git a/lib/Solver/FastCexSolver.cpp b/lib/Solver/FastCexSolver.cpp new file mode 100644 index 00000000..d2bc27c6 --- /dev/null +++ b/lib/Solver/FastCexSolver.cpp @@ -0,0 +1,959 @@ +//===-- FastCexSolver.cpp -------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Solver.h" + +#include "klee/Constraints.h" +#include "klee/Expr.h" +#include "klee/IncompleteSolver.h" +#include "klee/util/ExprEvaluator.h" +#include "klee/util/ExprRangeEvaluator.h" +#include "klee/util/ExprVisitor.h" +// FIXME: Use APInt. 
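ComputeUDivConstants32 only produces the constants; for context, this is how such constants are typically consumed in the Hacker's Delight scheme the file cites (also known from Granlund and Montgomery). The helper name and the spot-check below are illustrative, not part of the commit:

    #include <cassert>
    #include <cstdint>

    static uint32_t udivByConst(uint32_t n, uint32_t mprime, uint32_t sh1,
                                uint32_t sh2) {
      uint32_t t1 = (uint32_t)(((uint64_t)mprime * n) >> 32);  // high half of m'*n
      return (t1 + ((n - t1) >> sh1)) >> sh2;                  // == n / d
    }

    int main() {
      // For d == 7 the formulas above give l = 3, mid = 1, so
      // mprime = 613566757, sh1 = 1, sh2 = 2.
      for (uint32_t n = 0; n < 1000; ++n)
        assert(udivByConst(n, 613566757u, 1, 2) == n / 7);
      return 0;
    }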
+#include "klee/Internal/Support/IntEvaluation.h" + +#include <iostream> +#include <sstream> +#include <cassert> +#include <map> +#include <vector> + +using namespace klee; + +/***/ + +//#define LOG +#ifdef LOG +std::ostream *theLog; +#endif + + // Hacker's Delight, pgs 58-63 +static uint64_t minOR(uint64_t a, uint64_t b, + uint64_t c, uint64_t d) { + uint64_t temp, m = ((uint64_t) 1)<<63; + while (m) { + if (~a & c & m) { + temp = (a | m) & -m; + if (temp <= b) { a = temp; break; } + } else if (a & ~c & m) { + temp = (c | m) & -m; + if (temp <= d) { c = temp; break; } + } + m >>= 1; + } + + return a | c; +} +static uint64_t maxOR(uint64_t a, uint64_t b, + uint64_t c, uint64_t d) { + uint64_t temp, m = ((uint64_t) 1)<<63; + + while (m) { + if (b & d & m) { + temp = (b - m) | (m - 1); + if (temp >= a) { b = temp; break; } + temp = (d - m) | (m -1); + if (temp >= c) { d = temp; break; } + } + m >>= 1; + } + + return b | d; +} +static uint64_t minAND(uint64_t a, uint64_t b, + uint64_t c, uint64_t d) { + uint64_t temp, m = ((uint64_t) 1)<<63; + while (m) { + if (~a & ~c & m) { + temp = (a | m) & -m; + if (temp <= b) { a = temp; break; } + temp = (c | m) & -m; + if (temp <= d) { c = temp; break; } + } + m >>= 1; + } + + return a & c; +} +static uint64_t maxAND(uint64_t a, uint64_t b, + uint64_t c, uint64_t d) { + uint64_t temp, m = ((uint64_t) 1)<<63; + while (m) { + if (b & ~d & m) { + temp = (b & ~m) | (m - 1); + if (temp >= a) { b = temp; break; } + } else if (~b & d & m) { + temp = (d & ~m) | (m - 1); + if (temp >= c) { d = temp; break; } + } + m >>= 1; + } + + return b & d; +} + +/// + +class ValueRange { +private: + uint64_t m_min, m_max; + +public: + ValueRange() : m_min(1),m_max(0) {} + ValueRange(uint64_t value) : m_min(value), m_max(value) {} + ValueRange(uint64_t _min, uint64_t _max) : m_min(_min), m_max(_max) {} + ValueRange(const ValueRange &b) : m_min(b.m_min), m_max(b.m_max) {} + + void print(std::ostream &os) const { + if (isFixed()) { + os << m_min; + } else { + os << "[" << m_min << "," << m_max << "]"; + } + } + + bool isEmpty() const { + return m_min>m_max; + } + bool contains(uint64_t value) const { + return this->intersects(ValueRange(value)); + } + bool intersects(const ValueRange &b) const { + return !this->set_intersection(b).isEmpty(); + } + + bool isFullRange(unsigned bits) { + return m_min==0 && m_max==bits64::maxValueOfNBits(bits); + } + + ValueRange set_intersection(const ValueRange &b) const { + return ValueRange(std::max(m_min,b.m_min), std::min(m_max,b.m_max)); + } + ValueRange set_union(const ValueRange &b) const { + return ValueRange(std::min(m_min,b.m_min), std::max(m_max,b.m_max)); + } + ValueRange set_difference(const ValueRange &b) const { + if (b.isEmpty() || b.m_min > m_max || b.m_max < m_min) { // no intersection + return *this; + } else if (b.m_min <= m_min && b.m_max >= m_max) { // empty + return ValueRange(1,0); + } else if (b.m_min <= m_min) { // one range out + // cannot overflow because b.m_max < m_max + return ValueRange(b.m_max+1, m_max); + } else if (b.m_max >= m_max) { + // cannot overflow because b.min > m_min + return ValueRange(m_min, b.m_min-1); + } else { + // two ranges, take bottom + return ValueRange(m_min, b.m_min-1); + } + } + ValueRange binaryAnd(const ValueRange &b) const { + // XXX + assert(!isEmpty() && !b.isEmpty() && "XXX"); + if (isFixed() && b.isFixed()) { + return ValueRange(m_min & b.m_min); + } else { + return ValueRange(minAND(m_min, m_max, b.m_min, b.m_max), + maxAND(m_min, m_max, b.m_min, b.m_max)); + } + } + ValueRange 
binaryAnd(uint64_t b) const { return binaryAnd(ValueRange(b)); } + ValueRange binaryOr(ValueRange b) const { + // XXX + assert(!isEmpty() && !b.isEmpty() && "XXX"); + if (isFixed() && b.isFixed()) { + return ValueRange(m_min | b.m_min); + } else { + return ValueRange(minOR(m_min, m_max, b.m_min, b.m_max), + maxOR(m_min, m_max, b.m_min, b.m_max)); + } + } + ValueRange binaryOr(uint64_t b) const { return binaryOr(ValueRange(b)); } + ValueRange binaryXor(ValueRange b) const { + if (isFixed() && b.isFixed()) { + return ValueRange(m_min ^ b.m_min); + } else { + uint64_t t = m_max | b.m_max; + while (!bits64::isPowerOfTwo(t)) + t = bits64::withoutRightmostBit(t); + return ValueRange(0, (t<<1)-1); + } + } + + ValueRange binaryShiftLeft(unsigned bits) const { + return ValueRange(m_min<<bits, m_max<<bits); + } + ValueRange binaryShiftRight(unsigned bits) const { + return ValueRange(m_min>>bits, m_max>>bits); + } + + ValueRange concat(const ValueRange &b, unsigned bits) const { + return binaryShiftLeft(bits).binaryOr(b); + } + ValueRange extract(uint64_t lowBit, uint64_t maxBit) const { + return binaryShiftRight(lowBit).binaryAnd(bits64::maxValueOfNBits(maxBit-lowBit)); + } + + ValueRange add(const ValueRange &b, unsigned width) const { + return ValueRange(0, bits64::maxValueOfNBits(width)); + } + ValueRange sub(const ValueRange &b, unsigned width) const { + return ValueRange(0, bits64::maxValueOfNBits(width)); + } + ValueRange mul(const ValueRange &b, unsigned width) const { + return ValueRange(0, bits64::maxValueOfNBits(width)); + } + ValueRange udiv(const ValueRange &b, unsigned width) const { + return ValueRange(0, bits64::maxValueOfNBits(width)); + } + ValueRange sdiv(const ValueRange &b, unsigned width) const { + return ValueRange(0, bits64::maxValueOfNBits(width)); + } + ValueRange urem(const ValueRange &b, unsigned width) const { + return ValueRange(0, bits64::maxValueOfNBits(width)); + } + ValueRange srem(const ValueRange &b, unsigned width) const { + return ValueRange(0, bits64::maxValueOfNBits(width)); + } + + // use min() to get value if true (XXX should we add a method to + // make code clearer?) 
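The binaryAnd/binaryOr cases above rely on the Hacker's Delight minOR/maxOR/minAND/maxAND helpers producing correct bounds on x|y and x&y over intervals. A brute-force containment check over small ranges, illustrative only; it assumes the static helpers earlier in this file are in scope (e.g. by dropping it into the same translation unit):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    static void checkOrBounds() {
      for (uint64_t a = 0; a < 16; ++a)
        for (uint64_t b = a; b < 16; ++b)
          for (uint64_t c = 0; c < 16; ++c)
            for (uint64_t d = c; d < 16; ++d) {
              uint64_t lo = ~0ULL, hi = 0;
              for (uint64_t x = a; x <= b; ++x)
                for (uint64_t y = c; y <= d; ++y) {
                  lo = std::min(lo, x | y);
                  hi = std::max(hi, x | y);
                }
              assert(minOR(a, b, c, d) <= lo);   // never above the true minimum
              assert(maxOR(a, b, c, d) >= hi);   // never below the true maximum
            }
    }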
+ bool isFixed() const { return m_min==m_max; } + + bool operator==(const ValueRange &b) const { return m_min==b.m_min && m_max==b.m_max; } + bool operator!=(const ValueRange &b) const { return !(*this==b); } + + bool mustEqual(const uint64_t b) const { return m_min==m_max && m_min==b; } + bool mayEqual(const uint64_t b) const { return m_min<=b && m_max>=b; } + + bool mustEqual(const ValueRange &b) const { return isFixed() && b.isFixed() && m_min==b.m_min; } + bool mayEqual(const ValueRange &b) const { return this->intersects(b); } + + uint64_t min() const { + assert(!isEmpty() && "cannot get minimum of empty range"); + return m_min; + } + + uint64_t max() const { + assert(!isEmpty() && "cannot get maximum of empty range"); + return m_max; + } + + int64_t minSigned(unsigned bits) const { + assert((m_min>>bits)==0 && (m_max>>bits)==0 && + "range is outside given number of bits"); + + // if max allows sign bit to be set then it can be smallest value, + // otherwise since the range is not empty, min cannot have a sign + // bit + + uint64_t smallest = ((uint64_t) 1 << (bits-1)); + if (m_max >= smallest) { + return ints::sext(smallest, 64, bits); + } else { + return m_min; + } + } + + int64_t maxSigned(unsigned bits) const { + assert((m_min>>bits)==0 && (m_max>>bits)==0 && + "range is outside given number of bits"); + + uint64_t smallest = ((uint64_t) 1 << (bits-1)); + + // if max and min have sign bit then max is max, otherwise if only + // max has sign bit then max is largest signed integer, otherwise + // max is max + + if (m_min < smallest && m_max >= smallest) { + return smallest - 1; + } else { + return ints::sext(m_max, 64, bits); + } + } +}; + +inline std::ostream &operator<<(std::ostream &os, const ValueRange &vr) { + vr.print(os); + return os; +} + +// used to find all memory object ids and the maximum size of any +// object state that references them (for symbolic size). +class ObjectFinder : public ExprVisitor { +protected: + Action visitRead(const ReadExpr &re) { + addUpdates(re.updates); + return Action::doChildren(); + } + + // XXX nice if this information was cached somewhere, used by + // independence as well right? + void addUpdates(const UpdateList &ul) { + for (const UpdateNode *un=ul.head; un; un=un->next) { + visit(un->index); + visit(un->value); + } + + addObject(*ul.root); + } + +public: + void addObject(const Array& array) { + unsigned id = array.id; + std::map<unsigned,unsigned>::iterator it = results.find(id); + + // FIXME: Not 64-bit size clean. 
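A concrete instance of the minSigned/maxSigned reasoning above: the unsigned 8-bit range [120, 200] straddles the sign bit, so viewed as signed bytes it reaches everything from -128 up to 127. Illustrative check only, not part of the commit:

    #include <cassert>

    int main() {
      int signedMin = 127, signedMax = -128;
      for (unsigned v = 120; v <= 200; ++v) {
        int s = (v < 128) ? (int)v : (int)v - 256;  // the 8-bit pattern, read as signed
        if (s < signedMin) signedMin = s;
        if (s > signedMax) signedMax = s;
      }
      // matches ValueRange(120, 200).minSigned(8) / .maxSigned(8)
      assert(signedMin == -128 && signedMax == 127);
      return 0;
    }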
+ if (it == results.end()) { + results[id] = (unsigned) array.size; + } else { + it->second = std::max(it->second, (unsigned) array.size); + } + } + +public: + std::map<unsigned, unsigned> results; +}; + +// XXX waste of space, rather have ByteValueRange +typedef ValueRange CexValueData; + +class CexObjectData { +public: + unsigned size; + CexValueData *values; + +public: + CexObjectData(unsigned _size) : size(_size), values(new CexValueData[size]) { + for (unsigned i=0; i<size; i++) + values[i] = ValueRange(0, 255); + } +}; + +class CexRangeEvaluator : public ExprRangeEvaluator<ValueRange> { +public: + std::map<unsigned, CexObjectData> &objectValues; + CexRangeEvaluator(std::map<unsigned, CexObjectData> &_objectValues) + : objectValues(_objectValues) {} + + ValueRange getInitialReadRange(const Array &os, ValueRange index) { + return ValueRange(0, 255); + } +}; + +class CexConstifier : public ExprEvaluator { +protected: + ref<Expr> getInitialValue(const Array& array, unsigned index) { + std::map<unsigned, CexObjectData>::iterator it = + objectValues.find(array.id); + assert(it != objectValues.end() && "missing object?"); + CexObjectData &cod = it->second; + + if (index >= cod.size) { + return ReadExpr::create(UpdateList(&array, true, 0), + ref<Expr>(index, Expr::Int32)); + } else { + CexValueData &cvd = cod.values[index]; + assert(cvd.min() == cvd.max() && "value is not fixed"); + return ref<Expr>(cvd.min(), Expr::Int8); + } + } + +public: + std::map<unsigned, CexObjectData> &objectValues; + CexConstifier(std::map<unsigned, CexObjectData> &_objectValues) + : objectValues(_objectValues) {} +}; + +class CexData { +public: + std::map<unsigned, CexObjectData> objectValues; + +public: + CexData(ObjectFinder &finder) { + for (std::map<unsigned,unsigned>::iterator it = finder.results.begin(), + ie = finder.results.end(); it != ie; ++it) { + objectValues.insert(std::pair<unsigned, CexObjectData>(it->first, + CexObjectData(it->second))); + } + } + ~CexData() { + for (std::map<unsigned, CexObjectData>::iterator it = objectValues.begin(), + ie = objectValues.end(); it != ie; ++it) + delete[] it->second.values; + } + + void forceExprToValue(ref<Expr> e, uint64_t value) { + forceExprToRange(e, CexValueData(value,value)); + } + + void forceExprToRange(ref<Expr> e, CexValueData range) { +#ifdef LOG + // *theLog << "force: " << e << " to " << range << "\n"; +#endif + switch (e.getKind()) { + case Expr::Constant: { + // rather a pity if the constant isn't in the range, but how can + // we use this? + break; + } + + // Special + + case Expr::NotOptimized: break; + + case Expr::Read: { + ReadExpr *re = static_ref_cast<ReadExpr>(e); + const Array *array = re->updates.root; + CexObjectData &cod = objectValues.find(array->id)->second; + + // XXX we need to respect the version here and object state chain + + if (re->index.isConstant() && + re->index.getConstantValue() < array->size) { + CexValueData &cvd = cod.values[re->index.getConstantValue()]; + CexValueData tmp = cvd.set_intersection(range); + + if (tmp.isEmpty()) { + if (range.isFixed()) // ranges conflict, if new one is fixed use that + cvd = range; + } else { + cvd = tmp; + } + } else { + // XXX fatal("XXX not implemented"); + } + + break; + } + + case Expr::Select: { + SelectExpr *se = static_ref_cast<SelectExpr>(e); + ValueRange cond = evalRangeForExpr(se->cond); + if (cond.isFixed()) { + if (cond.min()) { + forceExprToRange(se->trueExpr, range); + } else { + forceExprToRange(se->falseExpr, range); + } + } else { + // XXX imprecise... 
we have a choice here. One method is to + // simply force both sides into the specified range (since the + // condition is indetermined). This may lose in two ways, the + // first is that the condition chosen may limit further + // restrict the range in each of the children, however this is + // less of a problem as the range will be a superset of legal + // values. The other is if the condition ends up being forced + // by some other constraints, then we needlessly forced one + // side into the given range. + // + // The other method would be to force the condition to one + // side and force that side into the given range. This loses + // when we force the condition to an unsatisfiable value + // (either because the condition cannot be that, or the + // resulting range given that condition is not in the required + // range). + // + // Currently we just force both into the range. A hybrid would + // be to evaluate the ranges for each of the children... if + // one of the ranges happens to already be a subset of the + // required range then it may be preferable to force the + // condition to that side. + forceExprToRange(se->trueExpr, range); + forceExprToRange(se->falseExpr, range); + } + break; + } + + // XXX imprecise... the problem here is that extracting bits + // loses information about what bits are connected across the + // bytes. if a value can be 1 or 256 then either the top or + // lower byte is 0, but just extraction loses this information + // and will allow neither,one,or both to be 1. + // + // we can protect against this in a limited fashion by writing + // the extraction a byte at a time, then checking the evaluated + // value, isolating for that range, and continuing. + case Expr::Concat: { + ConcatExpr *ce = static_ref_cast<ConcatExpr>(e); + if (ce->is2ByteConcat()) { + forceExprToRange(ce->getKid(0), range.extract( 8, 16)); + forceExprToRange(ce->getKid(1), range.extract( 0, 8)); + } + else if (ce->is4ByteConcat()) { + forceExprToRange(ce->getKid(0), range.extract(24, 32)); + forceExprToRange(ce->getKid(1), range.extract(16, 24)); + forceExprToRange(ce->getKid(2), range.extract( 8, 16)); + forceExprToRange(ce->getKid(3), range.extract( 0, 8)); + } + else if (ce->is8ByteConcat()) { + forceExprToRange(ce->getKid(0), range.extract(56, 64)); + forceExprToRange(ce->getKid(1), range.extract(48, 56)); + forceExprToRange(ce->getKid(2), range.extract(40, 48)); + forceExprToRange(ce->getKid(3), range.extract(32, 40)); + forceExprToRange(ce->getKid(4), range.extract(24, 32)); + forceExprToRange(ce->getKid(5), range.extract(16, 24)); + forceExprToRange(ce->getKid(6), range.extract( 8, 16)); + forceExprToRange(ce->getKid(7), range.extract( 0, 8)); + } + + break; + } + + case Expr::Extract: { + // XXX + break; + } + + // Casting + + // Simply intersect the output range with the range of all + // possible outputs and then truncate to the desired number of + // bits. + + // For ZExt this simplifies to just intersection with the + // possible input range. + case Expr::ZExt: { + CastExpr *ce = static_ref_cast<CastExpr>(e); + unsigned inBits = ce->src.getWidth(); + ValueRange input = range.set_intersection(ValueRange(0, bits64::maxValueOfNBits(inBits))); + forceExprToRange(ce->src, input); + break; + } + // For SExt instead of doing the intersection we just take the output range + // minus the impossible values. This is nicer since it is a single interval. 
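The SExt case that follows subtracts exactly the interval of values that sign-extension can never produce. For inBits = 8 and outBits = 32 that interval is [0x80, 0xFFFFFF7F]; a quick illustrative check (not part of the commit):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned inBits = 8, outBits = 32;
      const uint64_t lo = 1ULL << (inBits - 1);                           // 0x80
      const uint64_t hi = (1ULL << outBits) - (1ULL << (inBits - 1)) - 1; // 0xFFFFFF7F
      for (unsigned v = 0; v < 256; ++v) {
        int32_t s = (v < 128) ? (int32_t)v : (int32_t)v - 256;  // sign-extend 8 -> 32
        uint64_t ext = (uint32_t)s;
        assert(ext < lo || ext > hi);        // never lands in the excluded interval
      }
      return 0;
    }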
+ case Expr::SExt: { + CastExpr *ce = static_ref_cast<CastExpr>(e); + unsigned inBits = ce->src.getWidth(); + unsigned outBits = ce->width; + ValueRange output = range.set_difference(ValueRange(1<<(inBits-1), + (bits64::maxValueOfNBits(outBits)- + bits64::maxValueOfNBits(inBits-1)-1))); + ValueRange input = output.binaryAnd(bits64::maxValueOfNBits(inBits)); + forceExprToRange(ce->src, input); + break; + } + + // Binary + + case Expr::And: { + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + if (be->getWidth()==Expr::Bool) { + if (range.isFixed()) { + ValueRange left = evalRangeForExpr(be->left); + ValueRange right = evalRangeForExpr(be->right); + + if (!range.min()) { + if (left.mustEqual(0) || right.mustEqual(0)) { + // all is well + } else { + // XXX heuristic, which order + + forceExprToValue(be->left, 0); + left = evalRangeForExpr(be->left); + + // see if that worked + if (!left.mustEqual(1)) + forceExprToValue(be->right, 0); + } + } else { + if (!left.mustEqual(1)) forceExprToValue(be->left, 1); + if (!right.mustEqual(1)) forceExprToValue(be->right, 1); + } + } + } else { + // XXX + } + break; + } + + case Expr::Or: { + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + if (be->getWidth()==Expr::Bool) { + if (range.isFixed()) { + ValueRange left = evalRangeForExpr(be->left); + ValueRange right = evalRangeForExpr(be->right); + + if (range.min()) { + if (left.mustEqual(1) || right.mustEqual(1)) { + // all is well + } else { + // XXX heuristic, which order? + + // force left to value we need + forceExprToValue(be->left, 1); + left = evalRangeForExpr(be->left); + + // see if that worked + if (!left.mustEqual(1)) + forceExprToValue(be->right, 1); + } + } else { + if (!left.mustEqual(0)) forceExprToValue(be->left, 0); + if (!right.mustEqual(0)) forceExprToValue(be->right, 0); + } + } + } else { + // XXX + } + break; + } + + case Expr::Xor: break; + + // Comparison + + case Expr::Eq: { + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + if (range.isFixed()) { + if (be->left.isConstant()) { + uint64_t value = be->left.getConstantValue(); + if (range.min()) { + forceExprToValue(be->right, value); + } else { + if (value==0) { + forceExprToRange(be->right, + CexValueData(1, + ints::sext(1, + be->right.getWidth(), + 1))); + } else { + // XXX heuristic / lossy, could be better to pick larger range? + forceExprToRange(be->right, CexValueData(0, value-1)); + } + } + } else { + // XXX what now + } + } + break; + } + + case Expr::Ult: { + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + + // XXX heuristic / lossy, what order if conflict + + if (range.isFixed()) { + ValueRange left = evalRangeForExpr(be->left); + ValueRange right = evalRangeForExpr(be->right); + + uint64_t maxValue = bits64::maxValueOfNBits(be->right.getWidth()); + + // XXX should deal with overflow (can lead to empty range) + + if (left.isFixed()) { + if (range.min()) { + forceExprToRange(be->right, CexValueData(left.min()+1, maxValue)); + } else { + forceExprToRange(be->right, CexValueData(0, left.min())); + } + } else if (right.isFixed()) { + if (range.min()) { + forceExprToRange(be->left, CexValueData(0, right.min()-1)); + } else { + forceExprToRange(be->left, CexValueData(right.min(), maxValue)); + } + } else { + // XXX ??? 
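+          // A possible refinement, sketched for illustration only (the code
+          // currently does nothing here): each side could be tightened
+          // against the other's current bounds, e.g. for a must-be-true
+          // x <u y with x in [5,20] and y in [0,10], y > x forces y into
+          // [6,10] and x < y forces x into [5,9].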
+ } + } + break; + } + case Expr::Ule: { + BinaryExpr *be = static_ref_cast<BinaryExpr>(e); + + // XXX heuristic / lossy, what order if conflict + + if (range.isFixed()) { + ValueRange left = evalRangeForExpr(be->left); + ValueRange right = evalRangeForExpr(be->right); + + // XXX should deal with overflow (can lead to empty range) + + uint64_t maxValue = bits64::maxValueOfNBits(be->right.getWidth()); + if (left.isFixed()) { + if (range.min()) { + forceExprToRange(be->right, CexValueData(left.min(), maxValue)); + } else { + forceExprToRange(be->right, CexValueData(0, left.min()-1)); + } + } else if (right.isFixed()) { + if (range.min()) { + forceExprToRange(be->left, CexValueData(0, right.min())); + } else { + forceExprToRange(be->left, CexValueData(right.min()+1, maxValue)); + } + } else { + // XXX ??? + } + } + break; + } + + case Expr::Ne: + case Expr::Ugt: + case Expr::Uge: + case Expr::Sgt: + case Expr::Sge: + assert(0 && "invalid expressions (uncanonicalized"); + + default: + break; + } + } + + void fixValues() { + for (std::map<unsigned, CexObjectData>::iterator it = objectValues.begin(), + ie = objectValues.end(); it != ie; ++it) { + CexObjectData &cod = it->second; + for (unsigned i=0; i<cod.size; i++) { + CexValueData &cvd = cod.values[i]; + cvd = CexValueData(cvd.min() + (cvd.max()-cvd.min())/2); + } + } + } + + ValueRange evalRangeForExpr(ref<Expr> &e) { + CexRangeEvaluator ce(objectValues); + return ce.evaluate(e); + } + + bool exprMustBeValue(ref<Expr> e, uint64_t value) { + CexConstifier cc(objectValues); + ref<Expr> v = cc.visit(e); + if (!v.isConstant()) return false; + // XXX reenable once all reads and vars are fixed + // assert(v.isConstant() && "not all values have been fixed"); + return v.getConstantValue()==value; + } +}; + +/* *** */ + + +class FastCexSolver : public IncompleteSolver { +public: + FastCexSolver(); + ~FastCexSolver(); + + IncompleteSolver::PartialValidity computeTruth(const Query&); + bool computeValue(const Query&, ref<Expr> &result); + bool computeInitialValues(const Query&, + const std::vector<const Array*> &objects, + std::vector< std::vector<unsigned char> > &values, + bool &hasSolution); +}; + +FastCexSolver::FastCexSolver() { } + +FastCexSolver::~FastCexSolver() { } + +IncompleteSolver::PartialValidity +FastCexSolver::computeTruth(const Query& query) { +#ifdef LOG + std::ostringstream log; + theLog = &log; + // log << "------ start FastCexSolver::mustBeTrue ------\n"; + log << "-- QUERY --\n"; + unsigned i=0; + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + log << " C" << i++ << ": " << *it << ", \n"; + log << " Q : " << query.expr << "\n"; +#endif + + ObjectFinder of; + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + of.visit(*it); + of.visit(query.expr); + CexData cd(of); + + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + cd.forceExprToValue(*it, 1); + cd.forceExprToValue(query.expr, 0); + +#ifdef LOG + log << " -- ranges --\n"; + for (std::map<unsigned, CexObjectData>::iterator it = objectValues.begin(), + ie = objectValues.end(); it != ie; ++it) { + CexObjectData &cod = it->second; + log << " arr" << it->first << "[" << cod.size << "] = ["; + unsigned continueFrom=cod.size-1; + for (; continueFrom>0; continueFrom--) + if (cod.values[continueFrom-1]!=cod.values[continueFrom]) + break; + for (unsigned i=0; i<cod.size; i++) { + 
log << cod.values[i]; + if (i<cod.size-1) { + log << ", "; + if (i==continueFrom) { + log << "..."; + break; + } + } + } + log << "]\n"; + } +#endif + + // this could be done lazily of course + cd.fixValues(); + +#ifdef LOG + log << " -- fixed values --\n"; + for (std::map<unsigned, CexObjectData>::iterator it = objectValues.begin(), + ie = objectValues.end(); it != ie; ++it) { + CexObjectData &cod = it->second; + log << " arr" << it->first << "[" << cod.size << "] = ["; + unsigned continueFrom=cod.size-1; + for (; continueFrom>0; continueFrom--) + if (cod.values[continueFrom-1]!=cod.values[continueFrom]) + break; + for (unsigned i=0; i<cod.size; i++) { + log << cod.values[i]; + if (i<cod.size-1) { + log << ", "; + if (i==continueFrom) { + log << "..."; + break; + } + } + } + log << "]\n"; + } +#endif + + // check the result + + bool isGood = true; + + if (!cd.exprMustBeValue(query.expr, 0)) isGood = false; + + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + if (!cd.exprMustBeValue(*it, 1)) + isGood = false; + +#ifdef LOG + log << " -- evaluating result --\n"; + + i=0; + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) { + bool res = cd.exprMustBeValue(*it, 1); + log << " C" << i++ << ": " << (res?"true":"false") << "\n"; + } + log << " Q : " + << (cd.exprMustBeValue(query.expr, 0)?"true":"false") << "\n"; + + log << "\n\n"; +#endif + + return isGood ? IncompleteSolver::MayBeFalse : IncompleteSolver::None; +} + +bool FastCexSolver::computeValue(const Query& query, ref<Expr> &result) { + ObjectFinder of; + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + of.visit(*it); + of.visit(query.expr); + CexData cd(of); + + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + cd.forceExprToValue(*it, 1); + + // this could be done lazily of course + cd.fixValues(); + + // check the result + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + if (!cd.exprMustBeValue(*it, 1)) + return false; + + CexConstifier cc(cd.objectValues); + ref<Expr> value = cc.visit(query.expr); + + if (value.isConstant()) { + result = value; + return true; + } else { + return false; + } +} + +bool +FastCexSolver::computeInitialValues(const Query& query, + const std::vector<const Array*> + &objects, + std::vector< std::vector<unsigned char> > + &values, + bool &hasSolution) { + ObjectFinder of; + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + of.visit(*it); + of.visit(query.expr); + for (unsigned i = 0; i != objects.size(); ++i) + of.addObject(*objects[i]); + CexData cd(of); + + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + cd.forceExprToValue(*it, 1); + cd.forceExprToValue(query.expr, 0); + + // this could be done lazily of course + cd.fixValues(); + + // check the result + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + if (!cd.exprMustBeValue(*it, 1)) + return false; + if (!cd.exprMustBeValue(query.expr, 0)) + return false; + + hasSolution = true; + CexConstifier cc(cd.objectValues); + for (unsigned i = 0; i != objects.size(); ++i) { + const Array *array = 
objects[i]; + std::vector<unsigned char> data; + data.reserve(array->size); + + for (unsigned i=0; i < array->size; i++) { + ref<Expr> value = + cc.visit(ReadExpr::create(UpdateList(array, true, 0), + ConstantExpr::create(i, + kMachinePointerType))); + + if (value.isConstant()) { + data.push_back(value.getConstantValue()); + } else { + // FIXME: When does this happen? + return false; + } + } + + values.push_back(data); + } + + return true; +} + + +Solver *klee::createFastCexSolver(Solver *s) { + return new Solver(new StagedSolverImpl(new FastCexSolver(), s)); +} diff --git a/lib/Solver/IncompleteSolver.cpp b/lib/Solver/IncompleteSolver.cpp new file mode 100644 index 00000000..f473f70b --- /dev/null +++ b/lib/Solver/IncompleteSolver.cpp @@ -0,0 +1,136 @@ +//===-- IncompleteSolver.cpp ----------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/IncompleteSolver.h" + +#include "klee/Constraints.h" + +using namespace klee; +using namespace llvm; + +/***/ + +IncompleteSolver::PartialValidity +IncompleteSolver::negatePartialValidity(PartialValidity pv) { + switch(pv) { + case MustBeTrue: return MustBeFalse; + case MustBeFalse: return MustBeTrue; + case MayBeTrue: return MayBeFalse; + case MayBeFalse: return MayBeTrue; + case TrueOrFalse: return TrueOrFalse; + default: assert(0 && "invalid partial validity"); + } +} + +IncompleteSolver::PartialValidity +IncompleteSolver::computeValidity(const Query& query) { + PartialValidity trueResult = computeTruth(query); + + if (trueResult == MustBeTrue) { + return MustBeTrue; + } else { + PartialValidity falseResult = computeTruth(query.negateExpr()); + + if (falseResult == MustBeTrue) { + return MustBeFalse; + } else { + bool trueCorrect = trueResult != None, + falseCorrect = falseResult != None; + + if (trueCorrect && falseCorrect) { + return TrueOrFalse; + } else if (trueCorrect) { // ==> trueResult == MayBeFalse + return MayBeFalse; + } else if (falseCorrect) { // ==> falseResult == MayBeFalse + return MayBeTrue; + } else { + return None; + } + } + } +} + +/***/ + +StagedSolverImpl::StagedSolverImpl(IncompleteSolver *_primary, + Solver *_secondary) + : primary(_primary), + secondary(_secondary) { +} + +StagedSolverImpl::~StagedSolverImpl() { + delete primary; + delete secondary; +} + +bool StagedSolverImpl::computeTruth(const Query& query, bool &isValid) { + IncompleteSolver::PartialValidity trueResult = primary->computeTruth(query); + + if (trueResult != IncompleteSolver::None) { + isValid = (trueResult == IncompleteSolver::MustBeTrue); + return true; + } + + return secondary->impl->computeTruth(query, isValid); +} + +bool StagedSolverImpl::computeValidity(const Query& query, + Solver::Validity &result) { + bool tmp; + + switch(primary->computeValidity(query)) { + case IncompleteSolver::MustBeTrue: + result = Solver::True; + break; + case IncompleteSolver::MustBeFalse: + result = Solver::False; + break; + case IncompleteSolver::TrueOrFalse: + result = Solver::Unknown; + break; + case IncompleteSolver::MayBeTrue: + if (!secondary->impl->computeTruth(query, tmp)) + return false; + result = tmp ? Solver::True : Solver::Unknown; + break; + case IncompleteSolver::MayBeFalse: + if (!secondary->impl->computeTruth(query.negateExpr(), tmp)) + return false; + result = tmp ? 
Solver::False : Solver::Unknown; + break; + default: + if (!secondary->impl->computeValidity(query, result)) + return false; + break; + } + + return true; +} + +bool StagedSolverImpl::computeValue(const Query& query, + ref<Expr> &result) { + if (primary->computeValue(query, result)) + return true; + + return secondary->impl->computeValue(query, result); +} + +bool +StagedSolverImpl::computeInitialValues(const Query& query, + const std::vector<const Array*> + &objects, + std::vector< std::vector<unsigned char> > + &values, + bool &hasSolution) { + if (primary->computeInitialValues(query, objects, values, hasSolution)) + return true; + + return secondary->impl->computeInitialValues(query, objects, values, + hasSolution); +} diff --git a/lib/Solver/IndependentSolver.cpp b/lib/Solver/IndependentSolver.cpp new file mode 100644 index 00000000..c966aff6 --- /dev/null +++ b/lib/Solver/IndependentSolver.cpp @@ -0,0 +1,314 @@ +//===-- IndependentSolver.cpp ---------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Solver.h" + +#include "klee/Expr.h" +#include "klee/Constraints.h" +#include "klee/SolverImpl.h" + +#include "klee/util/ExprUtil.h" + +#include "llvm/Support/Streams.h" + +#include <map> +#include <vector> + +using namespace klee; +using namespace llvm; + +template<class T> +class DenseSet { + typedef std::set<T> set_ty; + set_ty s; + +public: + DenseSet() {} + + void add(T x) { + s.insert(x); + } + void add(T start, T end) { + for (; start<end; start++) + s.insert(start); + } + + // returns true iff set is changed by addition + bool add(const DenseSet &b) { + bool modified = false; + for (typename set_ty::const_iterator it = b.s.begin(), ie = b.s.end(); + it != ie; ++it) { + if (modified || !s.count(*it)) { + modified = true; + s.insert(*it); + } + } + return modified; + } + + bool intersects(const DenseSet &b) { + for (typename set_ty::iterator it = s.begin(), ie = s.end(); + it != ie; ++it) + if (b.s.count(*it)) + return true; + return false; + } + + void print(std::ostream &os) const { + bool first = true; + os << "{"; + for (typename set_ty::iterator it = s.begin(), ie = s.end(); + it != ie; ++it) { + if (first) { + first = false; + } else { + os << ","; + } + os << *it; + } + os << "}"; + } +}; + +template<class T> +inline std::ostream &operator<<(std::ostream &os, const DenseSet<T> &dis) { + dis.print(os); + return os; +} + +class IndependentElementSet { + typedef std::map<const Array*, DenseSet<unsigned> > elements_ty; + elements_ty elements; + std::set<const Array*> wholeObjects; + +public: + IndependentElementSet() {} + IndependentElementSet(ref<Expr> e) { + std::vector< ref<ReadExpr> > reads; + findReads(e, /* visitUpdates= */ true, reads); + for (unsigned i = 0; i != reads.size(); ++i) { + ReadExpr *re = reads[i].get(); + if (re->updates.isRooted) { + const Array *array = re->updates.root; + if (!wholeObjects.count(array)) { + if (re->index.isConstant()) { + DenseSet<unsigned> &dis = elements[array]; + dis.add((unsigned) re->index.getConstantValue()); + } else { + elements_ty::iterator it2 = elements.find(array); + if (it2!=elements.end()) + elements.erase(it2); + wholeObjects.insert(array); + } + } + } + } + } + IndependentElementSet(const IndependentElementSet &ies) : + elements(ies.elements), + 
wholeObjects(ies.wholeObjects) {} + + IndependentElementSet &operator=(const IndependentElementSet &ies) { + elements = ies.elements; + wholeObjects = ies.wholeObjects; + return *this; + } + + void print(std::ostream &os) const { + os << "{"; + bool first = true; + for (std::set<const Array*>::iterator it = wholeObjects.begin(), + ie = wholeObjects.end(); it != ie; ++it) { + const Array *array = *it; + + if (first) { + first = false; + } else { + os << ", "; + } + + os << "MO" << array->id; + } + for (elements_ty::const_iterator it = elements.begin(), ie = elements.end(); + it != ie; ++it) { + const Array *array = it->first; + const DenseSet<unsigned> &dis = it->second; + + if (first) { + first = false; + } else { + os << ", "; + } + + os << "MO" << array->id << " : " << dis; + } + os << "}"; + } + + // more efficient when this is the smaller set + bool intersects(const IndependentElementSet &b) { + for (std::set<const Array*>::iterator it = wholeObjects.begin(), + ie = wholeObjects.end(); it != ie; ++it) { + const Array *array = *it; + if (b.wholeObjects.count(array) || + b.elements.find(array) != b.elements.end()) + return true; + } + for (elements_ty::iterator it = elements.begin(), ie = elements.end(); + it != ie; ++it) { + const Array *array = it->first; + if (b.wholeObjects.count(array)) + return true; + elements_ty::const_iterator it2 = b.elements.find(array); + if (it2 != b.elements.end()) { + if (it->second.intersects(it2->second)) + return true; + } + } + return false; + } + + // returns true iff set is changed by addition + bool add(const IndependentElementSet &b) { + bool modified = false; + for (std::set<const Array*>::const_iterator it = b.wholeObjects.begin(), + ie = b.wholeObjects.end(); it != ie; ++it) { + const Array *array = *it; + elements_ty::iterator it2 = elements.find(array); + if (it2!=elements.end()) { + modified = true; + elements.erase(it2); + wholeObjects.insert(array); + } else { + if (!wholeObjects.count(array)) { + modified = true; + wholeObjects.insert(array); + } + } + } + for (elements_ty::const_iterator it = b.elements.begin(), + ie = b.elements.end(); it != ie; ++it) { + const Array *array = it->first; + if (!wholeObjects.count(array)) { + elements_ty::iterator it2 = elements.find(array); + if (it2==elements.end()) { + modified = true; + elements.insert(*it); + } else { + if (it2->second.add(it->second)) + modified = true; + } + } + } + return modified; + } +}; + +inline std::ostream &operator<<(std::ostream &os, const IndependentElementSet &ies) { + ies.print(os); + return os; +} + +static +IndependentElementSet getIndependentConstraints(const Query& query, + std::vector< ref<Expr> > &result) { + IndependentElementSet eltsClosure(query.expr); + std::vector< std::pair<ref<Expr>, IndependentElementSet> > worklist; + + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + worklist.push_back(std::make_pair(*it, IndependentElementSet(*it))); + + // XXX This should be more efficient (in terms of low level copy stuff). 
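+  // Worked example (hypothetical constraints, for illustration): if the
+  // query reads array A, C0 reads A and B, and C1 reads only C, the first
+  // pass moves C0 into the closure (it shares A with the query) and grows
+  // the closure to {A, B}, while C1 stays on the worklist; the next pass
+  // adds nothing, so the loop stops and only C0 is handed to the
+  // underlying solver.  Re-scanning until a pass adds nothing is needed
+  // because pulling in one constraint can connect arrays that make an
+  // earlier-skipped constraint relevant.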
+ bool done = false; + do { + done = true; + std::vector< std::pair<ref<Expr>, IndependentElementSet> > newWorklist; + for (std::vector< std::pair<ref<Expr>, IndependentElementSet> >::iterator + it = worklist.begin(), ie = worklist.end(); it != ie; ++it) { + if (it->second.intersects(eltsClosure)) { + if (eltsClosure.add(it->second)) + done = false; + result.push_back(it->first); + } else { + newWorklist.push_back(*it); + } + } + worklist.swap(newWorklist); + } while (!done); + + if (0) { + std::set< ref<Expr> > reqset(result.begin(), result.end()); + llvm::cerr << "--\n"; + llvm::cerr << "Q: " << query.expr << "\n"; + llvm::cerr << "\telts: " << IndependentElementSet(query.expr) << "\n"; + int i = 0; + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) { + llvm::cerr << "C" << i++ << ": " << *it; + llvm::cerr << " " << (reqset.count(*it) ? "(required)" : "(independent)") << "\n"; + llvm::cerr << "\telts: " << IndependentElementSet(*it) << "\n"; + } + llvm::cerr << "elts closure: " << eltsClosure << "\n"; + } + + return eltsClosure; +} + +class IndependentSolver : public SolverImpl { +private: + Solver *solver; + +public: + IndependentSolver(Solver *_solver) + : solver(_solver) {} + ~IndependentSolver() { delete solver; } + + bool computeTruth(const Query&, bool &isValid); + bool computeValidity(const Query&, Solver::Validity &result); + bool computeValue(const Query&, ref<Expr> &result); + bool computeInitialValues(const Query& query, + const std::vector<const Array*> &objects, + std::vector< std::vector<unsigned char> > &values, + bool &hasSolution) { + return solver->impl->computeInitialValues(query, objects, values, + hasSolution); + } +}; + +bool IndependentSolver::computeValidity(const Query& query, + Solver::Validity &result) { + std::vector< ref<Expr> > required; + IndependentElementSet eltsClosure = + getIndependentConstraints(query, required); + ConstraintManager tmp(required); + return solver->impl->computeValidity(Query(tmp, query.expr), + result); +} + +bool IndependentSolver::computeTruth(const Query& query, bool &isValid) { + std::vector< ref<Expr> > required; + IndependentElementSet eltsClosure = + getIndependentConstraints(query, required); + ConstraintManager tmp(required); + return solver->impl->computeTruth(Query(tmp, query.expr), + isValid); +} + +bool IndependentSolver::computeValue(const Query& query, ref<Expr> &result) { + std::vector< ref<Expr> > required; + IndependentElementSet eltsClosure = + getIndependentConstraints(query, required); + ConstraintManager tmp(required); + return solver->impl->computeValue(Query(tmp, query.expr), result); +} + +Solver *klee::createIndependentSolver(Solver *s) { + return new Solver(new IndependentSolver(s)); +} diff --git a/lib/Solver/Makefile b/lib/Solver/Makefile new file mode 100755 index 00000000..11d3d330 --- /dev/null +++ b/lib/Solver/Makefile @@ -0,0 +1,16 @@ +#===-- lib/Solver/Makefile ---------------------------------*- Makefile -*--===# +# +# The KLEE Symbolic Virtual Machine +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +LEVEL=../.. 
+ +LIBRARYNAME=kleaverSolver +DONT_BUILD_RELINKED=1 +BUILD_ARCHIVE=1 + +include $(LEVEL)/Makefile.common diff --git a/lib/Solver/PCLoggingSolver.cpp b/lib/Solver/PCLoggingSolver.cpp new file mode 100644 index 00000000..4b787acb --- /dev/null +++ b/lib/Solver/PCLoggingSolver.cpp @@ -0,0 +1,134 @@ +//===-- PCLoggingSolver.cpp -----------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Solver.h" + +// FIXME: This should not be here. +#include "klee/ExecutionState.h" +#include "klee/Expr.h" +#include "klee/SolverImpl.h" +#include "klee/Statistics.h" +#include "klee/util/ExprPPrinter.h" +#include "klee/Internal/Support/QueryLog.h" +#include "klee/Internal/System/Time.h" + +#include "llvm/Support/CommandLine.h" + +#include <fstream> + +using namespace klee; +using namespace llvm; +using namespace klee::util; + +/// + +class PCLoggingSolver : public SolverImpl { + Solver *solver; + std::ofstream os; + ExprPPrinter *printer; + unsigned queryCount; + double startTime; + + void startQuery(const Query& query, const char *typeName) { + Statistic *S = theStatisticManager->getStatisticByName("Instructions"); + uint64_t instructions = S ? S->getValue() : 0; + os << "# Query " << queryCount++ << " -- " + << "Type: " << typeName << ", " + << "Instructions: " << instructions << "\n"; + printer->printQuery(os, query.constraints, query.expr); + + startTime = getWallTime(); + } + + void finishQuery(bool success) { + double delta = getWallTime() - startTime; + os << "# " << (success ? "OK" : "FAIL") << " -- " + << "Elapsed: " << delta << "\n"; + } + +public: + PCLoggingSolver(Solver *_solver, std::string path) + : solver(_solver), + os(path.c_str(), std::ios::trunc), + printer(ExprPPrinter::create(os)), + queryCount(0) { + } + ~PCLoggingSolver() { + delete printer; + delete solver; + } + + bool computeTruth(const Query& query, bool &isValid) { + startQuery(query, "Truth"); + bool success = solver->impl->computeTruth(query, isValid); + finishQuery(success); + if (success) + os << "# Is Valid: " << (isValid ? "true" : "false") << "\n"; + os << "\n"; + return success; + } + + bool computeValidity(const Query& query, Solver::Validity &result) { + startQuery(query, "Validity"); + bool success = solver->impl->computeValidity(query, result); + finishQuery(success); + if (success) + os << "# Validity: " << result << "\n"; + os << "\n"; + return success; + } + + bool computeValue(const Query& query, ref<Expr> &result) { + startQuery(query, "Value"); + bool success = solver->impl->computeValue(query, result); + finishQuery(success); + if (success) + os << "# Result: " << result << "\n"; + os << "\n"; + return success; + } + + bool computeInitialValues(const Query& query, + const std::vector<const Array*> &objects, + std::vector< std::vector<unsigned char> > &values, + bool &hasSolution) { + // FIXME: Add objects to output. + startQuery(query, "InitialValues"); + bool success = solver->impl->computeInitialValues(query, objects, + values, hasSolution); + finishQuery(success); + if (success) { + os << "# Solvable: " << (hasSolution ? 
"true" : "false") << "\n"; + if (hasSolution) { + std::vector< std::vector<unsigned char> >::iterator + values_it = values.begin(); + for (std::vector<const Array*>::const_iterator i = objects.begin(), + e = objects.end(); i != e; ++i, ++values_it) { + const Array *array = *i; + std::vector<unsigned char> &data = *values_it; + os << "# arr" << array->id << " = ["; + for (unsigned j = 0; j < array->size; j++) { + os << (int) data[j]; + if (j+1 < array->size) + os << ","; + } + os << "]\n"; + } + } + } + os << "\n"; + return success; + } +}; + +/// + +Solver *klee::createPCLoggingSolver(Solver *_solver, std::string path) { + return new Solver(new PCLoggingSolver(_solver, path)); +} diff --git a/lib/Solver/STPBuilder.cpp b/lib/Solver/STPBuilder.cpp new file mode 100644 index 00000000..33aee035 --- /dev/null +++ b/lib/Solver/STPBuilder.cpp @@ -0,0 +1,819 @@ +//===-- STPBuilder.cpp ----------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "STPBuilder.h" + +#include "klee/Expr.h" +#include "klee/Solver.h" +#include "klee/util/Bits.h" + +#include "ConstantDivision.h" +#include "SolverStats.h" + +#include "llvm/Support/CommandLine.h" + +#define vc_bvBoolExtract IAMTHESPAWNOFSATAN +// unclear return +#define vc_bvLeftShiftExpr IAMTHESPAWNOFSATAN +#define vc_bvRightShiftExpr IAMTHESPAWNOFSATAN +// bad refcnt'ng +#define vc_bvVar32LeftShiftExpr IAMTHESPAWNOFSATAN +#define vc_bvVar32RightShiftExpr IAMTHESPAWNOFSATAN +#define vc_bvVar32DivByPowOfTwoExpr IAMTHESPAWNOFSATAN +#define vc_bvCreateMemoryArray IAMTHESPAWNOFSATAN +#define vc_bvReadMemoryArray IAMTHESPAWNOFSATAN +#define vc_bvWriteToMemoryArray IAMTHESPAWNOFSATAN + +#include <algorithm> // max, min +#include <cassert> +#include <iostream> +#include <map> +#include <sstream> +#include <vector> + +using namespace klee; + +namespace { + llvm::cl::opt<bool> + UseConstructHash("use-construct-hash", + llvm::cl::desc("Use hash-consing during STP query construction."), + llvm::cl::init(true)); +} + +/// + +/***/ + +STPBuilder::STPBuilder(::VC _vc, bool _optimizeDivides) + : vc(_vc), optimizeDivides(_optimizeDivides) +{ + tempVars[0] = buildVar("__tmpInt8", 8); + tempVars[1] = buildVar("__tmpInt16", 16); + tempVars[2] = buildVar("__tmpInt32", 32); + tempVars[3] = buildVar("__tmpInt64", 64); +} + +STPBuilder::~STPBuilder() { +} + +/// + +/* Warning: be careful about what c_interface functions you use. Some of + them look like they cons memory but in fact don't, which is bad when + you call vc_DeleteExpr on them. */ + +::VCExpr STPBuilder::buildVar(const char *name, unsigned width) { + // XXX don't rebuild if this stuff cons's + ::Type t = (width==1) ? 
vc_boolType(vc) : vc_bvType(vc, width); + ::VCExpr res = vc_varExpr(vc, (char*) name, t); + vc_DeleteExpr(t); + return res; +} + +::VCExpr STPBuilder::buildArray(const char *name, unsigned indexWidth, unsigned valueWidth) { + // XXX don't rebuild if this stuff cons's + ::Type t1 = vc_bvType(vc, indexWidth); + ::Type t2 = vc_bvType(vc, valueWidth); + ::Type t = vc_arrayType(vc, t1, t2); + ::VCExpr res = vc_varExpr(vc, (char*) name, t); + vc_DeleteExpr(t); + vc_DeleteExpr(t2); + vc_DeleteExpr(t1); + return res; +} + +ExprHandle STPBuilder::getTempVar(Expr::Width w) { + switch (w) { + case Expr::Int8: return tempVars[0]; + case Expr::Int16: return tempVars[1]; + case Expr::Int32: return tempVars[2]; + case Expr::Int64: return tempVars[3]; + default: + assert(0 && "invalid type"); + } +} + +ExprHandle STPBuilder::getTrue() { + return vc_trueExpr(vc); +} +ExprHandle STPBuilder::getFalse() { + return vc_falseExpr(vc); +} +ExprHandle STPBuilder::bvOne(unsigned width) { + return bvConst32(width, 1); +} +ExprHandle STPBuilder::bvZero(unsigned width) { + return bvConst32(width, 0); +} +ExprHandle STPBuilder::bvMinusOne(unsigned width) { + return bvConst64(width, (int64_t) -1); +} +ExprHandle STPBuilder::bvConst32(unsigned width, uint32_t value) { + return vc_bvConstExprFromInt(vc, width, value); +} +ExprHandle STPBuilder::bvConst64(unsigned width, uint64_t value) { + return vc_bvConstExprFromLL(vc, width, value); +} + +ExprHandle STPBuilder::bvBoolExtract(ExprHandle expr, int bit) { + return vc_eqExpr(vc, bvExtract(expr, bit, bit), bvOne(1)); +} +ExprHandle STPBuilder::bvExtract(ExprHandle expr, unsigned top, unsigned bottom) { + return vc_bvExtract(vc, expr, top, bottom); +} +ExprHandle STPBuilder::eqExpr(ExprHandle a, ExprHandle b) { + return vc_eqExpr(vc, a, b); +} + +// logical right shift +ExprHandle STPBuilder::bvRightShift(ExprHandle expr, unsigned amount, unsigned shiftBits) { + unsigned width = vc_getBVLength(vc, expr); + unsigned shift = amount & ((1<<shiftBits) - 1); + + if (shift==0) { + return expr; + } else if (shift>=width) { + return bvZero(width); + } else { + return vc_bvConcatExpr(vc, + bvZero(shift), + bvExtract(expr, width - 1, shift)); + } +} + +// logical left shift +ExprHandle STPBuilder::bvLeftShift(ExprHandle expr, unsigned amount, unsigned shiftBits) { + unsigned width = vc_getBVLength(vc, expr); + unsigned shift = amount & ((1<<shiftBits) - 1); + + if (shift==0) { + return expr; + } else if (shift>=width) { + return bvZero(width); + } else { + // stp shift does "expr @ [0 x s]" which we then have to extract, + // rolling our own gives slightly smaller exprs + return vc_bvConcatExpr(vc, + bvExtract(expr, width - shift - 1, 0), + bvZero(shift)); + } +} + +// left shift by a variable amount on an expression of the specified width +ExprHandle STPBuilder::bvVarLeftShift(ExprHandle expr, ExprHandle amount, unsigned width) { + ExprHandle res = bvZero(width); + + int shiftBits = getShiftBits( width ); + + //get the shift amount (looking only at the bits appropriate for the given width) + ExprHandle shift = vc_bvExtract( vc, amount, shiftBits - 1, 0 ); + + //construct a big if-then-elif-elif-... 
with one case per possible shift amount + for( int i=width-1; i>=0; i-- ) { + res = vc_iteExpr(vc, + eqExpr(shift, bvConst32(shiftBits, i)), + bvLeftShift(expr, i, shiftBits), + res); + } + return res; +} + +// logical right shift by a variable amount on an expression of the specified width +ExprHandle STPBuilder::bvVarRightShift(ExprHandle expr, ExprHandle amount, unsigned width) { + ExprHandle res = bvZero(width); + + int shiftBits = getShiftBits( width ); + + //get the shift amount (looking only at the bits appropriate for the given width) + ExprHandle shift = vc_bvExtract( vc, amount, shiftBits - 1, 0 ); + + //construct a big if-then-elif-elif-... with one case per possible shift amount + for( int i=width-1; i>=0; i-- ) { + res = vc_iteExpr(vc, + eqExpr(shift, bvConst32(shiftBits, i)), + bvRightShift(expr, i, shiftBits), + res); + } + + return res; +} + +// arithmetic right shift by a variable amount on an expression of the specified width +ExprHandle STPBuilder::bvVarArithRightShift(ExprHandle expr, ExprHandle amount, unsigned width) { + int shiftBits = getShiftBits( width ); + + //get the shift amount (looking only at the bits appropriate for the given width) + ExprHandle shift = vc_bvExtract( vc, amount, shiftBits - 1, 0 ); + + //get the sign bit to fill with + ExprHandle signedBool = bvBoolExtract(expr, width-1); + + //start with the result if shifting by width-1 + ExprHandle res = constructAShrByConstant(expr, width-1, signedBool, shiftBits); + + //construct a big if-then-elif-elif-... with one case per possible shift amount + // XXX more efficient to move the ite on the sign outside all exprs? + // XXX more efficient to sign extend, right shift, then extract lower bits? + for( int i=width-2; i>=0; i-- ) { + res = vc_iteExpr(vc, + eqExpr(shift, bvConst32(shiftBits,i)), + constructAShrByConstant(expr, + i, + signedBool, + shiftBits), + res); + } + + return res; +} + +ExprHandle STPBuilder::constructAShrByConstant(ExprHandle expr, + unsigned amount, + ExprHandle isSigned, + unsigned shiftBits) { + unsigned width = vc_getBVLength(vc, expr); + unsigned shift = amount & ((1<<shiftBits) - 1); + + if (shift==0) { + return expr; + } else if (shift>=width-1) { + return vc_iteExpr(vc, isSigned, bvMinusOne(width), bvZero(width)); + } else { + return vc_iteExpr(vc, + isSigned, + ExprHandle(vc_bvConcatExpr(vc, + bvMinusOne(shift), + bvExtract(expr, width - 1, shift))), + bvRightShift(expr, shift, shiftBits)); + } +} + +ExprHandle STPBuilder::constructMulByConstant(ExprHandle expr, unsigned width, uint64_t x) { + unsigned shiftBits = getShiftBits(width); + uint64_t add, sub; + ExprHandle res = 0; + + // expr*x == expr*(add-sub) == expr*add - expr*sub + ComputeMultConstants64(x, add, sub); + + // legal, these would overflow completely + add = bits64::truncateToNBits(add, width); + sub = bits64::truncateToNBits(sub, width); + + for (int j=63; j>=0; j--) { + uint64_t bit = 1LL << j; + + if ((add&bit) || (sub&bit)) { + assert(!((add&bit) && (sub&bit)) && "invalid mult constants"); + ExprHandle op = bvLeftShift(expr, j, shiftBits); + + if (add&bit) { + if (res) { + res = vc_bvPlusExpr(vc, width, res, op); + } else { + res = op; + } + } else { + if (res) { + res = vc_bvMinusExpr(vc, width, res, op); + } else { + res = vc_bvUMinusExpr(vc, op); + } + } + } + } + + if (!res) + res = bvZero(width); + + return res; +} + +/* + * Compute the 32-bit unsigned integer division of n by a divisor d based on + * the constants derived from the constant divisor d. 
+ * + * Returns n/d without doing explicit division. The cost is 2 adds, 3 shifts, + * and a (64-bit) multiply. + * + * @param n numerator (dividend) as an expression + * @param width number of bits used to represent the value + * @param d the divisor + * + * @return n/d without doing explicit division + */ +ExprHandle STPBuilder::constructUDivByConstant(ExprHandle expr_n, unsigned width, uint64_t d) { + assert(width==32 && "can only compute udiv constants for 32-bit division"); + + // Compute the constants needed to compute n/d for constant d w/o + // division by d. + uint32_t mprime, sh1, sh2; + ComputeUDivConstants32(d, mprime, sh1, sh2); + ExprHandle expr_sh1 = bvConst32( 32, sh1); + ExprHandle expr_sh2 = bvConst32( 32, sh2); + + // t1 = MULUH(mprime, n) = ( (uint64_t)mprime * (uint64_t)n ) >> 32 + ExprHandle expr_n_64 = vc_bvConcatExpr( vc, bvZero(32), expr_n ); //extend to 64 bits + ExprHandle t1_64bits = constructMulByConstant( expr_n_64, 64, (uint64_t)mprime ); + ExprHandle t1 = vc_bvExtract( vc, t1_64bits, 63, 32 ); //upper 32 bits + + // n/d = (((n - t1) >> sh1) + t1) >> sh2; + ExprHandle n_minus_t1 = vc_bvMinusExpr( vc, width, expr_n, t1 ); + ExprHandle shift_sh1 = bvVarRightShift( n_minus_t1, expr_sh1, 32 ); + ExprHandle plus_t1 = vc_bvPlusExpr( vc, width, shift_sh1, t1 ); + ExprHandle res = bvVarRightShift( plus_t1, expr_sh2, 32 ); + + return res; +} + +/* + * Compute the 32-bitnsigned integer division of n by a divisor d based on + * the constants derived from the constant divisor d. + * + * Returns n/d without doing explicit division. The cost is 3 adds, 3 shifts, + * a (64-bit) multiply, and an XOR. + * + * @param n numerator (dividend) as an expression + * @param width number of bits used to represent the value + * @param d the divisor + * + * @return n/d without doing explicit division + */ +ExprHandle STPBuilder::constructSDivByConstant(ExprHandle expr_n, unsigned width, uint64_t d) { + assert(width==32 && "can only compute udiv constants for 32-bit division"); + + // Compute the constants needed to compute n/d for constant d w/o division by d. + int32_t mprime, dsign, shpost; + ComputeSDivConstants32(d, mprime, dsign, shpost); + ExprHandle expr_dsign = bvConst32( 32, dsign); + ExprHandle expr_shpost = bvConst32( 32, shpost); + + // q0 = n + MULSH( mprime, n ) = n + (( (int64_t)mprime * (int64_t)n ) >> 32) + int64_t mprime_64 = (int64_t)mprime; + + ExprHandle expr_n_64 = vc_bvSignExtend( vc, expr_n, 64 ); + ExprHandle mult_64 = constructMulByConstant( expr_n_64, 64, mprime_64 ); + ExprHandle mulsh = vc_bvExtract( vc, mult_64, 63, 32 ); //upper 32-bits + ExprHandle n_plus_mulsh = vc_bvPlusExpr( vc, width, expr_n, mulsh ); + + // Improved variable arithmetic right shift: sign extend, shift, + // extract. 
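+  // Why this works (reasoning sketch): sign extending the 32-bit value to
+  // 64 bits fills the upper half with copies of the sign bit, so a
+  // *logical* right shift of the extended value followed by extracting the
+  // low 32 bits shifts in exactly those sign-bit copies -- the same result
+  // an arithmetic right shift of the original 32-bit value would give,
+  // without needing a separate ashr construction.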
+ ExprHandle extend_npm = vc_bvSignExtend( vc, n_plus_mulsh, 64 ); + ExprHandle shift_npm = bvVarRightShift( extend_npm, expr_shpost, 64 ); + ExprHandle shift_shpost = vc_bvExtract( vc, shift_npm, 31, 0 ); //lower 32-bits + + // XSIGN(n) is -1 if n is negative, positive one otherwise + ExprHandle is_signed = bvBoolExtract( expr_n, 31 ); + ExprHandle neg_one = bvMinusOne(32); + ExprHandle xsign_of_n = vc_iteExpr( vc, is_signed, neg_one, bvZero(32) ); + + // q0 = (n_plus_mulsh >> shpost) - XSIGN(n) + ExprHandle q0 = vc_bvMinusExpr( vc, width, shift_shpost, xsign_of_n ); + + // n/d = (q0 ^ dsign) - dsign + ExprHandle q0_xor_dsign = vc_bvXorExpr( vc, q0, expr_dsign ); + ExprHandle res = vc_bvMinusExpr( vc, width, q0_xor_dsign, expr_dsign ); + + return res; +} + +::VCExpr STPBuilder::getInitialArray(const Array *root) { + if (root->stpInitialArray) { + return root->stpInitialArray; + } else { + char buf[32]; + sprintf(buf, "arr%d", root->id); + root->stpInitialArray = buildArray(buf, 32, 8); + return root->stpInitialArray; + } +} + +ExprHandle STPBuilder::getInitialRead(const Array *root, unsigned index) { + return vc_readExpr(vc, getInitialArray(root), bvConst32(32, index)); +} + +::VCExpr STPBuilder::getArrayForUpdate(const Array *root, + const UpdateNode *un) { + if (!un) { + return getInitialArray(root); + } else { + // FIXME: This really needs to be non-recursive. + if (!un->stpArray) + un->stpArray = vc_writeExpr(vc, + getArrayForUpdate(root, un->next), + construct(un->index, 0), + construct(un->value, 0)); + + return un->stpArray; + } +} + +/** if *width_out!=1 then result is a bitvector, + otherwise it is a bool */ +ExprHandle STPBuilder::construct(ref<Expr> e, int *width_out) { + if (!UseConstructHash || e.isConstant()) { + return constructActual(e, width_out); + } else { + ExprHashMap< std::pair<ExprHandle, unsigned> >::iterator it = + constructed.find(e); + if (it!=constructed.end()) { + if (width_out) + *width_out = it->second.second; + return it->second.first; + } else { + int width; + if (!width_out) width_out = &width; + ExprHandle res = constructActual(e, width_out); + constructed.insert(std::make_pair(e, std::make_pair(res, *width_out))); + return res; + } + } +} + + +/** if *width_out!=1 then result is a bitvector, + otherwise it is a bool */ +ExprHandle STPBuilder::constructActual(ref<Expr> e, int *width_out) { + int width; + if (!width_out) width_out = &width; + + ++stats::queryConstructs; + + switch(e.getKind()) { + + case Expr::Constant: { + uint64_t asUInt64 = e.getConstantValue(); + *width_out = e.getWidth(); + + if (*width_out > 64) + assert(0 && "constructActual: width > 64"); + + if (*width_out == 1) + return asUInt64 ? 
getTrue() : getFalse(); + else if (*width_out <= 32) + return bvConst32(*width_out, asUInt64); + else return bvConst64(*width_out, asUInt64); + } + + // Special + case Expr::NotOptimized: { + NotOptimizedExpr *noe = static_ref_cast<NotOptimizedExpr>(e); + return construct(noe->src, width_out); + } + + case Expr::Read: { + ReadExpr *re = static_ref_cast<ReadExpr>(e); + *width_out = 8; + return vc_readExpr(vc, + getArrayForUpdate(re->updates.root, re->updates.head), + construct(re->index, 0)); + } + + case Expr::Select: { + SelectExpr *se = static_ref_cast<SelectExpr>(e); + ExprHandle cond = construct(se->cond, 0); + ExprHandle tExpr = construct(se->trueExpr, width_out); + ExprHandle fExpr = construct(se->falseExpr, width_out); + return vc_iteExpr(vc, cond, tExpr, fExpr); + } + + case Expr::Concat: { + ConcatExpr *ce = static_ref_cast<ConcatExpr>(e); + unsigned numKids = ce->getNumKids(); + ExprHandle res = construct(ce->getKid(numKids-1), 0); + for (int i=numKids-2; i>=0; i--) { + res = vc_bvConcatExpr(vc, construct(ce->getKid(i), 0), res); + } + *width_out = ce->getWidth(); + return res; + } + + case Expr::Extract: { + ExtractExpr *ee = static_ref_cast<ExtractExpr>(e); + ExprHandle src = construct(ee->expr, width_out); + *width_out = ee->getWidth(); + if (*width_out==1) { + return bvBoolExtract(src, 0); + } else { + return vc_bvExtract(vc, src, ee->offset + *width_out - 1, ee->offset); + } + } + + // Casting + + case Expr::ZExt: { + int srcWidth; + CastExpr *ce = static_ref_cast<CastExpr>(e); + ExprHandle src = construct(ce->src, &srcWidth); + *width_out = ce->getWidth(); + if (srcWidth==1) { + return vc_iteExpr(vc, src, bvOne(*width_out), bvZero(*width_out)); + } else { + return vc_bvConcatExpr(vc, bvZero(*width_out-srcWidth), src); + } + } + + case Expr::SExt: { + int srcWidth; + CastExpr *ce = static_ref_cast<CastExpr>(e); + ExprHandle src = construct(ce->src, &srcWidth); + *width_out = ce->getWidth(); + if (srcWidth==1) { + return vc_iteExpr(vc, src, bvMinusOne(*width_out), bvZero(*width_out)); + } else { + return vc_bvSignExtend(vc, src, *width_out); + } + } + + // Arithmetic + + case Expr::Add: { + AddExpr *ae = static_ref_cast<AddExpr>(e); + ExprHandle left = construct(ae->left, width_out); + ExprHandle right = construct(ae->right, width_out); + assert(*width_out!=1 && "uncanonicalized add"); + return vc_bvPlusExpr(vc, *width_out, left, right); + } + + case Expr::Sub: { + SubExpr *se = static_ref_cast<SubExpr>(e); + ExprHandle left = construct(se->left, width_out); + ExprHandle right = construct(se->right, width_out); + assert(*width_out!=1 && "uncanonicalized sub"); + return vc_bvMinusExpr(vc, *width_out, left, right); + } + + case Expr::Mul: { + MulExpr *me = static_ref_cast<MulExpr>(e); + ExprHandle right = construct(me->right, width_out); + assert(*width_out!=1 && "uncanonicalized mul"); + + if (me->left.isConstant()) { + return constructMulByConstant(right, *width_out, me->left.getConstantValue()); + } else { + ExprHandle left = construct(me->left, width_out); + return vc_bvMultExpr(vc, *width_out, left, right); + } + } + + case Expr::UDiv: { + UDivExpr *de = static_ref_cast<UDivExpr>(e); + ExprHandle left = construct(de->left, width_out); + assert(*width_out!=1 && "uncanonicalized udiv"); + + if (de->right.isConstant()) { + uint64_t divisor = de->right.getConstantValue(); + + if (bits64::isPowerOfTwo(divisor)) { + return bvRightShift(left, + bits64::indexOfSingleBit(divisor), + getShiftBits(*width_out)); + } else if (optimizeDivides) { + if (*width_out == 32) //only works for 
32-bit division + return constructUDivByConstant( left, *width_out, (uint32_t)divisor ); + } + } + + ExprHandle right = construct(de->right, width_out); + return vc_bvDivExpr(vc, *width_out, left, right); + } + + case Expr::SDiv: { + SDivExpr *de = static_ref_cast<SDivExpr>(e); + ExprHandle left = construct(de->left, width_out); + assert(*width_out!=1 && "uncanonicalized sdiv"); + + if (de->right.isConstant()) { + uint64_t divisor = de->right.getConstantValue(); + + if (optimizeDivides) { + if (*width_out == 32) //only works for 32-bit division + return constructSDivByConstant( left, *width_out, divisor); + } + } + + // XXX need to test for proper handling of sign, not sure I + // trust STP + ExprHandle right = construct(de->right, width_out); + return vc_sbvDivExpr(vc, *width_out, left, right); + } + + case Expr::URem: { + URemExpr *de = static_ref_cast<URemExpr>(e); + ExprHandle left = construct(de->left, width_out); + assert(*width_out!=1 && "uncanonicalized urem"); + + if (de->right.isConstant()) { + uint64_t divisor = de->right.getConstantValue(); + + if (bits64::isPowerOfTwo(divisor)) { + unsigned bits = bits64::indexOfSingleBit(divisor); + + // special case for modding by 1 or else we bvExtract -1:0 + if (bits == 0) { + return bvZero(*width_out); + } else { + return vc_bvConcatExpr(vc, + bvZero(*width_out - bits), + bvExtract(left, bits - 1, 0)); + } + } + + //use fast division to compute modulo without explicit division for constant divisor + if (optimizeDivides) { + if (*width_out == 32) { //only works for 32-bit division + ExprHandle quotient = constructUDivByConstant( left, *width_out, (uint32_t)divisor ); + ExprHandle quot_times_divisor = constructMulByConstant( quotient, *width_out, divisor ); + ExprHandle rem = vc_bvMinusExpr( vc, *width_out, left, quot_times_divisor ); + return rem; + } + } + } + + ExprHandle right = construct(de->right, width_out); + return vc_bvModExpr(vc, *width_out, left, right); + } + + case Expr::SRem: { + SRemExpr *de = static_ref_cast<SRemExpr>(e); + ExprHandle left = construct(de->left, width_out); + ExprHandle right = construct(de->right, width_out); + assert(*width_out!=1 && "uncanonicalized srem"); + +#if 0 //not faster per first benchmark + if (optimizeDivides) { + if (ConstantExpr *cre = de->right->asConstant()) { + uint64_t divisor = cre->asUInt64; + + //use fast division to compute modulo without explicit division for constant divisor + if( *width_out == 32 ) { //only works for 32-bit division + ExprHandle quotient = constructSDivByConstant( left, *width_out, divisor ); + ExprHandle quot_times_divisor = constructMulByConstant( quotient, *width_out, divisor ); + ExprHandle rem = vc_bvMinusExpr( vc, *width_out, left, quot_times_divisor ); + return rem; + } + } + } +#endif + + // XXX implement my fast path and test for proper handling of sign + return vc_sbvModExpr(vc, *width_out, left, right); + } + + // Binary + + case Expr::And: { + AndExpr *ae = static_ref_cast<AndExpr>(e); + ExprHandle left = construct(ae->left, width_out); + ExprHandle right = construct(ae->right, width_out); + if (*width_out==1) { + return vc_andExpr(vc, left, right); + } else { + return vc_bvAndExpr(vc, left, right); + } + } + case Expr::Or: { + OrExpr *oe = static_ref_cast<OrExpr>(e); + ExprHandle left = construct(oe->left, width_out); + ExprHandle right = construct(oe->right, width_out); + if (*width_out==1) { + return vc_orExpr(vc, left, right); + } else { + return vc_bvOrExpr(vc, left, right); + } + } + + case Expr::Xor: { + XorExpr *xe = static_ref_cast<XorExpr>(e); 
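+    // For Bool operands the encoding below is ite(left, !right, right); a
+    // quick truth-table check (left=1 picks !right: 1^1=0, 1^0=1; left=0
+    // picks right: 0^1=1, 0^0=0) confirms it matches xor.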
+ ExprHandle left = construct(xe->left, width_out); + ExprHandle right = construct(xe->right, width_out); + + if (*width_out==1) { + // XXX check for most efficient? + return vc_iteExpr(vc, left, + ExprHandle(vc_notExpr(vc, right)), right); + } else { + return vc_bvXorExpr(vc, left, right); + } + } + + case Expr::Shl: { + ShlExpr *se = static_ref_cast<ShlExpr>(e); + ExprHandle left = construct(se->left, width_out); + assert(*width_out!=1 && "uncanonicalized shl"); + + if (se->right.isConstant()) { + return bvLeftShift(left, se->right.getConstantValue(), getShiftBits(*width_out)); + } else { + int shiftWidth; + ExprHandle amount = construct(se->right, &shiftWidth); + return bvVarLeftShift( left, amount, *width_out ); + } + } + + case Expr::LShr: { + LShrExpr *lse = static_ref_cast<LShrExpr>(e); + ExprHandle left = construct(lse->left, width_out); + unsigned shiftBits = getShiftBits(*width_out); + assert(*width_out!=1 && "uncanonicalized lshr"); + + if (lse->right.isConstant()) { + return bvRightShift(left, (unsigned) lse->right.getConstantValue(), shiftBits); + } else { + int shiftWidth; + ExprHandle amount = construct(lse->right, &shiftWidth); + return bvVarRightShift( left, amount, *width_out ); + } + } + + case Expr::AShr: { + AShrExpr *ase = static_ref_cast<AShrExpr>(e); + ExprHandle left = construct(ase->left, width_out); + assert(*width_out!=1 && "uncanonicalized ashr"); + + if (ase->right.isConstant()) { + unsigned shift = (unsigned) ase->right.getConstantValue(); + ExprHandle signedBool = bvBoolExtract(left, *width_out-1); + return constructAShrByConstant(left, shift, signedBool, getShiftBits(*width_out)); + } else { + int shiftWidth; + ExprHandle amount = construct(ase->right, &shiftWidth); + return bvVarArithRightShift( left, amount, *width_out ); + } + } + + // Comparison + + case Expr::Eq: { + EqExpr *ee = static_ref_cast<EqExpr>(e); + ExprHandle left = construct(ee->left, width_out); + ExprHandle right = construct(ee->right, width_out); + if (*width_out==1) { + if (ee->left.isConstant()) { + assert(!ee->left.getConstantValue() && "uncanonicalized eq"); + return vc_notExpr(vc, right); + } else { + return vc_iffExpr(vc, left, right); + } + } else { + *width_out = 1; + return vc_eqExpr(vc, left, right); + } + } + + case Expr::Ult: { + UltExpr *ue = static_ref_cast<UltExpr>(e); + ExprHandle left = construct(ue->left, width_out); + ExprHandle right = construct(ue->right, width_out); + assert(*width_out!=1 && "uncanonicalized ult"); + *width_out = 1; + return vc_bvLtExpr(vc, left, right); + } + + case Expr::Ule: { + UleExpr *ue = static_ref_cast<UleExpr>(e); + ExprHandle left = construct(ue->left, width_out); + ExprHandle right = construct(ue->right, width_out); + assert(*width_out!=1 && "uncanonicalized ule"); + *width_out = 1; + return vc_bvLeExpr(vc, left, right); + } + + case Expr::Slt: { + SltExpr *se = static_ref_cast<SltExpr>(e); + ExprHandle left = construct(se->left, width_out); + ExprHandle right = construct(se->right, width_out); + assert(*width_out!=1 && "uncanonicalized slt"); + *width_out = 1; + return vc_sbvLtExpr(vc, left, right); + } + + case Expr::Sle: { + SleExpr *se = static_ref_cast<SleExpr>(e); + ExprHandle left = construct(se->left, width_out); + ExprHandle right = construct(se->right, width_out); + assert(*width_out!=1 && "uncanonicalized sle"); + *width_out = 1; + return vc_sbvLeExpr(vc, left, right); + } + + // unused due to canonicalization +#if 0 + case Expr::Ne: + case Expr::Ugt: + case Expr::Uge: + case Expr::Sgt: + case Expr::Sge: +#endif + + default: + 
assert(0 && "unhandled Expr type"); + return vc_trueExpr(vc); + } +} diff --git a/lib/Solver/STPBuilder.h b/lib/Solver/STPBuilder.h new file mode 100644 index 00000000..6382bc1f --- /dev/null +++ b/lib/Solver/STPBuilder.h @@ -0,0 +1,125 @@ +//===-- STPBuilder.h --------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef __UTIL_STPBUILDER_H__ +#define __UTIL_STPBUILDER_H__ + +#include "klee/util/ExprHashMap.h" +#include "klee/Config/config.h" + +#include <vector> +#include <map> + +#define Expr VCExpr +#include "stp/c_interface.h" + +#if ENABLE_STPLOG == 1 +#include "stp/stplog.h" +#endif +#undef Expr + +namespace klee { + class ExprHolder { + friend class ExprHandle; + ::VCExpr expr; + unsigned count; + + public: + ExprHolder(const ::VCExpr _expr) : expr(_expr), count(0) {} + ~ExprHolder() { + if (expr) vc_DeleteExpr(expr); + } + }; + + class ExprHandle { + ExprHolder *H; + + public: + ExprHandle() : H(new ExprHolder(0)) { H->count++; } + ExprHandle(::VCExpr _expr) : H(new ExprHolder(_expr)) { H->count++; } + ExprHandle(const ExprHandle &b) : H(b.H) { H->count++; } + ~ExprHandle() { if (--H->count == 0) delete H; } + + ExprHandle &operator=(const ExprHandle &b) { + if (--H->count == 0) delete H; + H = b.H; + H->count++; + return *this; + } + + operator bool () { return H->expr; } + operator ::VCExpr () { return H->expr; } + }; + +class STPBuilder { + ::VC vc; + ExprHandle tempVars[4]; + ExprHashMap< std::pair<ExprHandle, unsigned> > constructed; + + /// optimizeDivides - Rewrite division and reminders by constants + /// into multiplies and shifts. STP should probably handle this for + /// use. + bool optimizeDivides; + +private: + unsigned getShiftBits(unsigned amount) { + return (amount == 64) ? 
6 : 5; + } + + ExprHandle bvOne(unsigned width); + ExprHandle bvZero(unsigned width); + ExprHandle bvMinusOne(unsigned width); + ExprHandle bvConst32(unsigned width, uint32_t value); + ExprHandle bvConst64(unsigned width, uint64_t value); + + ExprHandle bvBoolExtract(ExprHandle expr, int bit); + ExprHandle bvExtract(ExprHandle expr, unsigned top, unsigned bottom); + ExprHandle eqExpr(ExprHandle a, ExprHandle b); + + //logical left and right shift (not arithmetic) + ExprHandle bvLeftShift(ExprHandle expr, unsigned shift, unsigned shiftBits); + ExprHandle bvRightShift(ExprHandle expr, unsigned amount, unsigned shiftBits); + ExprHandle bvVarLeftShift(ExprHandle expr, ExprHandle amount, unsigned width); + ExprHandle bvVarRightShift(ExprHandle expr, ExprHandle amount, unsigned width); + ExprHandle bvVarArithRightShift(ExprHandle expr, ExprHandle amount, unsigned width); + + ExprHandle constructAShrByConstant(ExprHandle expr, unsigned shift, + ExprHandle isSigned, unsigned shiftBits); + ExprHandle constructMulByConstant(ExprHandle expr, unsigned width, uint64_t x); + ExprHandle constructUDivByConstant(ExprHandle expr_n, unsigned width, uint64_t d); + ExprHandle constructSDivByConstant(ExprHandle expr_n, unsigned width, uint64_t d); + + ::VCExpr getInitialArray(const Array *os); + ::VCExpr getArrayForUpdate(const Array *root, const UpdateNode *un); + + ExprHandle constructActual(ref<Expr> e, int *width_out); + ExprHandle construct(ref<Expr> e, int *width_out); + + ::VCExpr buildVar(const char *name, unsigned width); + ::VCExpr buildArray(const char *name, unsigned indexWidth, unsigned valueWidth); + +public: + STPBuilder(::VC _vc, bool _optimizeDivides=true); + ~STPBuilder(); + + ExprHandle getTrue(); + ExprHandle getFalse(); + ExprHandle getTempVar(Expr::Width w); + ExprHandle getInitialRead(const Array *os, unsigned index); + + ExprHandle construct(ref<Expr> e) { + ExprHandle res = construct(e, 0); + constructed.clear(); + return res; + } +}; + +} + +#endif diff --git a/lib/Solver/Solver.cpp b/lib/Solver/Solver.cpp new file mode 100644 index 00000000..24d3ef86 --- /dev/null +++ b/lib/Solver/Solver.cpp @@ -0,0 +1,643 @@ +//===-- Solver.cpp --------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Solver.h" +#include "klee/SolverImpl.h" + +#include "SolverStats.h" +#include "STPBuilder.h" + +#include "klee/Constraints.h" +#include "klee/Expr.h" +#include "klee/TimerStatIncrementer.h" +#include "klee/util/Assignment.h" +#include "klee/util/ExprPPrinter.h" +#include "klee/util/ExprUtil.h" +#include "klee/Internal/Support/Timer.h" + +#define vc_bvBoolExtract IAMTHESPAWNOFSATAN + +#include <cassert> +#include <map> +#include <vector> + +#include <sys/wait.h> +#include <sys/ipc.h> +#include <sys/shm.h> + +using namespace klee; + +/***/ + +const char *Solver::validity_to_str(Validity v) { + switch (v) { + default: return "Unknown"; + case True: return "True"; + case False: return "False"; + } +} + +Solver::~Solver() { + delete impl; +} + +SolverImpl::~SolverImpl() { +} + +bool Solver::evaluate(const Query& query, Validity &result) { + assert(query.expr.getWidth() == Expr::Bool && "Invalid expression type!"); + + // Maintain invariants implementation expect. + if (query.expr.isConstant()) { + result = query.expr.getConstantValue() ? 
True : False; + return true; + } + + return impl->computeValidity(query, result); +} + +bool SolverImpl::computeValidity(const Query& query, Solver::Validity &result) { + bool isTrue, isFalse; + if (!computeTruth(query, isTrue)) + return false; + if (isTrue) { + result = Solver::True; + } else { + if (!computeTruth(query.negateExpr(), isFalse)) + return false; + result = isFalse ? Solver::False : Solver::Unknown; + } + return true; +} + +bool Solver::mustBeTrue(const Query& query, bool &result) { + assert(query.expr.getWidth() == Expr::Bool && "Invalid expression type!"); + + // Maintain invariants implementation expect. + if (query.expr.isConstant()) { + result = query.expr.getConstantValue() ? true : false; + return true; + } + + return impl->computeTruth(query, result); +} + +bool Solver::mustBeFalse(const Query& query, bool &result) { + return mustBeTrue(query.negateExpr(), result); +} + +bool Solver::mayBeTrue(const Query& query, bool &result) { + bool res; + if (!mustBeFalse(query, res)) + return false; + result = !res; + return true; +} + +bool Solver::mayBeFalse(const Query& query, bool &result) { + bool res; + if (!mustBeTrue(query, res)) + return false; + result = !res; + return true; +} + +bool Solver::getValue(const Query& query, ref<Expr> &result) { + // Maintain invariants implementation expect. + if (query.expr.isConstant()) { + result = query.expr; + return true; + } + + return impl->computeValue(query, result); +} + +bool +Solver::getInitialValues(const Query& query, + const std::vector<const Array*> &objects, + std::vector< std::vector<unsigned char> > &values) { + bool hasSolution; + bool success = + impl->computeInitialValues(query, objects, values, hasSolution); + // FIXME: Propogate this out. + if (!hasSolution) + return false; + + return success; +} + +std::pair< ref<Expr>, ref<Expr> > Solver::getRange(const Query& query) { + ref<Expr> e = query.expr; + Expr::Width width = e.getWidth(); + uint64_t min, max; + + if (width==1) { + Solver::Validity result; + if (!evaluate(query, result)) + assert(0 && "computeValidity failed"); + switch (result) { + case Solver::True: + min = max = 1; break; + case Solver::False: + min = max = 0; break; + default: + min = 0, max = 1; break; + } + } else if (e.isConstant()) { + min = max = e.getConstantValue(); + } else { + // binary search for # of useful bits + uint64_t lo=0, hi=width, mid, bits=0; + while (lo<hi) { + mid = (lo+hi)/2; + bool res; + bool success = + mustBeTrue(query.withExpr( + EqExpr::create(LShrExpr::create(e, + ConstantExpr::create(mid, + width)), + ConstantExpr::create(0, width))), + res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) { + hi = mid; + } else { + lo = mid+1; + } + + bits = lo; + } + + // could binary search for training zeros and offset + // min max but unlikely to be very useful + + // check common case + bool res = false; + bool success = + mayBeTrue(query.withExpr(EqExpr::create(e, ConstantExpr::create(0, + width))), + res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) { + min = 0; + } else { + // binary search for min + lo=0, hi=bits64::maxValueOfNBits(bits); + while (lo<hi) { + mid = (lo+hi)/2; + bool res = false; + bool success = + mayBeTrue(query.withExpr(UleExpr::create(e, + ConstantExpr::create(mid, + width))), + res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) { + hi = mid; + } else { + lo = mid+1; + } + } + + min = lo; + } + + // binary search for max + lo=min, hi=bits64::maxValueOfNBits(bits); + while (lo<hi) { + mid = 
(lo+hi)/2; + bool res; + bool success = + mustBeTrue(query.withExpr(UleExpr::create(e, + ConstantExpr::create(mid, + width))), + res); + assert(success && "FIXME: Unhandled solver failure"); + if (res) { + hi = mid; + } else { + lo = mid+1; + } + } + + max = lo; + } + + return std::make_pair(ConstantExpr::create(min, width), + ConstantExpr::create(max, width)); +} + +/***/ + +class ValidatingSolver : public SolverImpl { +private: + Solver *solver, *oracle; + +public: + ValidatingSolver(Solver *_solver, Solver *_oracle) + : solver(_solver), oracle(_oracle) {} + ~ValidatingSolver() { delete solver; } + + bool computeValidity(const Query&, Solver::Validity &result); + bool computeTruth(const Query&, bool &isValid); + bool computeValue(const Query&, ref<Expr> &result); + bool computeInitialValues(const Query&, + const std::vector<const Array*> &objects, + std::vector< std::vector<unsigned char> > &values, + bool &hasSolution); +}; + +bool ValidatingSolver::computeTruth(const Query& query, + bool &isValid) { + bool answer; + + if (!solver->impl->computeTruth(query, isValid)) + return false; + if (!oracle->impl->computeTruth(query, answer)) + return false; + + if (isValid != answer) + assert(0 && "invalid solver result (computeTruth)"); + + return true; +} + +bool ValidatingSolver::computeValidity(const Query& query, + Solver::Validity &result) { + Solver::Validity answer; + + if (!solver->impl->computeValidity(query, result)) + return false; + if (!oracle->impl->computeValidity(query, answer)) + return false; + + if (result != answer) + assert(0 && "invalid solver result (computeValidity)"); + + return true; +} + +bool ValidatingSolver::computeValue(const Query& query, + ref<Expr> &result) { + bool answer; + + if (!solver->impl->computeValue(query, result)) + return false; + // We don't want to compare, but just make sure this is a legal + // solution. + if (!oracle->impl->computeTruth(query.withExpr(NeExpr::create(query.expr, + result)), + answer)) + return false; + + if (answer) + assert(0 && "invalid solver result (computeValue)"); + + return true; +} + +bool +ValidatingSolver::computeInitialValues(const Query& query, + const std::vector<const Array*> + &objects, + std::vector< std::vector<unsigned char> > + &values, + bool &hasSolution) { + bool answer; + + if (!solver->impl->computeInitialValues(query, objects, values, + hasSolution)) + return false; + + if (hasSolution) { + // Assert the bindings as constraints, and verify that the + // conjunction of the actual constraints is satisfiable. 
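+    // Concretely, the code below pins every byte of the returned arrays to
+    // the value the solver reported, then asks the oracle whether, under
+    // those bindings, the negation of the query expression and every path
+    // constraint all evaluate to true -- i.e. whether the returned
+    // assignment really is a counterexample.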
+ std::vector< ref<Expr> > bindings; + for (unsigned i = 0; i != values.size(); ++i) { + const Array *array = objects[i]; + for (unsigned j=0; j<array->size; j++) { + unsigned char value = values[i][j]; + bindings.push_back(EqExpr::create(ReadExpr::create(UpdateList(array, + true, 0), + ref<Expr>(j, Expr::Int32)), + ref<Expr>(value, Expr::Int8))); + } + } + ConstraintManager tmp(bindings); + ref<Expr> constraints = Expr::createNot(query.expr); + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + constraints = AndExpr::create(constraints, *it); + + if (!oracle->impl->computeTruth(Query(tmp, constraints), answer)) + return false; + if (!answer) + assert(0 && "invalid solver result (computeInitialValues)"); + } else { + if (!oracle->impl->computeTruth(query, answer)) + return false; + if (!answer) + assert(0 && "invalid solver result (computeInitialValues)"); + } + + return true; +} + +Solver *klee::createValidatingSolver(Solver *s, Solver *oracle) { + return new Solver(new ValidatingSolver(s, oracle)); +} + +/***/ + +class STPSolverImpl : public SolverImpl { +private: + /// The solver we are part of, for access to public information. + STPSolver *solver; + VC vc; + STPBuilder *builder; + double timeout; + bool useForkedSTP; + +public: + STPSolverImpl(STPSolver *_solver, bool _useForkedSTP); + ~STPSolverImpl(); + + char *getConstraintLog(const Query&); + void setTimeout(double _timeout) { timeout = _timeout; } + + bool computeTruth(const Query&, bool &isValid); + bool computeValue(const Query&, ref<Expr> &result); + bool computeInitialValues(const Query&, + const std::vector<const Array*> &objects, + std::vector< std::vector<unsigned char> > &values, + bool &hasSolution); +}; + +static unsigned char *shared_memory_ptr; +static const unsigned shared_memory_size = 1<<20; +static int shared_memory_id; + +static void stp_error_handler(const char* err_msg) { + fprintf(stderr, "error: STP Error: %s\n", err_msg); + abort(); +} + +STPSolverImpl::STPSolverImpl(STPSolver *_solver, bool _useForkedSTP) + : solver(_solver), + vc(vc_createValidityChecker()), + builder(new STPBuilder(vc)), + timeout(0.0), + useForkedSTP(_useForkedSTP) +{ + assert(vc && "unable to create validity checker"); + assert(builder && "unable to create STPBuilder"); + + vc_registerErrorHandler(::stp_error_handler); + + if (useForkedSTP) { + shared_memory_id = shmget(IPC_PRIVATE, shared_memory_size, IPC_CREAT | 0700); + assert(shared_memory_id>=0 && "shmget failed"); + shared_memory_ptr = (unsigned char*) shmat(shared_memory_id, NULL, 0); + assert(shared_memory_ptr!=(void*)-1 && "shmat failed"); + shmctl(shared_memory_id, IPC_RMID, NULL); + } +} + +STPSolverImpl::~STPSolverImpl() { + delete builder; + + vc_Destroy(vc); +} + +/***/ + +STPSolver::STPSolver(bool useForkedSTP) + : Solver(new STPSolverImpl(this, useForkedSTP)) +{ +} + +char *STPSolver::getConstraintLog(const Query &query) { + return static_cast<STPSolverImpl*>(impl)->getConstraintLog(query); +} + +void STPSolver::setTimeout(double timeout) { + static_cast<STPSolverImpl*>(impl)->setTimeout(timeout); +} + +/***/ + +char *STPSolverImpl::getConstraintLog(const Query &query) { + vc_push(vc); + for (std::vector< ref<Expr> >::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + vc_assertFormula(vc, builder->construct(*it)); + assert(query.expr == ref<Expr>(0, Expr::Bool) && + "Unexpected expression in query!"); + + char *buffer; + unsigned long length; + 
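+  // Note: vc_printQueryStateToBuffer is expected to render the asserted
+  // formulas together with the (trivially false) query into a heap-allocated
+  // buffer; ownership of that buffer passes to our caller, which is why it
+  // is returned rather than freed here.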
vc_printQueryStateToBuffer(vc, builder->getFalse(), + &buffer, &length, false); + vc_pop(vc); + + return buffer; +} + +bool STPSolverImpl::computeTruth(const Query& query, + bool &isValid) { + std::vector<const Array*> objects; + std::vector< std::vector<unsigned char> > values; + bool hasSolution; + + if (!computeInitialValues(query, objects, values, hasSolution)) + return false; + + isValid = !hasSolution; + return true; +} + +bool STPSolverImpl::computeValue(const Query& query, + ref<Expr> &result) { + std::vector<const Array*> objects; + std::vector< std::vector<unsigned char> > values; + bool hasSolution; + + // Find the object used in the expression, and compute an assignment + // for them. + findSymbolicObjects(query.expr, objects); + if (!computeInitialValues(query.withFalse(), objects, values, hasSolution)) + return false; + assert(hasSolution && "state has invalid constraint set"); + + // Evaluate the expression with the computed assignment. + Assignment a(objects, values); + result = a.evaluate(query.expr); + + return true; +} + +static void runAndGetCex(::VC vc, STPBuilder *builder, ::VCExpr q, + const std::vector<const Array*> &objects, + std::vector< std::vector<unsigned char> > &values, + bool &hasSolution) { + // XXX I want to be able to timeout here, safely + hasSolution = !vc_query(vc, q); + + if (hasSolution) { + values.reserve(objects.size()); + for (std::vector<const Array*>::const_iterator + it = objects.begin(), ie = objects.end(); it != ie; ++it) { + const Array *array = *it; + std::vector<unsigned char> data; + + data.reserve(array->size); + for (unsigned offset = 0; offset < array->size; offset++) { + ExprHandle counter = + vc_getCounterExample(vc, builder->getInitialRead(array, offset)); + unsigned char val = getBVUnsigned(counter); + data.push_back(val); + } + + values.push_back(data); + } + } +} + +static void stpTimeoutHandler(int x) { + _exit(52); +} + +static bool runAndGetCexForked(::VC vc, + STPBuilder *builder, + ::VCExpr q, + const std::vector<const Array*> &objects, + std::vector< std::vector<unsigned char> > + &values, + bool &hasSolution, + double timeout) { + unsigned char *pos = shared_memory_ptr; + unsigned sum = 0; + for (std::vector<const Array*>::const_iterator + it = objects.begin(), ie = objects.end(); it != ie; ++it) + sum += (*it)->size; + assert(sum<shared_memory_size && "not enough shared memory for counterexample"); + + fflush(stdout); + fflush(stderr); + int pid = fork(); + if (pid==-1) { + fprintf(stderr, "error: fork failed (for STP)"); + return false; + } + + if (pid == 0) { + if (timeout) { + ::alarm(0); /* Turn off alarm so we can safely set signal handler */ + ::signal(SIGALRM, stpTimeoutHandler); + ::alarm(std::max(1, (int)timeout)); + } + unsigned res = vc_query(vc, q); + if (!res) { + for (std::vector<const Array*>::const_iterator + it = objects.begin(), ie = objects.end(); it != ie; ++it) { + const Array *array = *it; + for (unsigned offset = 0; offset < array->size; offset++) { + ExprHandle counter = + vc_getCounterExample(vc, builder->getInitialRead(array, offset)); + *pos++ = getBVUnsigned(counter); + } + } + } + _exit(res); + } else { + int status; + int res = waitpid(pid, &status, 0); + + if (res<0) { + fprintf(stderr, "error: waitpid() for STP failed"); + return false; + } + + // From timed_run.py: It appears that linux at least will on + // "occasion" return a status when the process was terminated by a + // signal, so test signal first. 
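+    // Exit-code protocol with the forked child (set up above): 0 means
+    // vc_query found a counterexample whose bytes are already in the shared
+    // memory buffer, 1 means the query was valid, and 52 is the SIGALRM
+    // timeout handler's exit code.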
+ if (WIFSIGNALED(status) || !WIFEXITED(status)) { + fprintf(stderr, "error: STP did not return successfully"); + return false; + } + + int exitcode = WEXITSTATUS(status); + if (exitcode==0) { + hasSolution = true; + } else if (exitcode==1) { + hasSolution = false; + } else if (exitcode==52) { + fprintf(stderr, "error: STP timed out"); + return false; + } else { + fprintf(stderr, "error: STP did not return a recognized code"); + return false; + } + + if (hasSolution) { + values = std::vector< std::vector<unsigned char> >(objects.size()); + unsigned i=0; + for (std::vector<const Array*>::const_iterator + it = objects.begin(), ie = objects.end(); it != ie; ++it) { + const Array *array = *it; + std::vector<unsigned char> &data = values[i++]; + data.insert(data.begin(), pos, pos + array->size); + pos += array->size; + } + } + + return true; + } +} + +bool +STPSolverImpl::computeInitialValues(const Query &query, + const std::vector<const Array*> + &objects, + std::vector< std::vector<unsigned char> > + &values, + bool &hasSolution) { + TimerStatIncrementer t(stats::queryTime); + + vc_push(vc); + + for (ConstraintManager::const_iterator it = query.constraints.begin(), + ie = query.constraints.end(); it != ie; ++it) + vc_assertFormula(vc, builder->construct(*it)); + + ++stats::queries; + ++stats::queryCounterexamples; + + ExprHandle stp_e = builder->construct(query.expr); + + bool success; + if (useForkedSTP) { + success = runAndGetCexForked(vc, builder, stp_e, objects, values, + hasSolution, timeout); + } else { + runAndGetCex(vc, builder, stp_e, objects, values, hasSolution); + success = true; + } + + if (success) { + if (hasSolution) + ++stats::queriesInvalid; + else + ++stats::queriesValid; + } + + vc_pop(vc); + + return success; +} diff --git a/lib/Solver/SolverStats.cpp b/lib/Solver/SolverStats.cpp new file mode 100644 index 00000000..9d48792a --- /dev/null +++ b/lib/Solver/SolverStats.cpp @@ -0,0 +1,23 @@ +//===-- SolverStats.cpp ---------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SolverStats.h" + +using namespace klee; + +Statistic stats::cexCacheTime("CexCacheTime", "CCtime"); +Statistic stats::queries("Queries", "Q"); +Statistic stats::queriesInvalid("QueriesInvalid", "Qiv"); +Statistic stats::queriesValid("QueriesValid", "Qv"); +Statistic stats::queryCacheHits("QueryCacheHits", "QChits") ; +Statistic stats::queryCacheMisses("QueryCacheMisses", "QCmisses"); +Statistic stats::queryConstructTime("QueryConstructTime", "QBtime") ; +Statistic stats::queryConstructs("QueriesConstructs", "QB"); +Statistic stats::queryCounterexamples("QueriesCEX", "Qcex"); +Statistic stats::queryTime("QueryTime", "Qtime"); diff --git a/lib/Solver/SolverStats.h b/lib/Solver/SolverStats.h new file mode 100644 index 00000000..6fee7699 --- /dev/null +++ b/lib/Solver/SolverStats.h @@ -0,0 +1,32 @@ +//===-- SolverStats.h -------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_SOLVERSTATS_H +#define KLEE_SOLVERSTATS_H + +#include "klee/Statistic.h" + +namespace klee { +namespace stats { + + extern Statistic cexCacheTime; + extern Statistic queries; + extern Statistic queriesInvalid; + extern Statistic queriesValid; + extern Statistic queryCacheHits; + extern Statistic queryCacheMisses; + extern Statistic queryConstructTime; + extern Statistic queryConstructs; + extern Statistic queryCounterexamples; + extern Statistic queryTime; + +} +} + +#endif diff --git a/lib/Support/Makefile b/lib/Support/Makefile new file mode 100644 index 00000000..a1b46f3c --- /dev/null +++ b/lib/Support/Makefile @@ -0,0 +1,16 @@ +#===-- lib/Support/Makefile --------------------------------*- Makefile -*--===# +# +# The KLEE Symbolic Virtual Machine +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +LEVEL=../.. + +LIBRARYNAME=kleeSupport +DONT_BUILD_RELINKED=1 +BUILD_ARCHIVE=1 + +include $(LEVEL)/Makefile.common diff --git a/lib/Support/README.txt b/lib/Support/README.txt new file mode 100644 index 00000000..1ed6fcb4 --- /dev/null +++ b/lib/Support/README.txt @@ -0,0 +1,2 @@ +This directory holds basic support facilities (data structures, +utilities, etc.) used by klee. diff --git a/lib/Support/RNG.cpp b/lib/Support/RNG.cpp new file mode 100644 index 00000000..fef7e489 --- /dev/null +++ b/lib/Support/RNG.cpp @@ -0,0 +1,146 @@ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + Modified to be a C++ class by Daniel Dunbar. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. 
+ http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) +*/ + +#include "klee/Internal/ADT/RNG.h" + +using namespace klee; + +/* initializes mt[N] with a seed */ +RNG::RNG(unsigned int s) { + seed(s); +} + +void RNG::seed(unsigned int s) { + mt[0]= s & 0xffffffffUL; + for (mti=1; mti<N; mti++) { + mt[mti] = + (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt[mti] &= 0xffffffffUL; + /* for >32 bit machines */ + } +} + +/* generates a random number on [0,0xffffffff]-interval */ +unsigned int RNG::getInt32() { + unsigned int y; + static unsigned int mag01[2]={0x0UL, MATRIX_A}; + /* mag01[x] = x * MATRIX_A for x=0,1 */ + + if (mti >= N) { /* generate N words at one time */ + int kk; + + for (kk=0;kk<N-M;kk++) { + y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK); + mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + for (;kk<N-1;kk++) { + y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK); + mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK); + mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL]; + + mti = 0; + } + + y = mt[mti++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; +} + +/* generates a random number on [0,0x7fffffff]-interval */ +int RNG::getInt31() { + return (int)(getInt32()>>1); +} + +/* generates a random number on [0,1]-real-interval */ +double RNG::getDoubleLR() { + return getInt32()*(1.0/4294967295.0); + /* divided by 2^32-1 */ +} + +/* generates a random number on [0,1)-real-interval */ +double RNG::getDoubleL() { + return getInt32()*(1.0/4294967296.0); + /* divided by 2^32 */ +} + +/* generates a random number on (0,1)-real-interval */ +double RNG::getDouble() { + return (((double)getInt32()) + 0.5)*(1.0/4294967296.0); + /* divided by 2^32 */ +} + +float RNG::getFloatLR() { + return getInt32()*(1.0f/4294967295.0f); + /* divided by 2^32-1 */ +} +float RNG::getFloatL() { + return getInt32()*(1.0f/4294967296.0f); + /* divided by 2^32 */ +} +float RNG::getFloat() { + return (getInt32() + 0.5f)*(1.0f/4294967296.0f); + /* divided by 2^32 */ +} + +bool RNG::getBool() { + unsigned bits = getInt32(); + bits ^= bits >> 16; + bits ^= bits >> 8; + bits ^= bits >> 4; + bits ^= bits >> 2; + bits ^= bits >> 1; + return bits&1; +} diff --git a/lib/Support/Time.cpp b/lib/Support/Time.cpp new file mode 100644 index 00000000..0ec8d9d7 --- /dev/null +++ b/lib/Support/Time.cpp @@ -0,0 +1,27 @@ +//===-- Time.cpp ----------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "klee/Internal/System/Time.h" + +#include "llvm/System/Process.h" + +using namespace llvm; +using namespace klee; + +double util::getUserTime() { + sys::TimeValue now(0,0),user(0,0),sys(0,0); + sys::Process::GetTimeUsage(now,user,sys); + return (user.seconds() + (double) user.nanoseconds() * 1e-9); +} + +double util::getWallTime() { + sys::TimeValue now(0,0),user(0,0),sys(0,0); + sys::Process::GetTimeUsage(now,user,sys); + return (now.seconds() + (double) now.nanoseconds() * 1e-9); +} diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp new file mode 100644 index 00000000..cddb0707 --- /dev/null +++ b/lib/Support/Timer.cpp @@ -0,0 +1,27 @@ +//===-- Timer.cpp ---------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Internal/Support/Timer.h" + +#include "llvm/System/Process.h" + +using namespace klee; +using namespace llvm; + +WallTimer::WallTimer() { + sys::TimeValue now(0,0),user(0,0),sys(0,0); + sys::Process::GetTimeUsage(now,user,sys); + startMicroseconds = now.usec(); +} + +uint64_t WallTimer::check() { + sys::TimeValue now(0,0),user(0,0),sys(0,0); + sys::Process::GetTimeUsage(now,user,sys); + return now.usec() - startMicroseconds; +} diff --git a/lib/Support/TreeStream.cpp b/lib/Support/TreeStream.cpp new file mode 100644 index 00000000..0e8b86dd --- /dev/null +++ b/lib/Support/TreeStream.cpp @@ -0,0 +1,201 @@ +//===-- TreeStream.cpp ----------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "klee/Internal/ADT/TreeStream.h" + +#include <cassert> +#include <iostream> +#include <iomanip> +#include <fstream> +#include <iterator> +#include <map> + +#include <string.h> + +using namespace klee; + +/// + +TreeStreamWriter::TreeStreamWriter(const std::string &_path) + : lastID(0), + bufferCount(0), + path(_path), + output(new std::ofstream(path.c_str(), + std::ios::out | std::ios::binary)), + ids(1) { + if (!output->good()) { + delete output; + output = 0; + } +} + +TreeStreamWriter::~TreeStreamWriter() { + flush(); + if (output) + delete output; +} + +bool TreeStreamWriter::good() { + return !!output; +} + +TreeOStream TreeStreamWriter::open() { + return open(TreeOStream(*this, 0)); +} + +TreeOStream TreeStreamWriter::open(const TreeOStream &os) { + assert(output && os.writer==this); + flushBuffer(); + unsigned id = ids++; + output->write(reinterpret_cast<const char*>(&os.id), 4); + unsigned tag = id | (1<<31); + output->write(reinterpret_cast<const char*>(&tag), 4); + return TreeOStream(*this, id); +} + +void TreeStreamWriter::write(TreeOStream &os, const char *s, unsigned size) { +#if 1 + if (bufferCount && + (os.id!=lastID || size+bufferCount>bufferSize)) + flushBuffer(); + if (bufferCount) { // (os.id==lastID && size+bufferCount<=bufferSize) + memcpy(&buffer[bufferCount], s, size); + bufferCount += size; + } else if (size<bufferSize) { + lastID = os.id; + memcpy(buffer, s, size); + bufferCount = size; + } else { + output->write(reinterpret_cast<const char*>(&os.id), 4); + output->write(reinterpret_cast<const char*>(&size), 4); + output->write(buffer, size); + } +#else + output->write(reinterpret_cast<const char*>(&os.id), 4); + output->write(reinterpret_cast<const char*>(&size), 4); + output->write(s, size); +#endif +} + +void TreeStreamWriter::flushBuffer() { + if (bufferCount) { + output->write(reinterpret_cast<const char*>(&lastID), 4); + output->write(reinterpret_cast<const char*>(&bufferCount), 4); + output->write(buffer, bufferCount); + bufferCount = 0; + } +} + +void TreeStreamWriter::flush() { + flushBuffer(); + output->flush(); +} + +void TreeStreamWriter::readStream(TreeStreamID streamID, + std::vector<unsigned char> &out) { + assert(streamID>0 && streamID<ids); + flush(); + + std::ifstream is(path.c_str(), + std::ios::in | std::ios::binary); + assert(is.good()); +#if 0 + std::cout << "finding chain for: " << streamID << "\n"; +#endif + + std::map<unsigned,unsigned> parents; + std::vector<unsigned> roots; + for (;;) { + assert(is.good()); + unsigned id; + unsigned tag; + is.read(reinterpret_cast<char*>(&id), 4); + is.read(reinterpret_cast<char*>(&tag), 4); + if (tag&(1<<31)) { // fork + unsigned child = tag ^ (1<<31); + + if (child==streamID) { + roots.push_back(child); + while (id) { + roots.push_back(id); + std::map<unsigned, unsigned>::iterator it = parents.find(id); + assert(it!=parents.end()); + id = it->second; + } + break; + } else { + parents.insert(std::make_pair(child,id)); + } + } else { + unsigned size = tag; + while (size--) is.get(); + } + } +#if 0 + std::cout << "roots: "; + std::copy(roots.begin(), roots.end(), std::ostream_iterator<unsigned>(std::cout, " ")); + std::cout << "\n"; +#endif + is.seekg(0, std::ios::beg); + for (;;) { + unsigned id; + unsigned tag; + is.read(reinterpret_cast<char*>(&id), 4); + is.read(reinterpret_cast<char*>(&tag), 4); + if (!is.good()) break; + if (tag&(1<<31)) { // fork + unsigned child = tag ^ (1<<31); + if (id==roots.back() 
&& roots.size()>1 && child==roots[roots.size()-2]) + roots.pop_back(); + } else { + unsigned size = tag; + if (id==roots.back()) { + while (size--) out.push_back(is.get()); + } else { + while (size--) is.get(); + } + } + } +} + +/// + +TreeOStream::TreeOStream() + : writer(0), + id(0) { +} + +TreeOStream::TreeOStream(TreeStreamWriter &_writer, unsigned _id) + : writer(&_writer), + id(_id) { +} + +TreeOStream::~TreeOStream() { +} + +unsigned TreeOStream::getID() const { + assert(writer); + return id; +} + +void TreeOStream::write(const char *buffer, unsigned size) { + assert(writer); + writer->write(*this, buffer, size); +} + +TreeOStream &TreeOStream::operator<<(const std::string &s) { + assert(writer); + write(s.c_str(), s.size()); + return *this; +} + +void TreeOStream::flush() { + assert(writer); + writer->flush(); +} |
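The TreeStream files written above share a single flat record layout: every record begins with a 4-byte stream id and a 4-byte tag, where a tag with the high bit set announces a fork (the child id is the tag with that bit cleared) and any other tag is a payload byte count followed by that many bytes. As a reading aid only, a minimal stand-alone dumper for that layout could look like the sketch below; it is an illustrative helper, not part of this commit, and it assumes the same native-endian fields the writer emits.

// Illustrative TreeStream record dumper (not part of KLEE); assumes the
// record layout produced by TreeStreamWriter above.
#include <cstdint>
#include <cstdio>
#include <fstream>

int main(int argc, char **argv) {
  if (argc != 2) {
    std::fprintf(stderr, "usage: %s <tree-stream-file>\n", argv[0]);
    return 1;
  }
  std::ifstream is(argv[1], std::ios::in | std::ios::binary);
  uint32_t id, tag;
  // Walk the file record by record.
  while (is.read(reinterpret_cast<char*>(&id), 4) &&
         is.read(reinterpret_cast<char*>(&tag), 4)) {
    if (tag & (1u << 31)) {
      // Fork record: "id" is the parent stream, the tag encodes the child id.
      std::printf("fork: parent=%u child=%u\n", id, tag ^ (1u << 31));
    } else {
      // Data record: tag is the byte count; skip the payload itself.
      std::printf("data: stream=%u bytes=%u\n", id, tag);
      is.seekg(tag, std::ios::cur);
    }
  }
  return 0;
}

Reconstructing one stream's full contents additionally requires following the parent chain of fork records, exactly as TreeStreamWriter::readStream does with its roots vector.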