From ad0daf5bc3c534f93aff24d196efbfef2ef3e36b Mon Sep 17 00:00:00 2001 From: Tomasz Kuchta Date: Thu, 12 Oct 2023 09:51:24 +0200 Subject: Feature: implement single memory object resolution for symbolic addresses. This feature implements tracking of and resolution of memory objects in the presence of symbolic addresses. For example, an expression like the following: int x; klee_make_symbolic(&x, sizeof(x), "x"); int* tmp = &b.y[x].z; For a concrete array object "y", which is a member of struct "b", a symbolic offset "x" would normally be resolved to any matching memory object - including the ones outside of the object "b". This behaviour is consistent with symbex approach of exploring all execution paths. However, from the point of view of security testing, we would only be interested to know if we are still in-bounds or there is a buffer overflow. The implemented feature creates and tracks (via the GEP instruction) the mapping between the current symbolic offset and the base object it refers to: in our example we are able to tell that the reference should happen within the object "b" (as the array "y" is inside the same memory blob). As a result, we are able to minimize the symbolic exploration to only two paths: one within the bounds of "b", the other with a buffer overflow bug. The feature is turned on via the single-object-resolution command line flag. A new test case was implemented to illustrate how the feature works. --- lib/Core/ExecutionState.h | 4 + lib/Core/Executor.cpp | 180 ++++++++++++++++++++++++---------- test/Feature/SingleObjectResolution.c | 52 ++++++++++ 3 files changed, 185 insertions(+), 51 deletions(-) create mode 100644 test/Feature/SingleObjectResolution.c diff --git a/lib/Core/ExecutionState.h b/lib/Core/ExecutionState.h index 20187166..833537f2 100644 --- a/lib/Core/ExecutionState.h +++ b/lib/Core/ExecutionState.h @@ -248,6 +248,10 @@ public: /// @brief Disables forking for this state. Set by user code bool forkDisabled = false; + /// @brief Mapping symbolic address expressions to concrete base addresses + typedef std::map, ref> base_addrs_t; + base_addrs_t base_addrs; + public: #ifdef KLEE_UNITTEST // provide this function only in the context of unittests diff --git a/lib/Core/Executor.cpp b/lib/Core/Executor.cpp index cf6ce777..b96ea843 100644 --- a/lib/Core/Executor.cpp +++ b/lib/Core/Executor.cpp @@ -466,6 +466,12 @@ cl::opt DebugCheckForImpliedValues( cl::desc("Debug the implied value optimization"), cl::cat(DebugCat)); +/*** Misc options ***/ +cl::opt SingleObjectResolution( + "single-object-resolution", + cl::desc("Try to resolve memory reads/writes to single objects " + "when offsets are symbolic (default=false)"), + cl::init(false), cl::cat(MiscCat)); } // namespace // XXX hack @@ -2784,6 +2790,26 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) { KGEPInstruction *kgepi = static_cast(ki); ref base = eval(ki, 0, state).value; + bool const_base = isa(base); + bool update = false; + ref original_base = 0; + ref key = 0; + ref value = 0; + + if (SingleObjectResolution) { + ExecutionState::base_addrs_t::iterator base_it; + if (!const_base) { + base_it = state.base_addrs.find(base); + if (base_it != state.base_addrs.end()) { + update = true; + key = base_it->first; + value = base_it->second; + } + } else { + original_base = dyn_cast(base); + } + } + for (std::vector< std::pair >::iterator it = kgepi->indices.begin(), ie = kgepi->indices.end(); it != ie; ++it) { @@ -2796,6 +2822,22 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) { if (kgepi->offset) base = AddExpr::create(base, Expr::createPointer(kgepi->offset)); + + if (SingleObjectResolution) { + if (const_base && !isa(base)) { + // the initial base address was a constant expression, the final is not: + // store the mapping between constant address and the non-const + // reference in the state + state.base_addrs[base] = original_base; + } + + if (update) { + // we need to update the current entry with a new value + state.base_addrs[base] = value; + state.base_addrs.erase(key); + } + } + bindLocal(ki, state, base); break; } @@ -4305,71 +4347,107 @@ void Executor::executeMemoryOperation(ExecutionState &state, address = optimizer.optimizeExpr(address, true); - // fast path: single in-bounds resolution ObjectPair op; bool success; solver->setTimeout(coreSolverTimeout); - if (!state.addressSpace.resolveOne(state, solver.get(), address, op, success)) { - address = toConstant(state, address, "resolveOne failure"); - success = state.addressSpace.resolveOne(cast(address), op); - } - solver->setTimeout(time::Span()); - - if (success) { - const MemoryObject *mo = op.first; - if (MaxSymArraySize && mo->size >= MaxSymArraySize) { - address = toConstant(state, address, "max-sym-array-size"); + bool resolveSingleObject = SingleObjectResolution; + + if (resolveSingleObject && !isa(address)) { + // Address is symbolic + + resolveSingleObject = false; + ExecutionState::base_addrs_t::iterator base_it = + state.base_addrs.find(address); + if (base_it != state.base_addrs.end()) { + // Concrete address found in the map, now find the associated memory + // object + if (!state.addressSpace.resolveOne(state, solver.get(), base_it->second, op, + success) || + !success) { + klee_warning("Failed to resolve concrete address from the base_addrs " + "map to a memory object"); + } else { + // We have resolved the stored concrete address to a memory object. + // Now let's see if we can prove an overflow - we are only interested in + // two cases: either we overflow and it's a bug or we don't and we carry + // on; in this mode we are not interested in trying out other memory + // objects + resolveSingleObject = true; + } } - - ref offset = mo->getOffsetExpr(address); - ref check = mo->getBoundsCheckOffset(offset, bytes); - check = optimizer.optimizeExpr(check, true); + } else { + resolveSingleObject = false; + } - bool inBounds; - solver->setTimeout(coreSolverTimeout); - bool success = solver->mustBeTrue(state.constraints, check, inBounds, - state.queryMetaData); - solver->setTimeout(time::Span()); - if (!success) { - state.pc = state.prevPC; - terminateStateOnSolverError(state, "Query timed out (bounds check)."); - return; + if (!resolveSingleObject) { + if (!state.addressSpace.resolveOne(state, solver.get(), address, op, success)) { + address = toConstant(state, address, "resolveOne failure"); + success = state.addressSpace.resolveOne(cast(address), op); } + solver->setTimeout(time::Span()); - if (inBounds) { - const ObjectState *os = op.second; - if (isWrite) { - if (os->readOnly) { - terminateStateOnProgramError(state, "memory error: object read only", - StateTerminationType::ReadOnly); - } else { - ObjectState *wos = state.addressSpace.getWriteable(mo, os); - wos->write(offset, value); - } - } else { - ref result = os->read(offset, type); - - if (interpreterOpts.MakeConcreteSymbolic) - result = replaceReadWithSymbolic(state, result); - - bindLocal(target, state, result); + if (success) { + const MemoryObject *mo = op.first; + + if (MaxSymArraySize && mo->size >= MaxSymArraySize) { + address = toConstant(state, address, "max-sym-array-size"); } - return; + ref offset = mo->getOffsetExpr(address); + ref check = mo->getBoundsCheckOffset(offset, bytes); + check = optimizer.optimizeExpr(check, true); + + bool inBounds; + solver->setTimeout(coreSolverTimeout); + bool success = solver->mustBeTrue(state.constraints, check, inBounds, + state.queryMetaData); + solver->setTimeout(time::Span()); + if (!success) { + state.pc = state.prevPC; + terminateStateOnSolverError(state, "Query timed out (bounds check)."); + return; + } + + if (inBounds) { + const ObjectState *os = op.second; + if (isWrite) { + if (os->readOnly) { + terminateStateOnProgramError(state, "memory error: object read only", + StateTerminationType::ReadOnly); + } else { + ObjectState *wos = state.addressSpace.getWriteable(mo, os); + wos->write(offset, value); + } + } else { + ref result = os->read(offset, type); + + if (interpreterOpts.MakeConcreteSymbolic) + result = replaceReadWithSymbolic(state, result); + + bindLocal(target, state, result); + } + + return; + } } - } + } // we are on an error path (no resolution, multiple resolution, one - // resolution with out of bounds) + // resolution with out of bounds), or we do a single object resolution address = optimizer.optimizeExpr(address, true); - ResolutionList rl; - solver->setTimeout(coreSolverTimeout); - bool incomplete = state.addressSpace.resolve(state, solver.get(), address, rl, - 0, coreSolverTimeout); - solver->setTimeout(time::Span()); - + ResolutionList rl; + bool incomplete = false; + + if (!resolveSingleObject) { + solver->setTimeout(coreSolverTimeout); + incomplete = state.addressSpace.resolve(state, solver.get(), address, rl, 0, + coreSolverTimeout); + solver->setTimeout(time::Span()); + } else { + rl.push_back(op); // we already have the object pair, no need to look for it + } // XXX there is some query wasteage here. who cares? ExecutionState *unbound = &state; @@ -4401,7 +4479,7 @@ void Executor::executeMemoryOperation(ExecutionState &state, if (!unbound) break; } - + // XXX should we distinguish out of bounds and overlapped cases? if (unbound) { if (incomplete) { diff --git a/test/Feature/SingleObjectResolution.c b/test/Feature/SingleObjectResolution.c new file mode 100644 index 00000000..687ce9b6 --- /dev/null +++ b/test/Feature/SingleObjectResolution.c @@ -0,0 +1,52 @@ +// RUN: %clang %s -g -emit-llvm %O0opt -c -o %t.bc +// RUN: rm -rf %t.klee-out +// RUN: %klee --output-dir=%t.klee-out --single-object-resolution %t.bc > %t.log 2>&1 +// RUN: FileCheck %s -input-file=%t.log + +#include "klee/klee.h" +#include + +struct A { + long long int y; + long long int y2; + int z; +}; + +struct B { + long long int x; + struct A y[20]; + struct A *y1; + struct A *y2; + int z; +}; + +int foo(int *pointer) { + //printf("pointer is called\n"); + int *ptr = pointer + 123; + return *ptr; +} + +int main(int argc, char *argv[]) { + + int x; + struct B b; + + // create a lot of memory objects + int *ptrs[1024]; + for (int i = 0; i < 1024; i++) { + ptrs[i] = malloc(23); + } + + klee_make_symbolic(&x, sizeof(x), "x"); + + b.y1 = malloc(20 * sizeof(struct A)); + + // dereference of a pointer within a struct + int *tmp = &b.y1[x].z; + + // CHECK: SingleObjectResolution.c:26: memory error: out of bound pointer + // CHECK: KLEE: done: completed paths = 1 + // CHECK: KLEE: done: partially completed paths = 1 + // CHECK: KLEE: done: generated tests = 2 + return foo(tmp); +} -- cgit 1.4.1