From 00cdc62b7a8df39d13f734ecc77077e427912f64 Mon Sep 17 00:00:00 2001 From: Martin Nowack Date: Fri, 8 Jul 2016 14:42:34 +0200 Subject: Support gzip-based compression of raw_outstreams Provide initial zlib-based compression support for raw_outstreams. Replacing llvm::raw_fd_outstreams with compressed_fd_outstreams automatically compresses data in gzip format before writing to file. Options added: * --compress-log to compress all query log files (e.g. *.pc, *.smt2) on the fly. Every query log file gets extended with .gz. * --debug-compress-instructions to compress logfile for instruction stream on the fly. --- Makefile.config.in | 2 + autoconf/configure.ac | 35 +++++++ configure | 118 +++++++++++++++++++++ include/klee/Config/config.h.in | 3 + include/klee/Internal/Support/CompressionStream.h | 46 +++++++++ lib/Core/Executor.cpp | 23 +++++ lib/Solver/QueryLoggingSolver.cpp | 42 ++++++-- lib/Solver/QueryLoggingSolver.h | 2 +- lib/Support/CompressionStream.cpp | 119 ++++++++++++++++++++++ test/Feature/CompressedExprLogging.c | 42 ++++++++ tools/kleaver/Makefile | 4 + tools/klee/Makefile | 4 + 12 files changed, 430 insertions(+), 10 deletions(-) create mode 100644 include/klee/Internal/Support/CompressionStream.h create mode 100644 lib/Support/CompressionStream.cpp create mode 100644 test/Feature/CompressedExprLogging.c diff --git a/Makefile.config.in b/Makefile.config.in index 1b0b7ba8..d9572f7d 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -51,6 +51,8 @@ HAVE_SELINUX := @HAVE_SELINUX@ HAVE_TCMALLOC := @HAVE_TCMALLOC@ TCMALLOC_LIB := @TCMALLOC_LIB@ +HAVE_ZLIB := @HAVE_ZLIB@ + RUNTIME_ENABLE_OPTIMIZED := @RUNTIME_ENABLE_OPTIMIZED@ RUNTIME_DISABLE_ASSERTIONS := @RUNTIME_DISABLE_ASSERTIONS@ RUNTIME_DEBUG_SYMBOLS := @RUNTIME_DEBUG_SYMBOLS@ diff --git a/autoconf/configure.ac b/autoconf/configure.ac index a08e190e..0fb8f8db 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -603,6 +603,41 @@ AS_IF([test "x$have_tcmalloc" = "xyes"], ]) ]) +dnl ************************************************************************** +dnl Test for zlib +dnl ************************************************************************** + +AC_ARG_WITH([zlib], + AS_HELP_STRING([--without-zlib], [Ignore presence of zlib and disable it (default=detect)])) + +AS_IF([test "x$with_zlib" != "xno"], + AC_CHECK_HEADERS([zlib.h], + [have_zlib=yes], [have_zlib=no]), + [have_zlib=no]) + +AS_IF([test "x$have_zlib" = "xyes"], + [ + AC_SEARCH_LIBS(deflateEnd, z, + [ + AC_SUBST(HAVE_ZLIB, 1) + if test "${ac_cv_search_zlib}" != "none required"; then + ZLIB_LIB=${ac_cv_search_zlib} + AC_SUBST(ZLIB_LIB) + fi + ], + [ + AC_MSG_WARN([Could not link with zlib]) + AC_SUBST(HAVE_ZLIB, 0) + ],) + + ], + [AS_IF([test "x$with_zlib" = "xyes"], + [AC_MSG_ERROR([zlib requested but not found])], + [ + AC_SUBST(HAVE_ZLIB, 0) + ]) +]) + dnl ************************************************************************** dnl Find an install of STP dnl ************************************************************************** diff --git a/configure b/configure index 7f1839c0..f6059789 100755 --- a/configure +++ b/configure @@ -633,6 +633,8 @@ ENABLE_Z3 STP_LDFLAGS STP_CFLAGS ENABLE_STP +ZLIB_LIB +HAVE_ZLIB TCMALLOC_LIB HAVE_TCMALLOC CXXCPP @@ -737,6 +739,7 @@ with_uclibc enable_posix_runtime with_runtime with_tcmalloc +with_zlib with_stp with_z3 with_metasmt @@ -1392,6 +1395,8 @@ Optional Packages: (default [Release+Asserts]) --without-tcmalloc Ignore presence of tcmalloc and disable it (default=detect) + --without-zlib Ignore presence of zlib and disable it + (default=detect) --with-stp Location of STP installation directory --with-z3 Location of Z3 installation directory --with-metasmt Location of metaSMT installation directory @@ -5034,6 +5039,119 @@ fi fi + +# Check whether --with-zlib was given. +if test "${with_zlib+set}" = set; then : + withval=$with_zlib; +fi + + +if test "x$with_zlib" != "xno"; then : + for ac_header in zlib.h +do : + ac_fn_cxx_check_header_mongrel "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default" +if test "x$ac_cv_header_zlib_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_ZLIB_H 1 +_ACEOF + have_zlib=yes +else + have_zlib=no +fi + +done + +else + have_zlib=no +fi + +if test "x$have_zlib" = "xyes"; then : + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing deflateEnd" >&5 +$as_echo_n "checking for library containing deflateEnd... " >&6; } +if ${ac_cv_search_deflateEnd+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char deflateEnd (); +int +main () +{ +return deflateEnd (); + ; + return 0; +} +_ACEOF +for ac_lib in '' z; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_cxx_try_link "$LINENO"; then : + ac_cv_search_deflateEnd=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_deflateEnd+:} false; then : + break +fi +done +if ${ac_cv_search_deflateEnd+:} false; then : + +else + ac_cv_search_deflateEnd=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_deflateEnd" >&5 +$as_echo "$ac_cv_search_deflateEnd" >&6; } +ac_res=$ac_cv_search_deflateEnd +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + + HAVE_ZLIB=1 + + if test "${ac_cv_search_zlib}" != "none required"; then + ZLIB_LIB=${ac_cv_search_zlib} + + fi + +else + + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Could not link with zlib" >&5 +$as_echo "$as_me: WARNING: Could not link with zlib" >&2;} + HAVE_ZLIB=0 + + +fi + + + +else + if test "x$with_zlib" = "xyes"; then : + as_fn_error $? "zlib requested but not found" "$LINENO" 5 +else + + HAVE_ZLIB=0 + + +fi + +fi + + ENABLE_STP=0 # Check whether --with-stp was given. diff --git a/include/klee/Config/config.h.in b/include/klee/Config/config.h.in index 008b2f90..87d6ee75 100644 --- a/include/klee/Config/config.h.in +++ b/include/klee/Config/config.h.in @@ -69,6 +69,9 @@ /* Z3 needs a Z3_context passed to Z3_get_error_msg() */ #undef HAVE_Z3_GET_ERROR_MSG_NEEDS_CONTEXT +/* Define to 1 if you have the header file. */ +#undef HAVE_ZLIB_H + /* Define to empty or 'const' depending on how SELinux qualifies its security context parameters. */ #undef KLEE_SELINUX_CTX_CONST diff --git a/include/klee/Internal/Support/CompressionStream.h b/include/klee/Internal/Support/CompressionStream.h new file mode 100644 index 00000000..fda68d89 --- /dev/null +++ b/include/klee/Internal/Support/CompressionStream.h @@ -0,0 +1,46 @@ +//===-- CompressionStream.h --------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef INCLUDE_KLEE_INTERNAL_SUPPORT_COMPRESSIONSTREAM_H_ +#define INCLUDE_KLEE_INTERNAL_SUPPORT_COMPRESSIONSTREAM_H_ + +#include "llvm/Support/raw_ostream.h" +#include "zlib.h" + +namespace klee { +const size_t BUFSIZE = 128 * 1024; + +class compressed_fd_ostream : public llvm::raw_ostream { + int FD; + uint8_t buffer[BUFSIZE]; + z_stream strm; + uint64_t pos; + + /// write_impl - See raw_ostream::write_impl. + virtual void write_impl(const char *Ptr, size_t Size); + void write_file(const char *Ptr, size_t Size); + + virtual uint64_t current_pos() const { return pos; } + + void flush_compressed_data(); + void writeFullCompressedData(); + +public: + /// compressed_fd_ostream - Open the specified file for writing. If an error + /// occurs, information about the error is put into ErrorInfo, and the stream + /// should be immediately destroyed; the string will be empty if no error + /// occurred. This allows optional flags to control how the file will be + /// opened. + compressed_fd_ostream(const char *Filename, std::string &ErrorInfo); + + ~compressed_fd_ostream(); +}; +} + +#endif /* INCLUDE_KLEE_INTERNAL_SUPPORT_COMPRESSIONSTREAM_H_ */ diff --git a/lib/Core/Executor.cpp b/lib/Core/Executor.cpp index 49b022f8..72451582 100644 --- a/lib/Core/Executor.cpp +++ b/lib/Core/Executor.cpp @@ -89,6 +89,10 @@ #include "llvm/IR/CallSite.h" #endif +#ifdef HAVE_ZLIB_H +#include "klee/Internal/Support/CompressionStream.h" +#endif + #include #include #include @@ -158,6 +162,11 @@ namespace { "[inst_id]"), clEnumValEnd), llvm::cl::CommaSeparated); +#ifdef HAVE_ZLIB_H + cl::opt DebugCompressInstructions( + "debug-compress-instructions", cl::init(false), + cl::desc("Compress the logged instructions in gzip format.")); +#endif cl::opt DebugCheckForImpliedValues("debug-check-for-implied-values"); @@ -332,6 +341,10 @@ Executor::Executor(const InterpreterOptions &opts, InterpreterHandler *ih) std::string debug_file_name = interpreterHandler->getOutputFilename("instructions.txt"); std::string ErrorInfo; +#ifdef HAVE_ZLIB_H + if (!DebugCompressInstructions) { +#endif + #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 5) debugInstFile = new llvm::raw_fd_ostream(debug_file_name.c_str(), ErrorInfo, llvm::sys::fs::OpenFlags::F_Text), @@ -339,6 +352,16 @@ Executor::Executor(const InterpreterOptions &opts, InterpreterHandler *ih) debugInstFile = new llvm::raw_fd_ostream(debug_file_name.c_str(), ErrorInfo); #endif +#ifdef HAVE_ZLIB_H + } else { + debugInstFile = new compressed_fd_ostream( + (debug_file_name + ".gz").c_str(), ErrorInfo); + } +#endif + if (ErrorInfo != "") { + klee_error("Could not open file %s : %s", debug_file_name.c_str(), + ErrorInfo.c_str()); + } } } diff --git a/lib/Solver/QueryLoggingSolver.cpp b/lib/Solver/QueryLoggingSolver.cpp index f93bec3c..fb957b65 100644 --- a/lib/Solver/QueryLoggingSolver.cpp +++ b/lib/Solver/QueryLoggingSolver.cpp @@ -7,8 +7,13 @@ // //===----------------------------------------------------------------------===// #include "QueryLoggingSolver.h" +#include "klee/Config/config.h" #include "klee/Internal/System/Time.h" #include "klee/Statistics.h" +#ifdef HAVE_ZLIB_H +#include "klee/Internal/Support/CompressionStream.h" +#include "klee/Internal/Support/ErrorHandling.h" +#endif #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 5) #include "llvm/Support/FileSystem.h" @@ -20,30 +25,49 @@ namespace { llvm::cl::opt DumpPartialQueryiesEarly( "log-partial-queries-early", llvm::cl::init(false), llvm::cl::desc("Log queries before calling the solver (default=off)")); + +#ifdef HAVE_ZLIB_H +llvm::cl::opt CreateCompressedQueryLog( + "compress-query-log", llvm::cl::init(false), + llvm::cl::desc("Compress query log files (default=off)")); +#endif } QueryLoggingSolver::QueryLoggingSolver(Solver *_solver, std::string path, const std::string &commentSign, int queryTimeToLog) - : solver(_solver), + : solver(_solver), os(0), BufferString(""), logBuffer(BufferString), + queryCount(0), minQueryTimeToLog(queryTimeToLog), startTime(0.0f), + lastQueryTime(0.0f), queryCommentSign(commentSign) { + if (!CreateCompressedQueryLog) { #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 5) - os(path.c_str(), ErrorInfo, llvm::sys::fs::OpenFlags::F_Text), + os = new llvm::raw_fd_ostream(path.c_str(), ErrorInfo, + llvm::sys::fs::OpenFlags::F_Text); #else - os(path.c_str(), ErrorInfo), + os = new llvm::raw_fd_ostream(path.c_str(), ErrorInfo); +#endif + } +#ifdef HAVE_ZLIB_H + else { + os = new compressed_fd_ostream((path + ".gz").c_str(), ErrorInfo); + } + if (ErrorInfo != "") { + klee_error("Could not open file %s : %s", path.c_str(), ErrorInfo.c_str()); + } #endif - BufferString(""), logBuffer(BufferString), queryCount(0), - minQueryTimeToLog(queryTimeToLog), startTime(0.0f), lastQueryTime(0.0f), - queryCommentSign(commentSign) { assert(0 != solver); } -QueryLoggingSolver::~QueryLoggingSolver() { delete solver; } +QueryLoggingSolver::~QueryLoggingSolver() { + delete solver; + delete os; +} void QueryLoggingSolver::flushBufferConditionally(bool writeToFile) { logBuffer.flush(); if (writeToFile) { - os << logBuffer.str(); - os.flush(); + *os << logBuffer.str(); + os->flush(); } // prepare the buffer for reuse BufferString = ""; diff --git a/lib/Solver/QueryLoggingSolver.h b/lib/Solver/QueryLoggingSolver.h index bb266c67..7ac783d1 100644 --- a/lib/Solver/QueryLoggingSolver.h +++ b/lib/Solver/QueryLoggingSolver.h @@ -26,7 +26,7 @@ class QueryLoggingSolver : public SolverImpl { protected: Solver *solver; std::string ErrorInfo; - llvm::raw_fd_ostream os; + llvm::raw_ostream *os; // @brief Buffer used by logBuffer std::string BufferString; // @brief buffer to store logs before flushing to file diff --git a/lib/Support/CompressionStream.cpp b/lib/Support/CompressionStream.cpp new file mode 100644 index 00000000..eb208edf --- /dev/null +++ b/lib/Support/CompressionStream.cpp @@ -0,0 +1,119 @@ +//===-- CompressionStream.cpp --------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "klee/Config/config.h" +#include "klee/Config/Version.h" +#ifdef HAVE_ZLIB_H +#include "klee/Internal/Support/CompressionStream.h" +#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3) +#include "llvm/Support/system_error.h" +#else +#include +#include +#include +#include +#endif + +namespace klee { + +compressed_fd_ostream::compressed_fd_ostream(const char *Filename, + std::string &ErrorInfo) + : llvm::raw_ostream(), pos(0) { + ErrorInfo = ""; +#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3) + // Open file in binary mode + llvm::error_code EC = + llvm::sys::fs::openFileForWrite(Filename, FD, llvm::sys::fs::F_Binary); + + if (EC) { + ErrorInfo = EC.message(); + FD = -1; + } +#else + FD = ::open(Filename, O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (FD < 0) { + ErrorInfo = "Could not open file."; + FD = -1; + } +#endif + // Initialize the compression library + strm.zalloc = 0; + strm.zfree = 0; + strm.next_out = buffer; + strm.avail_out = BUFSIZE; + + deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, 15 | 16, + 8 /* memory usage default, 0 smalles, 9 highest*/, + Z_DEFAULT_STRATEGY); +} + +void compressed_fd_ostream::writeFullCompressedData() { + // Check if no space available and write the buffer + if (strm.avail_out == 0) { + write_file(reinterpret_cast(buffer), BUFSIZE); + strm.next_out = buffer; + strm.avail_out = BUFSIZE; + } +} + +void compressed_fd_ostream::flush_compressed_data() { + // flush data from the raw buffer + flush(); + + // write the remaining data + int deflate_res = Z_OK; + while (deflate_res == Z_OK) { + // Check if no space available and write the buffer + writeFullCompressedData(); + deflate_res = deflate(&strm, Z_FINISH); + } + assert(deflate_res == Z_STREAM_END); + write_file(reinterpret_cast(buffer), BUFSIZE - strm.avail_out); +} + +compressed_fd_ostream::~compressed_fd_ostream() { + if (FD >= 0) { + // write the remaining data + flush_compressed_data(); + close(FD); + } + deflateEnd(&strm); +} + +void compressed_fd_ostream::write_impl(const char *Ptr, size_t Size) { + strm.next_in = + const_cast(reinterpret_cast(Ptr)); + strm.avail_in = Size; + + // Check if there is still data to compress + while (strm.avail_in != 0) { + // compress data + int res = deflate(&strm, Z_NO_FLUSH); + assert(res == Z_OK); + writeFullCompressedData(); + } +} + +void compressed_fd_ostream::write_file(const char *Ptr, size_t Size) { + pos += Size; + assert(FD >= 0 && "File already closed"); + do { + ssize_t ret = ::write(FD, Ptr, Size); + if (ret < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; + assert(0 && "Could not write to file"); + break; + } + + Ptr += ret; + Size -= ret; + } while (Size > 0); +} +} +#endif diff --git a/test/Feature/CompressedExprLogging.c b/test/Feature/CompressedExprLogging.c new file mode 100644 index 00000000..a2a07d8b --- /dev/null +++ b/test/Feature/CompressedExprLogging.c @@ -0,0 +1,42 @@ +// RUN: %llvmgcc %s -emit-llvm -g -O0 -c -o %t1.bc +// We disable the cex-cache to eliminate nondeterminism across different +// solvers, in particular when counting the number of queries in the last two +// commands +// RUN: rm -rf %t.klee-out %t.klee-out2 +// RUN: %klee --output-dir=%t.klee-out --use-cex-cache=false --use-query-log=all:pc %t1.bc +// RUN: %klee --output-dir=%t.klee-out2 --use-cex-cache=false --compress-query-log --use-query-log=all:pc %t1.bc +// RUN: gunzip -d %t.klee-out2/all-queries.pc.gz +// RUN: diff %t.klee-out/all-queries.pc %t.klee-out/all-queries.pc + +#include + +int constantArr[16] = {1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, + 1 << 6, 1 << 7, 1 << 8, 1 << 9, 1 << 10, 1 << 11, + 1 << 12, 1 << 13, 1 << 14, 1 << 15}; + +int main() { + char buf[4]; + klee_make_symbolic(buf, sizeof buf); + + buf[1] = 'a'; + + constantArr[klee_range(0, 16, "idx.0")] = buf[0]; + + // Use this to trigger an interior update list usage. + int y = constantArr[klee_range(0, 16, "idx.1")]; + + constantArr[klee_range(0, 16, "idx.2")] = buf[3]; + + buf[klee_range(0, 4, "idx.3")] = 0; + klee_assume(buf[0] == 'h'); + + int x = *((int *)buf); + klee_assume(x > 2); + klee_assume(x == constantArr[12]); + + klee_assume(y != (1 << 5)); + + assert(0); + + return 0; +} diff --git a/tools/kleaver/Makefile b/tools/kleaver/Makefile index 1631dda6..a2250fe0 100644 --- a/tools/kleaver/Makefile +++ b/tools/kleaver/Makefile @@ -32,3 +32,7 @@ include $(PROJ_SRC_ROOT)/MetaSMT.mk ifeq ($(HAVE_TCMALLOC),1) LIBS += $(TCMALLOC_LIB) endif + +ifeq ($(HAVE_ZLIB),1) + LIBS += -lz +endif diff --git a/tools/klee/Makefile b/tools/klee/Makefile index 676507e0..8d50403f 100644 --- a/tools/klee/Makefile +++ b/tools/klee/Makefile @@ -33,3 +33,7 @@ include $(PROJ_SRC_ROOT)/MetaSMT.mk ifeq ($(HAVE_TCMALLOC),1) LIBS += $(TCMALLOC_LIB) endif + +ifeq ($(HAVE_ZLIB),1) + LIBS += -lz +endif \ No newline at end of file -- cgit 1.4.1