Diffstat (limited to 'instrumentation')
21 files changed, 850 insertions, 577 deletions
diff --git a/instrumentation/LLVMInsTrim.so.cc b/instrumentation/LLVMInsTrim.so.cc index 235ee30f..62de6ec5 100644 --- a/instrumentation/LLVMInsTrim.so.cc +++ b/instrumentation/LLVMInsTrim.so.cc @@ -38,7 +38,7 @@ typedef long double max_align_t; #include "MarkNodes.h" #include "afl-llvm-common.h" -#include "llvm-ngram-coverage.h" +#include "llvm-alternative-coverage.h" #include "config.h" #include "debug.h" @@ -135,7 +135,7 @@ struct InsTrim : public ModulePass { unsigned int PrevLocSize = 0; char * ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE"); if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE"); - char *ctx_str = getenv("AFL_LLVM_CTX"); + char *caller_str = getenv("AFL_LLVM_CALLER"); #ifdef AFL_HAVE_VECTOR_INTRINSICS unsigned int ngram_size = 0; @@ -197,9 +197,9 @@ struct InsTrim : public ModulePass { GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); GlobalVariable *AFLPrevLoc; GlobalVariable *AFLContext = NULL; - LoadInst * PrevCtx = NULL; // for CTX sensitive coverage + LoadInst * PrevCaller = NULL; // for CALLER sensitive coverage - if (ctx_str) + if (caller_str) #if defined(__ANDROID__) || defined(__HAIKU__) AFLContext = new GlobalVariable( M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx"); @@ -398,11 +398,11 @@ struct InsTrim : public ModulePass { unsigned int cur_loc; // Context sensitive coverage - if (ctx_str && &BB == &F.getEntryBlock()) { + if (caller_str && &BB == &F.getEntryBlock()) { - PrevCtx = IRB.CreateLoad(AFLContext); - PrevCtx->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); + PrevCaller = IRB.CreateLoad(AFLContext); + PrevCaller->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); // does the function have calls? and is any of the calls larger than // one basic block? 
@@ -441,7 +441,7 @@ struct InsTrim : public ModulePass { } - } // END of ctx_str + } // END of caller_str if (MarkSetOpt && MS.find(&BB) == MS.end()) { continue; } @@ -459,7 +459,7 @@ struct InsTrim : public ModulePass { BasicBlock *PBB = *PI; auto It = PredMap.insert({PBB, genLabel()}); unsigned Label = It.first->second; - cur_loc = Label; + // cur_loc = Label; PN->addIncoming(ConstantInt::get(Int32Ty, Label), PBB); } @@ -485,9 +485,9 @@ struct InsTrim : public ModulePass { #endif PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); - if (ctx_str) + if (caller_str) PrevLocTrans = - IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty); + IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCaller), Int32Ty); /* Load SHM pointer */ LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); @@ -535,16 +535,17 @@ struct InsTrim : public ModulePass { IRB.CreateStore(Incr, MapPtrIdx) ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - if (ctx_str && has_calls) { + if (caller_str && has_calls) { - // in CTX mode we have to restore the original context for the + // in CALLER mode we have to restore the original context for the // caller - she might be calling other functions which need the - // correct CTX + // correct CALLER Instruction *Inst = BB.getTerminator(); if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) { IRBuilder<> Post_IRB(Inst); - StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext); + StoreInst * RestoreCtx = + Post_IRB.CreateStore(PrevCaller, AFLContext); RestoreCtx->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); diff --git a/instrumentation/README.cmplog.md b/instrumentation/README.cmplog.md index 5f855e1f..a796c7a7 100644 --- a/instrumentation/README.cmplog.md +++ b/instrumentation/README.cmplog.md @@ -1,10 +1,11 @@ # CmpLog instrumentation -The CmpLog instrumentation enables the logging of the comparisons operands in a +The CmpLog instrumentation enables logging of comparison operands in a shared memory. These values can be used by various mutators built on top of it. -At the moment we support the RedQueen mutator (input-2-state instructions only). +At the moment we support the RedQueen mutator (input-2-state instructions only), +for details see [the RedQueen paper](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf). ## Build @@ -13,7 +14,7 @@ program. The first version is built using the regular AFL++ instrumentation. -The second one, the CmpLog binary, with setting AFL_LLVM_CMPLOG during the compilation. +The second one, the CmpLog binary, is built with setting AFL_LLVM_CMPLOG during the compilation. For example: @@ -26,11 +27,12 @@ export AFL_LLVM_CMPLOG=1 ./configure --cc=~/path/to/afl-clang-fast make cp ./program ./program.cmplog +unset AFL_LLVM_CMPLOG ``` ## Use -AFL++ has the new -c option that needs to be used to specify the CmpLog binary (the second +AFL++ has the new `-c` option that needs to be used to specify the CmpLog binary (the second build). For example: diff --git a/instrumentation/README.ctx.md b/instrumentation/README.ctx.md index caf2c09a..335e9921 100644 --- a/instrumentation/README.ctx.md +++ b/instrumentation/README.ctx.md @@ -4,14 +4,19 @@ This is an LLVM-based implementation of the context sensitive branch coverage. -Basically every function gets its own ID and that ID is combined with the -edges of the called functions. 
+Basically every function gets its own ID and, every time an edge is logged, +all the IDs in the callstack are hashed and combined with the edge transition +hash to augment the classic edge coverage with the information about the +calling context. So if both function A and function B call a function C, the coverage collected in C will be different. In math the coverage is collected as follows: -`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1` +`map[current_location_ID ^ previous_location_ID >> 1 ^ hash_callstack_IDs] += 1` + +The callstack hash is produced by XOR-ing the function IDs to avoid explosion with +recursive functions. ## Usage @@ -20,3 +25,14 @@ Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable. It is highly recommended to increase the MAP_SIZE_POW2 definition in config.h to at least 18 and maybe up to 20 for this as otherwise too many map collisions occur. + +## Caller Branch Coverage + +If the context sensitive coverage introduces too many collisions and becomes +detrimental, the user can choose to augment edge coverage with just the +called function ID, instead of the entire callstack hash. + +In math the coverage is collected as follows: +`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1` + +Set the `AFL_LLVM_INSTRUMENT=CALLER` or `AFL_LLVM_CALLER=1` environment variable. diff --git a/instrumentation/README.gcc_plugin.md b/instrumentation/README.gcc_plugin.md index 12449efd..230ceb73 100644 --- a/instrumentation/README.gcc_plugin.md +++ b/instrumentation/README.gcc_plugin.md @@ -3,16 +3,20 @@ See [../README.md](../README.md) for the general instruction manual. See [README.llvm.md](README.llvm.md) for the LLVM-based instrumentation. +This document describes how to build and use `afl-gcc-fast` and `afl-g++-fast`, +which instrument the target with the help of gcc plugins. + TLDR: - * `apt-get install gcc-VERSION-plugin-dev` - * `make` - * gcc and g++ must point to the gcc-VERSION you you have to set AFL_CC/AFL_CXX + * check the version of your gcc compiler: `gcc --version` + * `apt-get install gcc-VERSION-plugin-dev` or similar to install headers for gcc plugins + * `gcc` and `g++` must match the gcc-VERSION you installed headers for. You can set `AFL_CC`/`AFL_CXX` to point to these! - * just use afl-gcc-fast/afl-g++-fast normally like you would afl-clang-fast + * `make` + * just use `afl-gcc-fast`/`afl-g++-fast` normally like you would do with `afl-clang-fast` ## 1) Introduction -The code in this directory allows you to instrument programs for AFL using +The code in this directory allows to instrument programs for AFL using true compiler-level instrumentation, instead of the more crude assembly-level rewriting approach taken by afl-gcc and afl-clang. This has several interesting properties: @@ -27,10 +31,10 @@ several interesting properties: - The instrumentation is CPU-independent. At least in principle, you should be able to rely on it to fuzz programs on non-x86 architectures (after - building afl-fuzz with AFL_NOX86=1). + building `afl-fuzz` with `AFL_NOX86=1`). - Because the feature relies on the internals of GCC, it is gcc-specific - and will *not* work with LLVM (see ../llvm_mode for an alternative). + and will *not* work with LLVM (see [README.llvm.md](README.llvm.md) for an alternative). Once this implementation is shown to be sufficiently robust and portable, it will probably replace afl-gcc.
For now, it can be built separately and @@ -41,29 +45,32 @@ The idea and much of the implementation comes from Laszlo Szekeres. ## 2) How to use In order to leverage this mechanism, you need to have modern enough GCC -(>= version 4.5.0) and the plugin headers installed on your system. That +(>= version 4.5.0) and the plugin development headers installed on your system. That should be all you need. On Debian machines, these headers can be acquired by installing the `gcc-VERSION-plugin-dev` packages. -To build the instrumentation itself, type 'make'. This will generate binaries -called afl-gcc-fast and afl-g++-fast in the parent directory. +To build the instrumentation itself, type `make`. This will generate binaries +called `afl-gcc-fast` and `afl-g++-fast` in the parent directory. The gcc and g++ compiler links have to point to gcc-VERSION - or set these -by pointing the environment variables AFL_CC/AFL_CXX to them. -If the CC/CXX have been overridden, those compilers will be used from -those wrappers without using AFL_CXX/AFL_CC settings. +by pointing the environment variables `AFL_CC`/`AFL_CXX` to them. +If the `CC`/`CXX` environment variables have been set, those compilers will be +preferred over those from the `AFL_CC`/`AFL_CXX` settings. Once this is done, you can instrument third-party code in a way similar to the standard operating mode of AFL, e.g.: - - CC=/path/to/afl/afl-gcc-fast ./configure [...options...] +``` + CC=/path/to/afl/afl-gcc-fast + CXX=/path/to/afl/afl-g++-fast + export CC CXX + ./configure [...options...] make +``` +Note: We also used `CXX` to set the C++ compiler to `afl-g++-fast` for C++ code. -Be sure to also include CXX set to afl-g++-fast for C++ code. - -The tool honors roughly the same environmental variables as afl-gcc (see -[env_variables.md](../docs/env_variables.md). This includes AFL_INST_RATIO, -AFL_USE_ASAN, AFL_HARDEN, and AFL_DONT_OPTIMIZE. +The tool honors roughly the same environmental variables as `afl-gcc` (see +[env_variables.md](../docs/env_variables.md). This includes `AFL_INST_RATIO`, +`AFL_USE_ASAN`, `AFL_HARDEN`, and `AFL_DONT_OPTIMIZE`. Note: if you want the GCC plugin to be installed on your system for all users, you need to build it before issuing 'make install' in the parent @@ -72,7 +79,7 @@ directory. ## 3) Gotchas, feedback, bugs This is an early-stage mechanism, so field reports are welcome. You can send bug -reports to afl@aflplus.plus +reports to afl@aflplus.plus. ## 4) Bonus feature #1: deferred initialization @@ -88,7 +95,7 @@ file before getting to the fuzzed data. In such cases, it's beneficial to initialize the forkserver a bit later, once most of the initialization work is already done, but before the binary attempts to read the fuzzed input and parse it; in some cases, this can offer a 10x+ -performance gain. You can implement delayed initialization in LLVM mode in a +performance gain. You can implement delayed initialization in GCC mode in a fairly simple way. First, locate a suitable location in the code where the delayed cloning can @@ -117,7 +124,7 @@ With the location selected, add this code in the appropriate spot: ``` You don't need the #ifdef guards, but they will make the program still work as -usual when compiled with a tool other than afl-gcc-fast/afl-clang-fast. +usual when compiled with a compiler other than afl-gcc-fast/afl-clang-fast. Finally, recompile the program with afl-gcc-fast (afl-gcc or afl-clang will *not* generate a deferred-initialization binary) - and you should be all set! 
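The actual deferred-initialization snippet referred to above ("add this code in the appropriate spot") falls outside the changed context of this hunk. As a reminder, a minimal sketch of the usual AFL++ pattern (also shown in README.persistent_mode.md later in this diff) looks like this; the `__AFL_HAVE_MANUAL_CONTROL` guard keeps the target building with compilers other than the AFL wrappers:

```c
  /* Placed after the expensive setup work, but before the fuzzed input
     is read: the forkserver is started (the process is cloned) here. */
  #ifdef __AFL_HAVE_MANUAL_CONTROL
    __AFL_INIT();
  #endif
```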
@@ -127,7 +134,7 @@ Finally, recompile the program with afl-gcc-fast (afl-gcc or afl-clang will Some libraries provide APIs that are stateless, or whose state can be reset in between processing different input files. When such a reset is performed, a single long-lived process can be reused to try out multiple test cases, -eliminating the need for repeated fork() calls and the associated OS overhead. +eliminating the need for repeated `fork()` calls and the associated OS overhead. The basic structure of the program that does this would be: @@ -160,5 +167,9 @@ wary of memory leaks and the state of file descriptors. When running in this mode, the execution paths will inherently vary a bit depending on whether the input loop is being entered for the first time or executed again. To avoid spurious warnings, the feature implies -AFL_NO_VAR_CHECK and hides the "variable path" warnings in the UI. +`AFL_NO_VAR_CHECK` and hides the "variable path" warnings in the UI. + +## 6) Bonus feature #3: selective instrumentation +It can be more effective for fuzzing to only instrument parts of the code. +For details see [README.instrument_list.md](README.instrument_list.md). diff --git a/instrumentation/README.instrument_list.md b/instrumentation/README.instrument_list.md index b7dfb40c..2116d24c 100644 --- a/instrumentation/README.instrument_list.md +++ b/instrumentation/README.instrument_list.md @@ -47,10 +47,10 @@ A special function is `__afl_coverage_interesting`. To use this, you must define `void __afl_coverage_interesting(u8 val, u32 id);`. Then you can use this function globally, where the `val` parameter can be set by you, the `id` parameter is for afl-fuzz and will be overwritten. -Note that useful parameters are for `val` are: 1, 2, 3, 4, 8, 16, 32, 64, 128. +Note that useful parameters for `val` are: 1, 2, 3, 4, 8, 16, 32, 64, 128. A value of e.g. 33 will be seen as 32 for coverage purposes. -## 3) Selective instrumenation with AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST +## 3) Selective instrumentation with AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST This feature is equivalent to llvm 12 sancov feature and allows to specify on a filename and/or function name level to instrument these or skip them. diff --git a/instrumentation/README.lto.md b/instrumentation/README.lto.md index a2814173..39f6465a 100644 --- a/instrumentation/README.lto.md +++ b/instrumentation/README.lto.md @@ -88,16 +88,35 @@ apt-get install -y clang-12 clang-tools-12 libc++1-12 libc++-12-dev \ ### Building llvm yourself (version 12) Building llvm from github takes quite some long time and is not painless: -``` +```sh sudo apt install binutils-dev # this is *essential*! -git clone https://github.com/llvm/llvm-project +git clone --depth=1 https://github.com/llvm/llvm-project cd llvm-project mkdir build cd build -cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/ -make -j $(nproc) -export PATH=`pwd`/bin:$PATH -export LLVM_CONFIG=`pwd`/bin/llvm-config + +# Add -G Ninja if ninja-build installed +# "Building with ninja significantly improves your build time, especially with +# incremental builds, and improves your memory usage."
+cmake \ + -DCLANG_INCLUDE_DOCS="OFF" \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_BINUTILS_INCDIR=/usr/include/ \ + -DLLVM_BUILD_LLVM_DYLIB="ON" \ + -DLLVM_ENABLE_BINDINGS="OFF" \ + -DLLVM_ENABLE_PROJECTS='clang;compiler-rt;libcxx;libcxxabi;libunwind;lld' \ + -DLLVM_ENABLE_WARNINGS="OFF" \ + -DLLVM_INCLUDE_BENCHMARKS="OFF" \ + -DLLVM_INCLUDE_DOCS="OFF" \ + -DLLVM_INCLUDE_EXAMPLES="OFF" \ + -DLLVM_INCLUDE_TESTS="OFF" \ + -DLLVM_LINK_LLVM_DYLIB="ON" \ + -DLLVM_TARGETS_TO_BUILD="host" \ + ../llvm/ +cmake --build . -j4 +export PATH="$(pwd)/bin:$PATH" +export LLVM_CONFIG="$(pwd)/bin/llvm-config" +export LD_LIBRARY_PATH="$(llvm-config --libdir)${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" cd /path/to/AFLplusplus/ make sudo make install diff --git a/instrumentation/README.neverzero.md b/instrumentation/README.neverzero.md index 5c894d6e..49104e00 100644 --- a/instrumentation/README.neverzero.md +++ b/instrumentation/README.neverzero.md @@ -16,7 +16,7 @@ at a very little cost (one instruction per edge). (The alternative of saturated counters has been tested also and proved to be inferior in terms of path discovery.) -This is implemented in afl-gcc, however for llvm_mode this is optional if +This is implemented in afl-gcc and afl-gcc-fast, however for llvm_mode this is optional if the llvm version is below 9 - as there is a perfomance bug that is only fixed in version 9 and onwards. diff --git a/instrumentation/README.ngram.md b/instrumentation/README.ngram.md index de3ba432..da61ef32 100644 --- a/instrumentation/README.ngram.md +++ b/instrumentation/README.ngram.md @@ -10,8 +10,8 @@ by Jinghan Wang, et. al. Note that the original implementation (available [here](https://github.com/bitsecurerlab/afl-sensitive)) is built on top of AFL's QEMU mode. -This is essentially a port that uses LLVM vectorized instructions to achieve -the same results when compiling source code. +This is essentially a port that uses LLVM vectorized instructions (available from +llvm versions 4.0.1 and higher) to achieve the same results when compiling source code. In math the branch coverage is performed as follows: `map[current_location ^ prev_location[0] >> 1 ^ prev_location[1] >> 1 ^ ... up to n-1`] += 1` diff --git a/instrumentation/README.out_of_line.md b/instrumentation/README.out_of_line.md index aad215b6..2264f91f 100644 --- a/instrumentation/README.out_of_line.md +++ b/instrumentation/README.out_of_line.md @@ -1,18 +1,16 @@ -=========================================== -Using afl++ without inlined instrumentation -=========================================== +## Using afl++ without inlined instrumentation This file describes how you can disable inlining of instrumentation. By default, the GCC plugin will duplicate the effects of calling -__afl_trace (see afl-gcc-rt.o.c) in instrumented code, instead of +`__afl_trace` (see `afl-gcc-rt.o.c`) in instrumented code, instead of issuing function calls. The calls are presumed to be slower, more so because the rt file itself is not optimized by the compiler. -Setting AFL_GCC_OUT_OF_LINE=1 in the environment while compiling code +Setting `AFL_GCC_OUT_OF_LINE=1` in the environment while compiling code with the plugin will disable this inlining, issuing calls to the unoptimized runtime instead. 
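As a usage sketch for the out-of-line option described above (the source and output file names are illustrative, and mirror the `unset` convention used in the CmpLog example earlier in this diff):

```sh
# Compile with instrumentation emitted as calls into the runtime
# instead of the default inlined counter updates.
export AFL_GCC_OUT_OF_LINE=1
afl-gcc-fast -O2 -o target target.c
unset AFL_GCC_OUT_OF_LINE
```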
diff --git a/instrumentation/README.persistent_mode.md b/instrumentation/README.persistent_mode.md index 2cf76adf..24f81ea0 100644 --- a/instrumentation/README.persistent_mode.md +++ b/instrumentation/README.persistent_mode.md @@ -16,7 +16,7 @@ Examples can be found in [utils/persistent_mode](../utils/persistent_mode). ## 2) TLDR; Example `fuzz_target.c`: -``` +```c #include "what_you_need_for_your_target.h" __AFL_FUZZ_INIT(); @@ -60,14 +60,14 @@ The speed increase is usually x10 to x20. If you want to be able to compile the target without afl-clang-fast/lto then add this just after the includes: -``` +```c #ifndef __AFL_FUZZ_TESTCASE_LEN ssize_t fuzz_len; #define __AFL_FUZZ_TESTCASE_LEN fuzz_len unsigned char fuzz_buf[1024000]; #define __AFL_FUZZ_TESTCASE_BUF fuzz_buf #define __AFL_FUZZ_INIT() void sync(void); - #define __AFL_LOOP(x) ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ? + #define __AFL_LOOP(x) ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ? 1 : 0) #define __AFL_INIT() sync() #endif ``` @@ -75,7 +75,7 @@ add this just after the includes: ## 3) Deferred initialization AFL tries to optimize performance by executing the targeted binary just once, -stopping it just before main(), and then cloning this "main" process to get +stopping it just before `main()`, and then cloning this "main" process to get a steady supply of targets to fuzz. Although this approach eliminates much of the OS-, linker- and libc-level @@ -97,7 +97,7 @@ a location after: - The creation of any vital threads or child processes - since the forkserver can't clone them easily. - - The initialization of timers via setitimer() or equivalent calls. + - The initialization of timers via `setitimer()` or equivalent calls. - The creation of temporary files, network sockets, offset-sensitive file descriptors, and similar shared-state resources - but only provided that @@ -150,9 +150,9 @@ the impact of memory leaks and similar glitches; 1000 is a good starting point, and going much higher increases the likelihood of hiccups without giving you any real performance benefits. -A more detailed template is shown in ../utils/persistent_mode/. -Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef -guards can be used to suppress it when using other compilers. +A more detailed template is shown in `../utils/persistent_mode/.` +Similarly to the previous mode, the feature works only with afl-clang-fast; +`#ifdef` guards can be used to suppress it when using other compilers. Note that as with the previous mode, the feature is easy to misuse; if you do not fully reset the critical state, you may end up with false positives or @@ -161,7 +161,7 @@ wary of memory leaks and of the state of file descriptors. PS. Because there are task switches still involved, the mode isn't as fast as "pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot -faster than the normal fork() model, and compared to in-process fuzzing, +faster than the normal `fork()` model, and compared to in-process fuzzing, should be a lot more robust. ## 5) Shared memory fuzzing @@ -174,17 +174,17 @@ Setting this up is very easy: After the includes set the following macro: -``` +```c __AFL_FUZZ_INIT(); ``` Directly at the start of main - or if you are using the deferred forkserver -with `__AFL_INIT()` then *after* `__AFL_INIT? 
: -``` +with `__AFL_INIT()` then *after* `__AFL_INIT()` : +```c unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; ``` Then as first line after the `__AFL_LOOP` while loop: -``` +```c int len = __AFL_FUZZ_TESTCASE_LEN; ``` and that is all! diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc index e3490847..6dd390e6 100644 --- a/instrumentation/SanitizerCoverageLTO.so.cc +++ b/instrumentation/SanitizerCoverageLTO.so.cc @@ -507,6 +507,7 @@ bool ModuleSanitizerCoverage::instrumentModule( Zero = ConstantInt::get(Int8Tyi, 0); One = ConstantInt::get(Int8Tyi, 1); + initInstrumentList(); scanForDangerousFunctions(&M); Mo = &M; @@ -733,7 +734,7 @@ bool ModuleSanitizerCoverage::instrumentModule( Var->getInitializer())) { HasStr2 = true; - Str2 = Array->getAsString().str(); + Str2 = Array->getRawDataValues().str(); } @@ -760,7 +761,7 @@ bool ModuleSanitizerCoverage::instrumentModule( if (literalLength + 1 == optLength) { Str2.append("\0", 1); // add null byte - addedNull = true; + // addedNull = true; } @@ -809,7 +810,7 @@ bool ModuleSanitizerCoverage::instrumentModule( Var->getInitializer())) { HasStr1 = true; - Str1 = Array->getAsString().str(); + Str1 = Array->getRawDataValues().str(); } @@ -849,15 +850,18 @@ bool ModuleSanitizerCoverage::instrumentModule( thestring = Str2; optLen = thestring.length(); + if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; } if (isMemcmp || isStrncmp || isStrncasecmp) { Value * op2 = callInst->getArgOperand(2); ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { uint64_t literalLength = optLen; optLen = ilen->getZExtValue(); + if (optLen < 2) { continue; } if (literalLength + 1 == optLen) { // add null byte thestring.append("\0", 1); addedNull = true; @@ -872,17 +876,21 @@ bool ModuleSanitizerCoverage::instrumentModule( // was not already added if (!isMemcmp) { - if (addedNull == false) { + if (addedNull == false && thestring[optLen - 1] != '\0') { thestring.append("\0", 1); // add null byte optLen++; } - // ensure we do not have garbage - size_t offset = thestring.find('\0', 0); - if (offset + 1 < optLen) optLen = offset + 1; - thestring = thestring.substr(0, optLen); + if (!isStdString) { + + // ensure we do not have garbage + size_t offset = thestring.find('\0', 0); + if (offset + 1 < optLen) optLen = offset + 1; + thestring = thestring.substr(0, optLen); + + } } @@ -1222,7 +1230,7 @@ void ModuleSanitizerCoverage::instrumentFunction( // afl++ START if (!F.size()) return; - if (isIgnoreFunction(&F)) return; + if (!isInInstrumentList(&F)) return; // afl++ END if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) @@ -1284,10 +1292,17 @@ GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection( *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage, Constant::getNullValue(ArrayTy), "__sancov_gen_"); +#if LLVM_VERSION_MAJOR > 12 + if (TargetTriple.supportsCOMDAT() && + (TargetTriple.isOSBinFormatELF() || !F.isInterposable())) + if (auto Comdat = getOrCreateFunctionComdat(F, TargetTriple)) + Array->setComdat(Comdat); +#else if (TargetTriple.supportsCOMDAT() && !F.isInterposable()) if (auto Comdat = GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId)) Array->setComdat(Comdat); +#endif Array->setSection(getSectionName(Section)); Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize())); GlobalsToAppendToUsed.push_back(Array); diff --git a/instrumentation/SanitizerCoveragePCGUARD.so.cc b/instrumentation/SanitizerCoveragePCGUARD.so.cc index 
5d6d6703..09cda9e2 100644 --- a/instrumentation/SanitizerCoveragePCGUARD.so.cc +++ b/instrumentation/SanitizerCoveragePCGUARD.so.cc @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/EHPersonalities.h" @@ -34,11 +35,11 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SpecialCaseList.h" #if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0) #include "llvm/Support/VirtualFileSystem.h" #endif +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -47,65 +48,6 @@ #include "debug.h" #include "afl-llvm-common.h" -namespace llvm { - -/// This is the ModuleSanitizerCoverage pass used in the new pass manager. The -/// pass instruments functions for coverage, adds initialization calls to the -/// module for trace PC guards and 8bit counters if they are requested, and -/// appends globals to llvm.compiler.used. -class ModuleSanitizerCoveragePass - : public PassInfoMixin<ModuleSanitizerCoveragePass> { - - public: - explicit ModuleSanitizerCoveragePass( - SanitizerCoverageOptions Options = SanitizerCoverageOptions(), - const std::vector<std::string> &AllowlistFiles = - std::vector<std::string>(), - const std::vector<std::string> &BlocklistFiles = - std::vector<std::string>()) - : Options(Options) { - - if (AllowlistFiles.size() > 0) - Allowlist = SpecialCaseList::createOrDie(AllowlistFiles -#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0) - , - *vfs::getRealFileSystem() -#endif - ); - if (BlocklistFiles.size() > 0) - Blocklist = SpecialCaseList::createOrDie(BlocklistFiles -#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0) - , - *vfs::getRealFileSystem() -#endif - ); - - } - - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - static bool isRequired() { - - return true; - - } - - private: - SanitizerCoverageOptions Options; - - std::unique_ptr<SpecialCaseList> Allowlist; - std::unique_ptr<SpecialCaseList> Blocklist; - -}; - -// Insert SanitizerCoverage instrumentation. -ModulePass *createModuleSanitizerCoverageLegacyPassPass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), - const std::vector<std::string> &AllowlistFiles = std::vector<std::string>(), - const std::vector<std::string> &BlocklistFiles = - std::vector<std::string>()); - -} // namespace llvm - using namespace llvm; #define DEBUG_TYPE "sancov" @@ -156,96 +98,8 @@ static const char *const SanCovLowestStackName = "__sancov_lowest_stack"; static char *skip_nozero; -/* -static cl::opt<int> ClCoverageLevel( - "sanitizer-coverage-level", - cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, " - "3: all blocks and critical edges"), - cl::Hidden, cl::init(3)); - -static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc", - cl::desc("Experimental pc tracing"), cl::Hidden, - cl::init(false)); - -static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard", - cl::desc("pc tracing with a guard"), - cl::Hidden, cl::init(true)); - -// If true, we create a global variable that contains PCs of all instrumented -// BBs, put this global into a named section, and pass this section's bounds -// to __sanitizer_cov_pcs_init. 
-// This way the coverage instrumentation does not need to acquire the PCs -// at run-time. Works with trace-pc-guard, inline-8bit-counters, and -// inline-bool-flag. -static cl::opt<bool> ClCreatePCTable("sanitizer-coverage-pc-table", - cl::desc("create a static PC table"), - cl::Hidden, cl::init(false)); - -static cl::opt<bool> ClInline8bitCounters( - "sanitizer-coverage-inline-8bit-counters", - cl::desc("increments 8-bit counter for every edge"), cl::Hidden, - cl::init(false)); - -static cl::opt<bool> ClInlineBoolFlag( - "sanitizer-coverage-inline-bool-flag", - cl::desc("sets a boolean flag for every edge"), cl::Hidden, - cl::init(false)); - -static cl::opt<bool> ClCMPTracing( - "sanitizer-coverage-trace-compares", - cl::desc("Tracing of CMP and similar instructions"), cl::Hidden, - cl::init(false)); - -static cl::opt<bool> ClDIVTracing("sanitizer-coverage-trace-divs", - cl::desc("Tracing of DIV instructions"), - cl::Hidden, cl::init(false)); - -static cl::opt<bool> ClGEPTracing("sanitizer-coverage-trace-geps", - cl::desc("Tracing of GEP instructions"), - cl::Hidden, cl::init(false)); - -static cl::opt<bool> ClPruneBlocks( - "sanitizer-coverage-prune-blocks", - cl::desc("Reduce the number of instrumented blocks"), cl::Hidden, - cl::init(true)); - -static cl::opt<bool> ClStackDepth("sanitizer-coverage-stack-depth", - cl::desc("max stack depth tracing"), - cl::Hidden, cl::init(false)); -*/ namespace { -/* -SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) { - - SanitizerCoverageOptions Res; - switch (LegacyCoverageLevel) { - - case 0: - Res.CoverageType = SanitizerCoverageOptions::SCK_None; - break; - case 1: - Res.CoverageType = SanitizerCoverageOptions::SCK_Function; - break; - case 2: - Res.CoverageType = SanitizerCoverageOptions::SCK_BB; - break; - case 3: - Res.CoverageType = SanitizerCoverageOptions::SCK_Edge; - break; - case 4: - Res.CoverageType = SanitizerCoverageOptions::SCK_Edge; - Res.IndirectCalls = true; - break; - - } - - return Res; - -} - -*/ - SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { // Sets CoverageType and IndirectCalls. 
@@ -281,12 +135,14 @@ class ModuleSanitizerCoverage { public: ModuleSanitizerCoverage( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), - const SpecialCaseList * Allowlist = nullptr, - const SpecialCaseList * Blocklist = nullptr) - : Options(OverrideFromCL(Options)), - Allowlist(Allowlist), - Blocklist(Blocklist) { + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions() +#if LLVM_MAJOR > 10 + , + const SpecialCaseList *Allowlist = nullptr, + const SpecialCaseList *Blocklist = nullptr +#endif + ) + : Options(OverrideFromCL(Options)) { } @@ -356,9 +212,6 @@ class ModuleSanitizerCoverage { SanitizerCoverageOptions Options; - const SpecialCaseList *Allowlist; - const SpecialCaseList *Blocklist; - uint32_t instr = 0; GlobalVariable *AFLMapPtr = NULL; ConstantInt * One = NULL; @@ -370,27 +223,17 @@ class ModuleSanitizerCoverageLegacyPass : public ModulePass { public: ModuleSanitizerCoverageLegacyPass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions() +#if LLVM_VERSION_MAJOR > 10 + , const std::vector<std::string> &AllowlistFiles = std::vector<std::string>(), const std::vector<std::string> &BlocklistFiles = - std::vector<std::string>()) + std::vector<std::string>() +#endif + ) : ModulePass(ID), Options(Options) { - if (AllowlistFiles.size() > 0) - Allowlist = SpecialCaseList::createOrDie(AllowlistFiles -#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0) - , - *vfs::getRealFileSystem() -#endif - ); - if (BlocklistFiles.size() > 0) - Blocklist = SpecialCaseList::createOrDie(BlocklistFiles -#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0) - , - *vfs::getRealFileSystem() -#endif - ); initializeModuleSanitizerCoverageLegacyPassPass( *PassRegistry::getPassRegistry()); @@ -398,8 +241,12 @@ class ModuleSanitizerCoverageLegacyPass : public ModulePass { bool runOnModule(Module &M) override { - ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(), - Blocklist.get()); + ModuleSanitizerCoverage ModuleSancov(Options +#if LLVM_MAJOR > 10 + , + Allowlist.get(), Blocklist.get() +#endif + ); auto DTCallback = [this](Function &F) -> const DominatorTree * { return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); @@ -444,8 +291,12 @@ class ModuleSanitizerCoverageLegacyPass : public ModulePass { PreservedAnalyses ModuleSanitizerCoveragePass::run(Module & M, ModuleAnalysisManager &MAM) { - ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(), - Blocklist.get()); + ModuleSanitizerCoverage ModuleSancov(Options +#if LLVM_MAJOR > 10 + , + Allowlist.get(), Blocklist.get() +#endif + ); auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); auto DTCallback = [&FAM](Function &F) -> const DominatorTree * { @@ -564,12 +415,6 @@ bool ModuleSanitizerCoverage::instrumentModule( } if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false; - if (Allowlist && - !Allowlist->inSection("coverage", "src", M.getSourceFileName())) - return false; - if (Blocklist && - Blocklist->inSection("coverage", "src", M.getSourceFileName())) - return false; C = &(M.getContext()); DL = &M.getDataLayout(); CurModule = &M; @@ -842,9 +687,6 @@ void ModuleSanitizerCoverage::instrumentFunction( if (F.hasPersonalityFn() && isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) return; - if (Allowlist && !Allowlist->inSection("coverage", "fun", F.getName())) - return; - if (Blocklist && Blocklist->inSection("coverage", 
"fun", F.getName())) return; if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) SplitAllCriticalEdges( F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests()); @@ -915,10 +757,18 @@ GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection( *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage, Constant::getNullValue(ArrayTy), "__sancov_gen_"); +#if LLVM_VERSION_MAJOR > 12 + if (TargetTriple.supportsCOMDAT() && + (TargetTriple.isOSBinFormatELF() || !F.isInterposable())) + if (auto Comdat = getOrCreateFunctionComdat(F, TargetTriple)) + Array->setComdat(Comdat); +#else if (TargetTriple.supportsCOMDAT() && !F.isInterposable()) if (auto Comdat = GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId)) Array->setComdat(Comdat); +#endif + Array->setSection(getSectionName(Section)); #if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0) Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize())); @@ -1088,7 +938,7 @@ void ModuleSanitizerCoverage::InjectTraceForSwitch( } - llvm::sort(Initializers.begin() + 2, Initializers.end(), + llvm::sort(drop_begin(Initializers, 2), [](const Constant *A, const Constant *B) { return cast<ConstantInt>(A)->getLimitedValue() < @@ -1136,10 +986,10 @@ void ModuleSanitizerCoverage::InjectTraceForGep( for (auto GEP : GepTraceTargets) { IRBuilder<> IRB(GEP); - for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) - if (!isa<ConstantInt>(*I) && (*I)->getType()->isIntegerTy()) + for (Use &Idx : GEP->indices()) + if (!isa<ConstantInt>(Idx) && Idx->getType()->isIntegerTy()) IRB.CreateCall(SanCovTraceGepFunction, - {IRB.CreateIntCast(*I, IntptrTy, true)}); + {IRB.CreateIntCast(Idx, IntptrTy, true)}); } @@ -1354,12 +1204,20 @@ INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov", false) ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass( - const SanitizerCoverageOptions &Options, + const SanitizerCoverageOptions &Options +#if LLVM_MAJOR > 10 + , const std::vector<std::string> &AllowlistFiles, - const std::vector<std::string> &BlocklistFiles) { + const std::vector<std::string> &BlocklistFiles +#endif +) { - return new ModuleSanitizerCoverageLegacyPass(Options, AllowlistFiles, - BlocklistFiles); + return new ModuleSanitizerCoverageLegacyPass(Options +#if LLVM_MAJOR > 10 + , + AllowlistFiles, BlocklistFiles +#endif + ); } diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index c24173af..f241447a 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -20,7 +20,7 @@ #include "config.h" #include "types.h" #include "cmplog.h" -#include "llvm-ngram-coverage.h" +#include "llvm-alternative-coverage.h" #include <stdio.h> #include <stdlib.h> @@ -34,6 +34,7 @@ #include <errno.h> #include <sys/mman.h> +#include <sys/syscall.h> #ifndef __HAIKU__ #include <sys/shm.h> #endif @@ -70,7 +71,7 @@ run. It will end up as .comm, so it shouldn't be too wasteful. 
*/ #if MAP_SIZE <= 65536 - #define MAP_INITIAL_SIZE 256000 + #define MAP_INITIAL_SIZE 2097152 #else #define MAP_INITIAL_SIZE MAP_SIZE #endif @@ -96,10 +97,12 @@ int __afl_selective_coverage_temp = 1; #if defined(__ANDROID__) || defined(__HAIKU__) PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX]; +PREV_LOC_T __afl_prev_caller[CTX_MAX_K]; u32 __afl_prev_ctx; u32 __afl_cmp_counter; #else __thread PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX]; +__thread PREV_LOC_T __afl_prev_caller[CTX_MAX_K]; __thread u32 __afl_prev_ctx; __thread u32 __afl_cmp_counter; #endif @@ -122,6 +125,21 @@ static u8 is_persistent; static u8 _is_sancov; +/* Debug? */ + +static u32 __afl_debug; + +/* Already initialized markers */ + +u32 __afl_already_initialized_shm; +u32 __afl_already_initialized_forkserver; +u32 __afl_already_initialized_first; +u32 __afl_already_initialized_second; + +/* Dummy pipe for area_is_valid() */ + +static int __afl_dummy_fd[2] = {2, 2}; + /* ensure we kill the child on termination */ void at_exit(int signal) { @@ -171,7 +189,7 @@ static void __afl_map_shm_fuzz() { char *id_str = getenv(SHM_FUZZ_ENV_VAR); - if (getenv("AFL_DEBUG")) { + if (__afl_debug) { fprintf(stderr, "DEBUG: fuzzcase shmem %s\n", id_str ? id_str : "none"); @@ -186,7 +204,7 @@ static void __afl_map_shm_fuzz() { int shm_fd = -1; /* create the shared memory segment as if it was a file */ - shm_fd = shm_open(shm_file_path, O_RDWR, 0600); + shm_fd = shm_open(shm_file_path, O_RDWR, DEFAULT_PERMISSION); if (shm_fd == -1) { fprintf(stderr, "shm_open() failed for fuzz\n"); @@ -217,7 +235,7 @@ static void __afl_map_shm_fuzz() { __afl_fuzz_len = (u32 *)map; __afl_fuzz_ptr = map + sizeof(u32); - if (getenv("AFL_DEBUG")) { + if (__afl_debug) { fprintf(stderr, "DEBUG: successfully got fuzzing shared memory\n"); @@ -237,6 +255,9 @@ static void __afl_map_shm_fuzz() { static void __afl_map_shm(void) { + if (__afl_already_initialized_shm) return; + __afl_already_initialized_shm = 1; + // if we are not running in afl ensure the map exists if (!__afl_area_ptr) { __afl_area_ptr = __afl_area_ptr_dummy; } @@ -244,8 +265,12 @@ static void __afl_map_shm(void) { if (__afl_final_loc) { - if (__afl_final_loc % 32) - __afl_final_loc = (((__afl_final_loc + 31) >> 5) << 5); + if (__afl_final_loc % 64) { + + __afl_final_loc = (((__afl_final_loc + 63) >> 6) << 6); + + } + __afl_map_size = __afl_final_loc; if (__afl_final_loc > MAP_SIZE) { @@ -290,18 +315,23 @@ static void __afl_map_shm(void) { early-stage __afl_area_initial region that is needed to allow some really hacky .init code to work correctly in projects such as OpenSSL. */ - if (getenv("AFL_DEBUG")) + if (__afl_debug) { + fprintf(stderr, - "DEBUG: id_str %s, __afl_area_ptr %p, __afl_area_initial %p, " - "__afl_map_addr 0x%llx, MAP_SIZE %u, __afl_final_loc %u, " + "DEBUG: (1) id_str %s, __afl_area_ptr %p, __afl_area_initial %p, " + "__afl_area_ptr_dummy 0x%p, __afl_map_addr 0x%llx, MAP_SIZE %u, " + "__afl_final_loc %u, " "max_size_forkserver %u/0x%x\n", id_str == NULL ? 
"<null>" : id_str, __afl_area_ptr, - __afl_area_initial, __afl_map_addr, MAP_SIZE, __afl_final_loc, - FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE); + __afl_area_initial, __afl_area_ptr_dummy, __afl_map_addr, MAP_SIZE, + __afl_final_loc, FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE); + + } if (id_str) { - if (__afl_area_ptr && __afl_area_ptr != __afl_area_initial) { + if (__afl_area_ptr && __afl_area_ptr != __afl_area_initial && + __afl_area_ptr != __afl_area_ptr_dummy) { if (__afl_map_addr) { @@ -323,7 +353,7 @@ static void __afl_map_shm(void) { unsigned char *shm_base = NULL; /* create the shared memory segment as if it was a file */ - shm_fd = shm_open(shm_file_path, O_RDWR, 0600); + shm_fd = shm_open(shm_file_path, O_RDWR, DEFAULT_PERMISSION); if (shm_fd == -1) { fprintf(stderr, "shm_open() failed\n"); @@ -346,17 +376,18 @@ static void __afl_map_shm(void) { } - if (shm_base == MAP_FAILED) { + close(shm_fd); + shm_fd = -1; - close(shm_fd); - shm_fd = -1; + if (shm_base == MAP_FAILED) { fprintf(stderr, "mmap() failed\n"); + perror("mmap for map"); + if (__afl_map_addr) send_forkserver_error(FS_ERROR_MAP_ADDR); else send_forkserver_error(FS_ERROR_MMAP); - perror("mmap for map"); exit(2); @@ -368,8 +399,8 @@ static void __afl_map_shm(void) { if (__afl_map_size && __afl_map_size > MAP_SIZE) { - u8 *map_env = getenv("AFL_MAP_SIZE"); - if (!map_env || atoi(map_env) < MAP_SIZE) { + u8 *map_env = (u8 *)getenv("AFL_MAP_SIZE"); + if (!map_env || atoi((char *)map_env) < MAP_SIZE) { send_forkserver_error(FS_ERROR_MAP_SIZE); _exit(1); @@ -378,7 +409,7 @@ static void __afl_map_shm(void) { } - __afl_area_ptr = shmat(shm_id, (void *)__afl_map_addr, 0); + __afl_area_ptr = (u8 *)shmat(shm_id, (void *)__afl_map_addr, 0); /* Whooooops. */ @@ -405,9 +436,9 @@ static void __afl_map_shm(void) { __afl_map_addr) { - __afl_area_ptr = - mmap((void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE, - MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0); + __afl_area_ptr = (u8 *)mmap( + (void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (__afl_area_ptr == MAP_FAILED) { @@ -425,7 +456,7 @@ static void __afl_map_shm(void) { if (__afl_final_loc > MAP_INITIAL_SIZE) { - __afl_area_ptr = malloc(__afl_final_loc); + __afl_area_ptr = (u8 *)malloc(__afl_final_loc); } @@ -435,11 +466,24 @@ static void __afl_map_shm(void) { __afl_area_ptr_backup = __afl_area_ptr; + if (__afl_debug) { + + fprintf(stderr, + "DEBUG: (2) id_str %s, __afl_area_ptr %p, __afl_area_initial %p, " + "__afl_area_ptr_dummy 0x%p, __afl_map_addr 0x%llx, MAP_SIZE " + "%u, __afl_final_loc %u, " + "max_size_forkserver %u/0x%x\n", + id_str == NULL ? "<null>" : id_str, __afl_area_ptr, + __afl_area_initial, __afl_area_ptr_dummy, __afl_map_addr, MAP_SIZE, + __afl_final_loc, FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE); + + } + if (__afl_selective_coverage) { if (__afl_map_size > MAP_INITIAL_SIZE) { - __afl_area_ptr_dummy = malloc(__afl_map_size); + __afl_area_ptr_dummy = (u8 *)malloc(__afl_map_size); if (__afl_area_ptr_dummy) { @@ -463,7 +507,7 @@ static void __afl_map_shm(void) { id_str = getenv(CMPLOG_SHM_ENV_VAR); - if (getenv("AFL_DEBUG")) { + if (__afl_debug) { fprintf(stderr, "DEBUG: cmplog id_str %s\n", id_str == NULL ? 
"<null>" : id_str); @@ -472,13 +516,19 @@ static void __afl_map_shm(void) { if (id_str) { + if ((__afl_dummy_fd[1] = open("/dev/null", O_WRONLY)) < 0) { + + if (pipe(__afl_dummy_fd) < 0) { __afl_dummy_fd[1] = 1; } + + } + #ifdef USEMMAP const char * shm_file_path = id_str; int shm_fd = -1; struct cmp_map *shm_base = NULL; /* create the shared memory segment as if it was a file */ - shm_fd = shm_open(shm_file_path, O_RDWR, 0600); + shm_fd = shm_open(shm_file_path, O_RDWR, DEFAULT_PERMISSION); if (shm_fd == -1) { perror("shm_open() failed\n"); @@ -505,7 +555,7 @@ static void __afl_map_shm(void) { #else u32 shm_id = atoi(id_str); - __afl_cmp_map = shmat(shm_id, NULL, 0); + __afl_cmp_map = (struct cmp_map *)shmat(shm_id, NULL, 0); #endif __afl_cmp_map_backup = __afl_cmp_map; @@ -522,6 +572,58 @@ static void __afl_map_shm(void) { } +/* unmap SHM. */ + +static void __afl_unmap_shm(void) { + + if (!__afl_already_initialized_shm) return; + + char *id_str = getenv(SHM_ENV_VAR); + + if (id_str) { + +#ifdef USEMMAP + + munmap((void *)__afl_area_ptr, __afl_map_size); + +#else + + shmdt((void *)__afl_area_ptr); + +#endif + + } else if ((!__afl_area_ptr || __afl_area_ptr == __afl_area_initial) && + + __afl_map_addr) { + + munmap((void *)__afl_map_addr, __afl_map_size); + + } + + __afl_area_ptr = __afl_area_ptr_dummy; + + id_str = getenv(CMPLOG_SHM_ENV_VAR); + + if (id_str) { + +#ifdef USEMMAP + + munmap((void *)__afl_cmp_map, __afl_map_size); + +#else + + shmdt((void *)__afl_cmp_map); + +#endif + + __afl_cmp_map = NULL; + + } + + __afl_already_initialized_shm = 0; + +} + #ifdef __linux__ static void __afl_start_snapshots(void) { @@ -550,7 +652,7 @@ static void __afl_start_snapshots(void) { if (read(FORKSRV_FD, &was_killed, 4) != 4) { _exit(1); } - if (getenv("AFL_DEBUG")) { + if (__afl_debug) { fprintf(stderr, "target forkserver recv: %08x\n", was_killed); @@ -627,7 +729,7 @@ static void __afl_start_snapshots(void) { static uint32_t counter = 0; char fn[32]; sprintf(fn, "%09u:forkserver", counter); - s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600); + s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION); if (fd_doc >= 0) { if (write(fd_doc, __afl_fuzz_ptr, *__afl_fuzz_len) != *__afl_fuzz_len) { @@ -727,6 +829,9 @@ static void __afl_start_snapshots(void) { static void __afl_start_forkserver(void) { + if (__afl_already_initialized_forkserver) return; + __afl_already_initialized_forkserver = 1; + struct sigaction orig_action; sigaction(SIGTERM, NULL, &orig_action); old_sigterm_handler = orig_action.sa_handler; @@ -777,7 +882,7 @@ static void __afl_start_forkserver(void) { if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); - if (getenv("AFL_DEBUG")) { + if (__afl_debug) { fprintf(stderr, "target forkserver recv: %08x\n", was_killed); @@ -855,7 +960,7 @@ static void __afl_start_forkserver(void) { static uint32_t counter = 0; char fn[32]; sprintf(fn, "%09u:forkserver", counter); - s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600); + s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION); if (fd_doc >= 0) { if (write(fd_doc, __afl_fuzz_ptr, *__afl_fuzz_len) != *__afl_fuzz_len) { @@ -1012,11 +1117,14 @@ void __afl_manual_init(void) { __afl_sharedmem_fuzzing = 0; if (__afl_area_ptr == NULL) __afl_area_ptr = __afl_area_ptr_dummy; - if (getenv("AFL_DEBUG")) + if (__afl_debug) { + fprintf(stderr, "DEBUG: disabled instrumentation because of " "AFL_DISABLE_LLVM_INSTRUMENTATION\n"); + } + } if (!init_done) { @@ -1056,6 +1164,11 @@ 
__attribute__((constructor(CTOR_PRIO))) void __afl_auto_early(void) { __attribute__((constructor(1))) void __afl_auto_second(void) { + if (__afl_already_initialized_second) return; + __afl_already_initialized_second = 1; + + if (getenv("AFL_DEBUG")) { __afl_debug = 1; } + if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return; u8 *ptr; @@ -1080,17 +1193,18 @@ __attribute__((constructor(1))) void __afl_auto_second(void) { } -} +} // ptr memleak report is a false positive /* preset __afl_area_ptr #1 - at constructor level 0 global variables have not been set */ __attribute__((constructor(0))) void __afl_auto_first(void) { - if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return; - u8 *ptr; + if (__afl_already_initialized_first) return; + __afl_already_initialized_first = 1; - ptr = (u8 *)malloc(1024000); + if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return; + u8 *ptr = (u8 *)malloc(MAP_INITIAL_SIZE); if (ptr && (ssize_t)ptr != -1) { @@ -1099,7 +1213,7 @@ __attribute__((constructor(0))) void __afl_auto_first(void) { } -} +} // ptr memleak report is a false positive /* The following stuff deals with supporting -fsanitize-coverage=trace-pc-guard. It remains non-operational in the traditional, plugin-backed LLVM mode. @@ -1167,11 +1281,13 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { _is_sancov = 1; - if (getenv("AFL_DEBUG")) { + if (__afl_debug) { fprintf(stderr, - "Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges)\n", - start, stop, stop - start); + "Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges) " + "after_fs=%u\n", + start, stop, (unsigned long)(stop - start), + __afl_already_initialized_forkserver); } @@ -1187,6 +1303,40 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { } + /* instrumented code is loaded *after* our forkserver is up. this is a + problem. We cannot prevent collisions then :( */ + if (__afl_already_initialized_forkserver && + __afl_final_loc + 1 + stop - start > __afl_map_size) { + + if (__afl_debug) { + + fprintf(stderr, "Warning: new instrumented code after the forkserver!\n"); + + } + + __afl_final_loc = 2; + + if (1 + stop - start > __afl_map_size) { + + *(start++) = ++__afl_final_loc; + + while (start < stop) { + + if (R(100) < inst_ratio) + *start = ++__afl_final_loc % __afl_map_size; + else + *start = 0; + + start++; + + } + + return; + + } + + } + /* Make sure that the first element in the range is always set - we use that to avoid duplicate calls (which can happen as an artifact of the underlying implementation in LLVM). 
*/ @@ -1204,6 +1354,28 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { } + if (__afl_debug) { + + fprintf(stderr, + "Done __sanitizer_cov_trace_pc_guard_init: __afl_final_loc = %u\n", + __afl_final_loc); + + } + + if (__afl_already_initialized_shm && __afl_final_loc > __afl_map_size) { + + if (__afl_debug) { + + fprintf(stderr, "Reinit shm necessary (+%u)\n", + __afl_final_loc - __afl_map_size); + + } + + __afl_unmap_shm(); + __afl_map_shm(); + + } + } ///// CmpLog instrumentation @@ -1455,24 +1627,48 @@ void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2) { } +void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2) { + + __cmplog_ins_hook1(arg1, arg2, 0); + +} + void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2) { __cmplog_ins_hook2(arg1, arg2, 0); } +void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2) { + + __cmplog_ins_hook2(arg1, arg2, 0); + +} + void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) { __cmplog_ins_hook4(arg1, arg2, 0); } +void __sanitizer_cov_trace_cost_cmp4(uint32_t arg1, uint32_t arg2) { + + __cmplog_ins_hook4(arg1, arg2, 0); + +} + void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2) { __cmplog_ins_hook8(arg1, arg2, 0); } +void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2) { + + __cmplog_ins_hook8(arg1, arg2, 0); + +} + #ifdef WORD_SIZE_64 void __sanitizer_cov_trace_cmp16(uint128_t arg1, uint128_t arg2) { @@ -1480,6 +1676,12 @@ void __sanitizer_cov_trace_cmp16(uint128_t arg1, uint128_t arg2) { } +void __sanitizer_cov_trace_const_cmp16(uint128_t arg1, uint128_t arg2) { + + __cmplog_ins_hook16(arg1, arg2, 0); + +} + #endif void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) { @@ -1523,17 +1725,43 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) { } +__attribute__((weak)) void *__asan_region_is_poisoned(void *beg, size_t size) { + + return NULL; + +} + // POSIX shenanigan to see if an area is mapped. // If it is mapped as X-only, we have a problem, so maybe we should add a check // to avoid to call it on .text addresses -static int area_is_mapped(void *ptr, size_t len) { +static int area_is_valid(void *ptr, size_t len) { + + if (unlikely(!ptr || __asan_region_is_poisoned(ptr, len))) { return 0; } + + long r = syscall(SYS_write, __afl_dummy_fd[1], ptr, len); + + if (r <= 0 || r > len) return 0; + + // even if the write succeed this can be a false positive if we cross + // a page boundary. who knows why. 
+ + char *p = (char *)ptr; + long page_size = sysconf(_SC_PAGE_SIZE); + char *page = (char *)((uintptr_t)p & ~(page_size - 1)) + page_size; - char *p = ptr; - char *page = (char *)((uintptr_t)p & ~(sysconf(_SC_PAGE_SIZE) - 1)); + if (page > p + len) { - int r = msync(page, (p - page) + len, MS_ASYNC); - if (r < 0) return errno != ENOMEM; - return 1; + // no, not crossing a page boundary + return (int)r; + + } else { + + // yes it crosses a boundary, hence we can only return the length of + // rest of the first page, we cannot detect if the next page is valid + // or not, neither by SYS_write nor msync() :-( + return (int)(page - p); + + } } @@ -1541,20 +1769,25 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) { /* u32 i; - if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return; + if (area_is_valid(ptr1, 32) <= 0 || area_is_valid(ptr2, 32) <= 0) return; fprintf(stderr, "rtn arg0="); - for (i = 0; i < 24; i++) + for (i = 0; i < 32; i++) fprintf(stderr, "%02x", ptr1[i]); fprintf(stderr, " arg1="); - for (i = 0; i < 24; i++) + for (i = 0; i < 32; i++) fprintf(stderr, "%02x", ptr2[i]); fprintf(stderr, "\n"); */ if (unlikely(!__afl_cmp_map)) return; + // fprintf(stderr, "RTN1 %p %p\n", ptr1, ptr2); + int l1, l2; + if ((l1 = area_is_valid(ptr1, 32)) <= 0 || + (l2 = area_is_valid(ptr2, 32)) <= 0) + return; + int len = MIN(l1, l2); - if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return; - + // fprintf(stderr, "RTN2 %u\n", len); uintptr_t k = (uintptr_t)__builtin_return_address(0); k = (k >> 4) ^ (k << 8); k &= CMP_MAP_W - 1; @@ -1564,17 +1797,17 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) { if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) { __afl_cmp_map->headers[k].type = CMP_TYPE_RTN; - hits = 0; __afl_cmp_map->headers[k].hits = 1; - __afl_cmp_map->headers[k].shape = 31; + __afl_cmp_map->headers[k].shape = len - 1; + hits = 0; } else { hits = __afl_cmp_map->headers[k].hits++; - if (__afl_cmp_map->headers[k].shape < 31) { + if (__afl_cmp_map->headers[k].shape < len) { - __afl_cmp_map->headers[k].shape = 31; + __afl_cmp_map->headers[k].shape = len - 1; } @@ -1582,9 +1815,10 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) { hits &= CMP_MAP_RTN_H - 1; __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0, - ptr1, 32); + ptr1, len); __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1, - ptr2, 32); + ptr2, len); + // fprintf(stderr, "RTN3\n"); } @@ -1629,12 +1863,20 @@ static u8 *get_llvm_stdstring(u8 *string) { void __cmplog_rtn_gcc_stdstring_cstring(u8 *stdstring, u8 *cstring) { + if (unlikely(!__afl_cmp_map)) return; + if (area_is_valid(stdstring, 32) <= 0 || area_is_valid(cstring, 32) <= 0) + return; + __cmplog_rtn_hook(get_gcc_stdstring(stdstring), cstring); } void __cmplog_rtn_gcc_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) { + if (unlikely(!__afl_cmp_map)) return; + if (area_is_valid(stdstring1, 32) <= 0 || area_is_valid(stdstring2, 32) <= 0) + return; + __cmplog_rtn_hook(get_gcc_stdstring(stdstring1), get_gcc_stdstring(stdstring2)); @@ -1642,12 +1884,20 @@ void __cmplog_rtn_gcc_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) { void __cmplog_rtn_llvm_stdstring_cstring(u8 *stdstring, u8 *cstring) { + if (unlikely(!__afl_cmp_map)) return; + if (area_is_valid(stdstring, 32) <= 0 || area_is_valid(cstring, 32) <= 0) + return; + __cmplog_rtn_hook(get_llvm_stdstring(stdstring), cstring); } void __cmplog_rtn_llvm_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) { + if (unlikely(!__afl_cmp_map)) return; + if 
(area_is_valid(stdstring1, 32) <= 0 || area_is_valid(stdstring2, 32) <= 0) + return; + __cmplog_rtn_hook(get_llvm_stdstring(stdstring1), get_llvm_stdstring(stdstring2)); diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc index a27c4069..74943fb2 100644 --- a/instrumentation/afl-llvm-common.cc +++ b/instrumentation/afl-llvm-common.cc @@ -60,20 +60,25 @@ bool isIgnoreFunction(const llvm::Function *F) { "asan.", "llvm.", "sancov.", - "__ubsan_", + "__ubsan", "ign.", - "__afl_", + "__afl", "_fini", - "__libc_csu", + "__libc_", "__asan", "__msan", "__cmplog", "__sancov", + "__san", + "__cxx_", + "__decide_deferred", + "_GLOBAL", + "_ZZN6__asan", + "_ZZN6__lsan", "msan.", "LLVMFuzzerM", "LLVMFuzzerC", "LLVMFuzzerI", - "__decide_deferred", "maybe_duplicate_stderr", "discard_output", "close_stdout", @@ -89,6 +94,20 @@ bool isIgnoreFunction(const llvm::Function *F) { } + static const char *ignoreSubstringList[] = { + + "__asan", "__msan", "__ubsan", "__lsan", + "__san", "__sanitize", "__cxx", "_GLOBAL__", + "DebugCounter", "DwarfDebug", "DebugLoc" + + }; + + for (auto const &ignoreListFunc : ignoreSubstringList) { + + if (F->getName().contains(ignoreListFunc)) { return true; } + + } + return false; } @@ -351,7 +370,7 @@ static std::string getSourceName(llvm::Function *F) { if (cDILoc) { instFilename = cDILoc->getFilename(); } - if (instFilename.str().empty()) { + if (instFilename.str().empty() && cDILoc) { /* If the original location is empty, try using the inlined location */ diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc index a4b33732..c954054b 100644 --- a/instrumentation/afl-llvm-dict2file.so.cc +++ b/instrumentation/afl-llvm-dict2file.so.cc @@ -90,7 +90,7 @@ void dict2file(int fd, u8 *mem, u32 len) { j = 1; for (i = 0; i < len; i++) { - if (isprint(mem[i])) { + if (isprint(mem[i]) && mem[i] != '\\' && mem[i] != '"') { line[j++] = mem[i]; @@ -357,6 +357,7 @@ bool AFLdict2filePass::runOnModule(Module &M) { StringRef TmpStr; bool HasStr1; getConstantStringInfo(Str1P, TmpStr); + if (TmpStr.empty()) { HasStr1 = false; @@ -403,7 +404,7 @@ bool AFLdict2filePass::runOnModule(Module &M) { dyn_cast<ConstantDataArray>(Var->getInitializer())) { HasStr2 = true; - Str2 = Array->getAsString().str(); + Str2 = Array->getRawDataValues().str(); } @@ -430,7 +431,6 @@ bool AFLdict2filePass::runOnModule(Module &M) { if (literalLength + 1 == optLength) { Str2.append("\0", 1); // add null byte - addedNull = true; } @@ -480,7 +480,7 @@ bool AFLdict2filePass::runOnModule(Module &M) { dyn_cast<ConstantDataArray>(Var->getInitializer())) { HasStr1 = true; - Str1 = Array->getAsString().str(); + Str1 = Array->getRawDataValues().str(); } @@ -521,14 +521,18 @@ bool AFLdict2filePass::runOnModule(Module &M) { optLen = thestring.length(); + if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; } + if (isMemcmp || isStrncmp || isStrncasecmp) { Value * op2 = callInst->getArgOperand(2); ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { uint64_t literalLength = optLen; optLen = ilen->getZExtValue(); + if (optLen < 2) { continue; } if (literalLength + 1 == optLen) { // add null byte thestring.append("\0", 1); addedNull = true; @@ -543,17 +547,21 @@ bool AFLdict2filePass::runOnModule(Module &M) { // was not already added if (!isMemcmp) { - if (addedNull == false) { + if (addedNull == false && thestring[optLen - 1] != '\0') { thestring.append("\0", 1); // add null byte optLen++; } - // ensure we do not have garbage - size_t offset = 
- if (offset + 1 < optLen) optLen = offset + 1;
- thestring = thestring.substr(0, optLen);
+ if (!isStdString) {
+
+ // ensure we do not have garbage
+ size_t offset = thestring.find('\0', 0);
+ if (offset + 1 < optLen) optLen = offset + 1;
+ thestring = thestring.substr(0, optLen);
+
+ }
 }
diff --git a/instrumentation/afl-llvm-lto-instrumentation.so.cc b/instrumentation/afl-llvm-lto-instrumentation.so.cc
index fa494f44..50306224 100644
--- a/instrumentation/afl-llvm-lto-instrumentation.so.cc
+++ b/instrumentation/afl-llvm-lto-instrumentation.so.cc
@@ -69,7 +69,8 @@ class AFLLTOPass : public ModulePass {
 if (getenv("AFL_DEBUG")) debug = 1;
 if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL)
- if ((afl_global_id = (uint32_t)atoi(ptr)) < 0 || afl_global_id >= MAP_SIZE)
+ if ((afl_global_id = (uint32_t)atoi(ptr)) < 0 ||
+ afl_global_id >= MAP_SIZE)
 FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is not between 0 and %u\n",
 ptr, MAP_SIZE - 1);
@@ -518,7 +519,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
 Var->getInitializer())) {
 HasStr2 = true;
- Str2 = Array->getAsString().str();
+ Str2 = Array->getRawDataValues().str();
 }
@@ -545,7 +546,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
 if (literalLength + 1 == optLength) {
 Str2.append("\0", 1); // add null byte
- addedNull = true;
+ // addedNull = true;
 }
@@ -594,7 +595,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
 Var->getInitializer())) {
 HasStr1 = true;
- Str1 = Array->getAsString().str();
+ Str1 = Array->getRawDataValues().str();
 }
@@ -634,15 +635,18 @@ bool AFLLTOPass::runOnModule(Module &M) {
 thestring = Str2;
 optLen = thestring.length();
+ if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
 if (isMemcmp || isStrncmp || isStrncasecmp) {
 Value * op2 = callInst->getArgOperand(2);
 ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+
 if (ilen) {
 uint64_t literalLength = optLen;
 optLen = ilen->getZExtValue();
+ if (optLen < 2) { continue; }
 if (literalLength + 1 == optLen) { // add null byte
 thestring.append("\0", 1);
 addedNull = true;
@@ -657,17 +661,21 @@ bool AFLLTOPass::runOnModule(Module &M) {
 // was not already added
 if (!isMemcmp) {
- if (addedNull == false) {
+ if (addedNull == false && thestring[optLen - 1] != '\0') {
 thestring.append("\0", 1); // add null byte
 optLen++;
 }
- // ensure we do not have garbage
- size_t offset = thestring.find('\0', 0);
- if (offset + 1 < optLen) optLen = offset + 1;
- thestring = thestring.substr(0, optLen);
+ if (!isStdString) {
+
+ // ensure we do not have garbage
+ size_t offset = thestring.find('\0', 0);
+ if (offset + 1 < optLen) optLen = offset + 1;
+ thestring = thestring.substr(0, optLen);
+
+ }
 }
@@ -923,9 +931,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
 if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) {
- uint32_t write_loc = afl_global_id;
-
- if (afl_global_id % 32) write_loc = (((afl_global_id + 32) >> 4) << 4);
+ uint32_t write_loc = (((afl_global_id + 63) >> 6) << 6);
 GlobalVariable *AFLFinalLoc = new GlobalVariable(
 M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc");
diff --git a/instrumentation/afl-llvm-pass.so.cc b/instrumentation/afl-llvm-pass.so.cc
index 57ff3b47..0f773aba 100644
--- a/instrumentation/afl-llvm-pass.so.cc
+++ b/instrumentation/afl-llvm-pass.so.cc
@@ -62,7 +62,7 @@ typedef long double max_align_t;
 #endif
 #include "afl-llvm-common.h"
-#include "llvm-ngram-coverage.h"
+#include "llvm-alternative-coverage.h"
 using namespace llvm;
@@ -82,9 +82,10 @@ class AFLCoverage : public ModulePass {
 protected:
 uint32_t ngram_size = 0;
+ uint32_t ctx_k = 0;
 uint32_t map_size = MAP_SIZE;
 uint32_t function_minimum_size = 1;
- char * ctx_str = NULL, *skip_nozero = NULL;
+ char * ctx_str = NULL, *caller_str = NULL, *skip_nozero = NULL;
 };
@@ -183,10 +184,16 @@ bool AFLCoverage::runOnModule(Module &M) {
 skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");
 unsigned PrevLocSize = 0;
+ unsigned PrevCallerSize = 0;
 char *ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
 if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
+ char *ctx_k_str = getenv("AFL_LLVM_CTX_K");
+ if (!ctx_k_str) ctx_k_str = getenv("AFL_CTX_K");
 ctx_str = getenv("AFL_LLVM_CTX");
+ caller_str = getenv("AFL_LLVM_CALLER");
+
+ bool instrument_ctx = ctx_str || caller_str;
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
 /* Decide previous location vector size (must be a power of two) */
@@ -204,6 +211,31 @@ bool AFLCoverage::runOnModule(Module &M) {
 if (ngram_size)
 PrevLocSize = ngram_size - 1;
 else
+ PrevLocSize = 1;
+
+ /* Decide K-ctx vector size (must be a power of two) */
+ VectorType *PrevCallerTy = NULL;
+
+ if (ctx_k_str)
+ if (sscanf(ctx_k_str, "%u", &ctx_k) != 1 || ctx_k < 1 || ctx_k > CTX_MAX_K)
+ FATAL("Bad value of AFL_CTX_K (must be between 1 and CTX_MAX_K (%u))",
+ CTX_MAX_K);
+
+ if (ctx_k == 1) {
+
+ ctx_k = 0;
+ instrument_ctx = true;
+ caller_str = ctx_k_str; // Enable CALLER instead
+
+ }
+
+ if (ctx_k) {
+
+ PrevCallerSize = ctx_k;
+ instrument_ctx = true;
+
+ }
+
 #else
 if (ngram_size_str)
 #ifndef LLVM_VERSION_PATCH
@@ -217,8 +249,20 @@ bool AFLCoverage::runOnModule(Module &M) {
 "%d.%d.%d!",
 LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH);
 #endif
+ if (ctx_k_str)
+ #ifndef LLVM_VERSION_PATCH
+ FATAL(
+ "Sorry, K-CTX branch coverage is not supported with llvm version "
+ "%d.%d.%d!",
+ LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, 0);
+ #else
+ FATAL(
+ "Sorry, K-CTX branch coverage is not supported with llvm version "
+ "%d.%d.%d!",
+ LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH);
+ #endif
+ PrevLocSize = 1;
 #endif
- PrevLocSize = 1;
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
 int PrevLocVecSize = PowerOf2Ceil(PrevLocSize);
@@ -231,6 +275,17 @@ bool AFLCoverage::runOnModule(Module &M) {
 );
 #endif
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+ int PrevCallerVecSize = PowerOf2Ceil(PrevCallerSize);
+ if (ctx_k)
+ PrevCallerTy = VectorType::get(IntLocTy, PrevCallerVecSize
+ #if LLVM_VERSION_MAJOR >= 12
+ ,
+ false
+ #endif
+ );
+#endif
+
 /* Get globals for the SHM region and the previous location. Note that
 __afl_prev_loc is thread-local. */
@@ -238,9 +293,10 @@ bool AFLCoverage::runOnModule(Module &M) {
 new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
 GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
 GlobalVariable *AFLPrevLoc;
+ GlobalVariable *AFLPrevCaller;
 GlobalVariable *AFLContext = NULL;
- if (ctx_str)
+ if (ctx_str || caller_str)
 #if defined(__ANDROID__) || defined(__HAIKU__)
 AFLContext = new GlobalVariable(
 M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx");
@@ -275,6 +331,31 @@ bool AFLCoverage::runOnModule(Module &M) {
 #endif
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
+ if (ctx_k)
+ #if defined(__ANDROID__) || defined(__HAIKU__)
+ AFLPrevCaller = new GlobalVariable(
+ M, PrevCallerTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+ /* Initializer */ nullptr, "__afl_prev_caller");
+ #else
+ AFLPrevCaller = new GlobalVariable(
+ M, PrevCallerTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+ /* Initializer */ nullptr, "__afl_prev_caller",
+ /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel,
+ /* AddressSpace */ 0, /* IsExternallyInitialized */ false);
+ #endif
+ else
+#endif
+#if defined(__ANDROID__) || defined(__HAIKU__)
+ AFLPrevCaller =
+ new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, 0,
+ "__afl_prev_caller");
+#else
+ AFLPrevCaller = new GlobalVariable(
+ M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_caller",
+ 0, GlobalVariable::GeneralDynamicTLSModel, 0, false);
+#endif
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
 /* Create the vector shuffle mask for updating the previous block history.
 Note that the first element of the vector will store cur_loc, so just set
 it to undef to allow the optimizer to do its thing. */
@@ -288,13 +369,30 @@ bool AFLCoverage::runOnModule(Module &M) {
 PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize));
 Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle);
+
+ Constant * PrevCallerShuffleMask = NULL;
+ SmallVector<Constant *, 32> PrevCallerShuffle = {UndefValue::get(Int32Ty)};
+
+ if (ctx_k) {
+
+ for (unsigned I = 0; I < PrevCallerSize - 1; ++I)
+ PrevCallerShuffle.push_back(ConstantInt::get(Int32Ty, I));
+
+ for (int I = PrevCallerSize; I < PrevCallerVecSize; ++I)
+ PrevCallerShuffle.push_back(ConstantInt::get(Int32Ty, PrevCallerSize));
+
+ PrevCallerShuffleMask = ConstantVector::get(PrevCallerShuffle);
+
+ }
+
 #endif
 // other constants we need
 ConstantInt *Zero = ConstantInt::get(Int8Ty, 0);
 ConstantInt *One = ConstantInt::get(Int8Ty, 1);
- LoadInst *PrevCtx = NULL; // CTX sensitive coverage
+ Value * PrevCtx = NULL; // CTX sensitive coverage
+ LoadInst *PrevCaller = NULL; // K-CTX coverage
 /* Instrument all the things! */
@@ -318,12 +416,30 @@ bool AFLCoverage::runOnModule(Module &M) {
 IRBuilder<> IRB(&(*IP));
 // Context sensitive coverage
- if (ctx_str && &BB == &F.getEntryBlock()) {
+ if (instrument_ctx && &BB == &F.getEntryBlock()) {
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+ if (ctx_k) {
+
+ PrevCaller = IRB.CreateLoad(AFLPrevCaller);
+ PrevCaller->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+ PrevCtx =
+ IRB.CreateZExt(IRB.CreateXorReduce(PrevCaller), IRB.getInt32Ty());
+
+ } else
+
+#endif
+ {
- // load the context ID of the previous function and write to to a local
- // variable on the stack
- PrevCtx = IRB.CreateLoad(AFLContext);
- PrevCtx->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+ // load the context ID of the previous function and write to a
+ // local variable on the stack
+ LoadInst *PrevCtxLoad = IRB.CreateLoad(AFLContext);
+ PrevCtxLoad->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+ PrevCtx = PrevCtxLoad;
+
+ }
 // does the function have calls? and is any of the calls larger than one
 // basic block?
@@ -354,10 +470,32 @@ bool AFLCoverage::runOnModule(Module &M) {
 // if yes we store a context ID for this function in the global var
 if (has_calls) {
- ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size));
- StoreInst * StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
- StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
- MDNode::get(C, None));
+ Value *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size));
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+ if (ctx_k) {
+
+ Value *ShuffledPrevCaller = IRB.CreateShuffleVector(
+ PrevCaller, UndefValue::get(PrevCallerTy),
+ PrevCallerShuffleMask);
+ Value *UpdatedPrevCaller = IRB.CreateInsertElement(
+ ShuffledPrevCaller, NewCtx, (uint64_t)0);
+
+ StoreInst *Store =
+ IRB.CreateStore(UpdatedPrevCaller, AFLPrevCaller);
+ Store->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ } else
+
+#endif
+ {
+
+ if (ctx_str) NewCtx = IRB.CreateXor(PrevCtx, NewCtx);
+ StoreInst *StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
+ StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ }
 }
@@ -411,13 +549,20 @@ bool AFLCoverage::runOnModule(Module &M) {
 // in CTX mode we have to restore the original context for the caller -
 // she might be calling other functions which need the correct CTX
- if (ctx_str && has_calls) {
+ if (instrument_ctx && has_calls) {
 Instruction *Inst = BB.getTerminator();
 if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
 IRBuilder<> Post_IRB(Inst);
- StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+
+ StoreInst *RestoreCtx;
+ #ifdef AFL_HAVE_VECTOR_INTRINSICS
+ if (ctx_k)
+ RestoreCtx = IRB.CreateStore(PrevCaller, AFLPrevCaller);
+ else
+ #endif
+ RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
 RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
 MDNode::get(C, None));
@@ -458,7 +603,7 @@ bool AFLCoverage::runOnModule(Module &M) {
 #endif
 PrevLocTrans = PrevLoc;
- if (ctx_str)
+ if (instrument_ctx)
 PrevLocTrans =
 IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty);
 else
@@ -538,19 +683,27 @@ bool AFLCoverage::runOnModule(Module &M) {
 Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1),
 AFLPrevLoc);
+ Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 }
 // in CTX mode we have to restore the original context for the caller -
 // she might be calling other functions which need the correct CTX.
 // Currently this is only needed for the Ubuntu clang-6.0 bug
- if (ctx_str && has_calls) {
+ if (instrument_ctx && has_calls) {
 Instruction *Inst = BB.getTerminator();
 if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
 IRBuilder<> Post_IRB(Inst);
- StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+
+ StoreInst *RestoreCtx;
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+ if (ctx_k)
+ RestoreCtx = IRB.CreateStore(PrevCaller, AFLPrevCaller);
+ else
+#endif
+ RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
 RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
 MDNode::get(C, None));
diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc
index b5cc1882..ad334d3b 100644
--- a/instrumentation/cmplog-instructions-pass.cc
+++ b/instrumentation/cmplog-instructions-pass.cc
@@ -19,12 +19,13 @@
 #include <stdlib.h>
 #include <unistd.h>
+#include <iostream>
 #include <list>
 #include <string>
 #include <fstream>
 #include <sys/time.h>
-#include "llvm/Config/llvm-config.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -265,13 +266,20 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 unsigned int max_size = Val->getType()->getIntegerBitWidth(), cast_size;
 unsigned char do_cast = 0;
- if (!SI->getNumCases() || max_size < 16 || max_size % 8) {
+ if (!SI->getNumCases() || max_size < 16) {
 // if (!be_quiet) errs() << "skip trivial switch..\n";
 continue;
 }
+ if (max_size % 8) {
+
+ max_size = (((max_size / 8) + 1) * 8);
+ do_cast = 1;
+
+ }
+
 IRBuilder<> IRB(SI->getParent());
 IRB.SetInsertPoint(SI);
@@ -310,36 +318,8 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 if (do_cast) {
- ConstantInt *cint = dyn_cast<ConstantInt>(Val);
- if (cint) {
-
- uint64_t val = cint->getZExtValue();
- // fprintf(stderr, "ConstantInt: %lu\n", val);
- switch (cast_size) {
-
- case 8:
- CompareTo = ConstantInt::get(Int8Ty, val);
- break;
- case 16:
- CompareTo = ConstantInt::get(Int16Ty, val);
- break;
- case 32:
- CompareTo = ConstantInt::get(Int32Ty, val);
- break;
- case 64:
- CompareTo = ConstantInt::get(Int64Ty, val);
- break;
- case 128:
- CompareTo = ConstantInt::get(Int128Ty, val);
- break;
-
- }
-
- } else {
-
- CompareTo = IRB.CreateBitCast(Val, IntegerType::get(C, cast_size));
-
- }
+ CompareTo =
+ IRB.CreateIntCast(CompareTo, IntegerType::get(C, cast_size), false);
 }
@@ -361,27 +341,8 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 if (do_cast) {
- uint64_t val = cint->getZExtValue();
- // fprintf(stderr, "ConstantInt: %lu\n", val);
- switch (cast_size) {
-
- case 8:
- new_param = ConstantInt::get(Int8Ty, val);
- break;
- case 16:
- new_param = ConstantInt::get(Int16Ty, val);
- break;
- case 32:
- new_param = ConstantInt::get(Int32Ty, val);
- break;
- case 64:
- new_param = ConstantInt::get(Int64Ty, val);
- break;
- case 128:
- new_param = ConstantInt::get(Int128Ty, val);
- break;
-
- }
+ new_param =
+ IRB.CreateIntCast(cint, IntegerType::get(C, cast_size), false);
 }
@@ -457,7 +418,7 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 IntegerType * intTyOp0 = NULL;
 IntegerType * intTyOp1 = NULL;
 unsigned max_size = 0, cast_size = 0;
- unsigned char attr = 0, do_cast = 0;
+ unsigned char attr = 0;
 std::vector<Value *> args;
 CmpInst *cmpInst = dyn_cast<CmpInst>(selectcmpInst);
@@ -523,7 +484,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 max_size = 128;
 attr += 8;
- do_cast = 1;
 } else {
@@ -540,7 +500,9 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 }
- if (!max_size || max_size % 8 || max_size < 16) { continue; }
+ if (!max_size || max_size < 16) { continue; }
+
+ if (max_size % 8) { max_size = (((max_size / 8) + 1) * 8); }
 if (max_size > 128) {
@@ -553,7 +515,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 }
 max_size = 128;
- do_cast = 1;
 }
@@ -569,92 +530,30 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 break;
 default:
 cast_size = 128;
- do_cast = 1;
 }
- if (do_cast) {
-
- // F*cking LLVM optimized out any kind of bitcasts of ConstantInt values
- // creating illegal calls. WTF. So we have to work around this.
-
- ConstantInt *cint = dyn_cast<ConstantInt>(op0);
- if (cint) {
-
- uint64_t val = cint->getZExtValue();
- // fprintf(stderr, "ConstantInt: %lu\n", val);
- ConstantInt *new_param = NULL;
- switch (cast_size) {
-
- case 8:
- new_param = ConstantInt::get(Int8Ty, val);
- break;
- case 16:
- new_param = ConstantInt::get(Int16Ty, val);
- break;
- case 32:
- new_param = ConstantInt::get(Int32Ty, val);
- break;
- case 64:
- new_param = ConstantInt::get(Int64Ty, val);
- break;
- case 128:
- new_param = ConstantInt::get(Int128Ty, val);
- break;
-
- }
-
- if (!new_param) { continue; }
- args.push_back(new_param);
-
- } else {
-
- Value *V0 = IRB.CreateBitCast(op0, IntegerType::get(C, cast_size));
- args.push_back(V0);
-
- }
-
- cint = dyn_cast<ConstantInt>(op1);
- if (cint) {
-
- uint64_t val = cint->getZExtValue();
- ConstantInt *new_param = NULL;
- switch (cast_size) {
-
- case 8:
- new_param = ConstantInt::get(Int8Ty, val);
- break;
- case 16:
- new_param = ConstantInt::get(Int16Ty, val);
- break;
- case 32:
- new_param = ConstantInt::get(Int32Ty, val);
- break;
- case 64:
- new_param = ConstantInt::get(Int64Ty, val);
- break;
- case 128:
- new_param = ConstantInt::get(Int128Ty, val);
- break;
-
- }
-
- if (!new_param) { continue; }
- args.push_back(new_param);
-
- } else {
-
- Value *V1 = IRB.CreateBitCast(op1, IntegerType::get(C, cast_size));
- args.push_back(V1);
-
- }
-
- } else {
-
- args.push_back(op0);
- args.push_back(op1);
-
- }
+ // errs() << "[CMPLOG] cmp " << *cmpInst << "(in function " <<
+ // cmpInst->getFunction()->getName() << ")\n";
+
+ // first bitcast to integer type of the same bitsize as the original
+ // type (this is a nop, if already integer)
+ Value *op0_i = IRB.CreateBitCast(
+ op0, IntegerType::get(C, op0->getType()->getPrimitiveSizeInBits()));
+ // then create an int cast, which does zext, trunc or bitcast. In our case
+ // usually zext to the next larger supported type (this is a nop if
+ // already the right type)
+ Value *V0 =
+ IRB.CreateIntCast(op0_i, IntegerType::get(C, cast_size), false);
+ args.push_back(V0);
+ Value *op1_i = IRB.CreateBitCast(
+ op1, IntegerType::get(C, op1->getType()->getPrimitiveSizeInBits()));
+ Value *V1 =
+ IRB.CreateIntCast(op1_i, IntegerType::get(C, cast_size), false);
+ args.push_back(V1);
+
+ // errs() << "[CMPLOG] casted parameters:\n0: " << *V0 << "\n1: " << *V1
+ // << "\n";
 ConstantInt *attribute = ConstantInt::get(Int8Ty, attr);
 args.push_back(attribute);
@@ -667,7 +566,7 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 }
 // fprintf(stderr, "_ExtInt(%u) castTo %u with attr %u didcast %u\n",
- // max_size, cast_size, attr, do_cast);
+ // max_size, cast_size, attr);
 switch (cast_size) {
diff --git a/instrumentation/compare-transform-pass.so.cc b/instrumentation/compare-transform-pass.so.cc
index da5cf7e9..3ecba4e6 100644
--- a/instrumentation/compare-transform-pass.so.cc
+++ b/instrumentation/compare-transform-pass.so.cc
@@ -229,9 +229,9 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 dyn_cast<ConstantDataArray>(Var->getInitializer())) {
 HasStr2 = true;
- Str2 = Array->getAsString();
+ Str2 = Array->getRawDataValues();
 valueMap[Str2P] = new std::string(Str2.str());
- fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
+ // fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
 }
@@ -254,7 +254,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 Var->getInitializer())) {
 HasStr1 = true;
- Str1 = Array->getAsString();
+ Str1 = Array->getRawDataValues();
 valueMap[Str1P] = new std::string(Str1.str());
 // fprintf(stderr, "glo1 %s\n", Str1.str().c_str());
@@ -316,7 +316,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 uint64_t len = ilen->getZExtValue();
 // if len is zero this is a pointless call but allow real
 // implementation to worry about that
- if (!len) continue;
+ if (len < 2) continue;
 if (isMemcmp) {
@@ -362,19 +362,22 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 bool HasStr1 = getConstantStringInfo(Str1P, Str1);
 bool HasStr2 = getConstantStringInfo(Str2P, Str2);
 uint64_t constStrLen, unrollLen, constSizedLen = 0;
- bool isMemcmp =
- !callInst->getCalledFunction()->getName().compare(StringRef("memcmp"));
- bool isSizedcmp = isMemcmp ||
- !callInst->getCalledFunction()->getName().compare(
- StringRef("strncmp")) ||
- !callInst->getCalledFunction()->getName().compare(
- StringRef("strncasecmp"));
+ bool isMemcmp = false;
+ bool isSizedcmp = false;
+ bool isCaseInsensitive = false;
+ Function * Callee = callInst->getCalledFunction();
+ if (Callee) {
+
+ isMemcmp = Callee->getName().compare("memcmp") == 0;
+ isSizedcmp = isMemcmp || Callee->getName().compare("strncmp") == 0 ||
+ Callee->getName().compare("strncasecmp") == 0;
+ isCaseInsensitive = Callee->getName().compare("strcasecmp") == 0 ||
+ Callee->getName().compare("strncasecmp") == 0;
+
+ }
+
 Value *sizedValue = isSizedcmp ? callInst->getArgOperand(2) : NULL;
 bool isConstSized = sizedValue && isa<ConstantInt>(sizedValue);
- bool isCaseInsensitive = !callInst->getCalledFunction()->getName().compare(
- StringRef("strcasecmp")) ||
- !callInst->getCalledFunction()->getName().compare(
- StringRef("strncasecmp"));
 if (!(HasStr1 || HasStr2)) {
@@ -391,7 +394,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 if (val && !val->empty()) {
 Str2 = StringRef(*val);
- HasStr2 = true;
+ // HasStr2 = true;
 }
@@ -417,15 +420,29 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 }
+ if (TmpConstStr.length() < 2 ||
+ (TmpConstStr.length() == 2 && !TmpConstStr[1])) {
+
+ continue;
+
+ }
+
 // add null termination character implicit in c strings
- TmpConstStr.append("\0", 1);
+ if (!isMemcmp && TmpConstStr[TmpConstStr.length() - 1]) {
+
+ TmpConstStr.append("\0", 1);
+
+ }
 // in the unusual case the const str has embedded null
 // characters, the string comparison functions should terminate
 // at the first null
- if (!isMemcmp)
+ if (!isMemcmp) {
+
 TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1);
+
+ }
+
 constStrLen = TmpConstStr.length();
 // prefer use of StringRef (in comparison to std::string a StringRef has
 // built-in runtime bounds checking, which makes debugging easier)
@@ -436,15 +453,6 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 else
 unrollLen = constStrLen;
- /*
- if (!be_quiet)
- errs() << callInst->getCalledFunction()->getName() << ": unroll len "
- << unrollLen
- << ((isSizedcmp && !isConstSized) ? ", variable n" : "") << ": - "
- << ConstStr << "\n";
- */
-
 /* split before the call instruction */
 BasicBlock *bb = callInst->getParent();
 BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(callInst));
diff --git a/instrumentation/llvm-ngram-coverage.h b/instrumentation/llvm-alternative-coverage.h
index 666839c8..0d7b3957 100644
--- a/instrumentation/llvm-ngram-coverage.h
+++ b/instrumentation/llvm-alternative-coverage.h
@@ -14,5 +14,8 @@ typedef u64 PREV_LOC_T;
 /* Maximum ngram size */
 #define NGRAM_SIZE_MAX 16U
+/* Maximum K for top-K context sensitivity */
+#define CTX_MAX_K 32U
+
 #endif
diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc
index b6d8c466..b02a89fb 100644
--- a/instrumentation/split-compares-pass.so.cc
+++ b/instrumentation/split-compares-pass.so.cc
@@ -149,8 +149,11 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) {
 auto op1 = FcmpInst->getOperand(1);
 /* find out what the new predicate is going to be */
- auto pred = dyn_cast<CmpInst>(FcmpInst)->getPredicate();
+ auto cmp_inst = dyn_cast<CmpInst>(FcmpInst);
+ if (!cmp_inst) { continue; }
+ auto pred = cmp_inst->getPredicate();
 CmpInst::Predicate new_pred;
+
 switch (pred) {
 case CmpInst::FCMP_UGE:
@@ -276,8 +279,11 @@ bool SplitComparesTransform::simplifyCompares(Module &M) {
 auto op1 = IcmpInst->getOperand(1);
 /* find out what the new predicate is going to be */
- auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+ auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+ if (!cmp_inst) { continue; }
+ auto pred = cmp_inst->getPredicate();
 CmpInst::Predicate new_pred;
+
 switch (pred) {
 case CmpInst::ICMP_UGE:
@@ -407,12 +413,16 @@ bool SplitComparesTransform::simplifyIntSignedness(Module &M) {
 auto op1 = IcmpInst->getOperand(1);
 IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
+ if (!intTyOp0) { continue; }
 unsigned bitw = intTyOp0->getBitWidth();
 IntegerType *IntType = IntegerType::get(C, bitw);
 /* get the new predicate */
- auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+ auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+ if (!cmp_inst) { continue; }
+ auto pred = cmp_inst->getPredicate();
 CmpInst::Predicate new_pred;
+
 if (pred == CmpInst::ICMP_SGT) {
 new_pred = CmpInst::ICMP_UGT;
@@ -602,12 +612,16 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
 if (op_size != op1->getType()->getPrimitiveSizeInBits()) { continue; }
 const unsigned int sizeInBits = op0->getType()->getPrimitiveSizeInBits();
+
+ // BUG FIXME TODO: u64 does not work for > 64 bit ... e.g. 80 and 128 bit
+ if (sizeInBits > 64) { continue; }
+
 const unsigned int precision = sizeInBits == 32 ? 24
 : sizeInBits == 64 ? 53
 : sizeInBits == 128 ? 113
 : sizeInBits == 16 ? 11
- /* sizeInBits == 80 */
- : 65;
+ : sizeInBits == 80 ? 65
+ : sizeInBits - 8;
 const unsigned shiftR_exponent = precision - 1;
 const unsigned long long mask_fraction =
@@ -1111,7 +1125,9 @@ size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned bitw) {
 auto op0 = IcmpInst->getOperand(0);
 auto op1 = IcmpInst->getOperand(1);
- auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+ auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+ if (!cmp_inst) { continue; }
+ auto pred = cmp_inst->getPredicate();
 BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));
@@ -1300,12 +1316,9 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 case 64:
 count += splitIntCompares(M, bitw);
- /*
- if (!be_quiet)
- errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
- count
- << " split\n";
- */
+ if (debug)
+ errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+ << " split\n";
 bitw >>= 1;
 #if LLVM_VERSION_MAJOR > 3 || \
 (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
@@ -1313,12 +1326,9 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 #endif
 case 32:
 count += splitIntCompares(M, bitw);
- /*
- if (!be_quiet)
- errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
- count
- << " split\n";
- */
+ if (debug)
+ errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+ << " split\n";
 bitw >>= 1;
 #if LLVM_VERSION_MAJOR > 3 || \
 (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
@@ -1326,13 +1336,10 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 #endif
 case 16:
 count += splitIntCompares(M, bitw);
- /*
- if (!be_quiet)
- errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
- count
- << " split\n";
- */
- bitw >>= 1;
+ if (debug)
+ errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+ << " split\n";
+ // bitw >>= 1;
 break;
 default: