diff options
Diffstat (limited to 'llvm_mode')
-rw-r--r-- | llvm_mode/GNUmakefile | 76 | ||||
-rw-r--r-- | llvm_mode/README.lto.md | 215 | ||||
-rw-r--r-- | llvm_mode/afl-clang-fast.c | 145 | ||||
-rw-r--r-- | llvm_mode/afl-ld.c | 839 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-lto-instrumentation.so.cc | 566 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-pass.so.cc | 68 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-rt-lto.o.c | 23 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-rt.o.c | 236 | ||||
-rw-r--r-- | llvm_mode/compare-transform-pass.so.cc | 143 |
9 files changed, 1072 insertions, 1239 deletions
diff --git a/llvm_mode/GNUmakefile b/llvm_mode/GNUmakefile index e3708efa..67c31f14 100644 --- a/llvm_mode/GNUmakefile +++ b/llvm_mode/GNUmakefile @@ -58,19 +58,20 @@ endif ifeq "$(LLVM_MAJOR)" "9" $(info [+] llvm_mode detected llvm 9, enabling neverZero implementation) - $(info [+] llvm_mode detected llvm 9, enabling afl-clang-lto LTO implementation) - LLVM_LTO = 1 endif ifeq "$(LLVM_NEW_API)" "1" $(info [+] llvm_mode detected llvm 10+, enabling neverZero implementation and c++14) - $(info [+] llvm_mode detected llvm 9, enabling afl-clang-lto LTO implementation) LLVM_STDCXX = c++14 +endif + +ifeq "$(LLVM_MAJOR)" "11" + $(info [+] llvm_mode detected llvm 11, enabling afl-clang-lto LTO implementation) LLVM_LTO = 1 endif ifeq "$(LLVM_LTO)" "0" - $(info [+] llvm_mode detected llvm < 9, afl-clang-lto LTO will not be build.) + $(info [+] llvm_mode detected llvm < 11, afl-clang-lto LTO will not be build.) endif ifeq "$(LLVM_APPLE)" "1" @@ -81,8 +82,8 @@ endif # this seems to be busted on some distros, so using the one in $PATH is # probably better. -CC = $(LLVM_BINDIR)/clang -CXX = $(LLVM_BINDIR)/clang++ +CC ?= $(LLVM_BINDIR)/clang +CXX ?= $(LLVM_BINDIR)/clang++ ifeq "$(shell test -e $(CC) || echo 1 )" "1" # llvm-config --bindir may not providing a valid path, so ... @@ -125,14 +126,18 @@ else endif endif -ifneq "$(AFL_CLANG_FLTO)" "" -ifeq "$(AFL_REAL_LD)" "" - AFL_REAL_LD = $(shell readlink /bin/ld 2>/dev/null) - ifeq "$(AFL_REAL_LD)" "" - AFL_REAL_LD = $(shell readlink /usr/bin/ld 2>/dev/null) +ifeq "$(LLVM_LTO)" "1" + ifneq "$(AFL_CLANG_FLTO)" "" + ifeq "$(AFL_REAL_LD)" "" + ifneq "$(shell readlink $(LLVM_BINDIR)/ld.lld 2>&1)" "" + AFL_REAL_LD = $(LLVM_BINDIR)/ld.lld + else + $(warn ld.lld not found, can not enable LTO mode) + LLVM_LTO = 0 + endif + endif endif endif -endif AFL_CLANG_FUSELD= ifneq "$(AFL_CLANG_FLTO)" "" @@ -141,13 +146,27 @@ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -fuse-ld=`command -v endif endif +CLANG_BIN = $(basename $(CC)) +CLANGPP_BIN = $(basename $(CXX)) +ifeq "$(shell test -e $(CLANG_BIN) || echo 1 )" "1" + CLANG_BIN = $(CC) + CLANGPP_BIN = $(CXX) +endif + +ifeq "$(CC)" "$(LLVM_BINDIR)/clang" + USE_BINDIR = 1 +else + USE_BINDIR = 0 +endif + CFLAGS ?= -O3 -funroll-loops -D_FORTIFY_SOURCE=2 -override CFLAGS = -Wall \ +override CFLAGS += -Wall \ -g -Wno-pointer-sign -I ../include/ \ -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \ -DLLVM_BINDIR=\"$(LLVM_BINDIR)\" -DVERSION=\"$(VERSION)\" \ -DLLVM_VERSION=\"$(LLVMVER)\" -DAFL_CLANG_FLTO=\"$(AFL_CLANG_FLTO)\" \ - -DAFL_REAL_LD=\"$(AFL_REAL_LD)\" -DAFL_CLANG_FUSELD=\"$(AFL_CLANG_FUSELD)\" -Wno-unused-function + -DAFL_REAL_LD=\"$(AFL_REAL_LD)\" -DAFL_CLANG_FUSELD=\"$(AFL_CLANG_FUSELD)\" \ + -DCLANG_BIN=\"$(CC)\" -DCLANGPP_BIN=\"$(CXX)\" -DUSE_BINDIR=$(USE_BINDIR) -Wno-unused-function ifdef AFL_TRACE_PC $(info Compile option AFL_TRACE_PC is deprecated, just set AFL_LLVM_INSTRUMENT=PCGUARD to activate when compiling targets ) endif @@ -183,7 +202,7 @@ ifeq "$(TEST_MMAP)" "1" LDFLAGS += -lrt endif - PROGS = ../afl-clang-fast ../afl-ld ../afl-llvm-pass.so ../afl-llvm-lto-whitelist.so ../afl-llvm-lto-instrumentation.so ../libLLVMInsTrim.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so ../cmplog-routines-pass.so ../cmplog-instructions-pass.so + PROGS = ../afl-clang-fast ../afl-llvm-pass.so ../afl-llvm-lto-whitelist.so ../afl-llvm-lto-instrumentation.so ../libLLVMInsTrim.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so ../cmplog-routines-pass.so ../cmplog-instructions-pass.so # If prerequisites are not given, warn, do not build anything, and exit with code 0 ifeq "$(LLVMVER)" "" @@ -251,16 +270,6 @@ ifeq "$(LLVM_LTO)" "1" endif endif -../afl-ld: afl-ld.c -ifneq "$(AFL_CLANG_FLTO)" "" -ifeq "$(LLVM_LTO)" "1" - $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) - ln -sf afl-ld ../ld - @rm -f .test-instr - @-export AFL_QUIET=1 AFL_PATH=.. PATH="..:$(PATH)" ; ../afl-clang-lto -Wl,--afl -o .test-instr ../test-instr.c && echo "[+] afl-clang-lto and afl-ld seem to work fine :)" || echo "[!] WARNING: clang seems to have a hardcoded "'/bin/ld'" - check README.lto" - @rm -f .test-instr -endif -endif ../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps -$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) @@ -275,9 +284,12 @@ ifeq "$(LLVM_LTO)" "1" $(CXX) $(CLANG_CFL) -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) endif -../afl-llvm-lto-instrumentation.so: afl-llvm-lto-instrumentation.so.cc MarkNodes.cc +../afl-llvm-lto-instrumentation.so: afl-llvm-lto-instrumentation.so.cc ifeq "$(LLVM_LTO)" "1" - $(CXX) $(CLANG_CFL) -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) + $(CXX) $(CLANG_CFL) -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) + $(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto.o + @$(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -m64 -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto-64.o 2>/dev/null; if [ "$$?" = "0" ]; then : ; fi + @$(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -m32 -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto-32.o 2>/dev/null; if [ "$$?" = "0" ]; then : ; fi endif # laf @@ -296,15 +308,15 @@ endif $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) ../afl-llvm-rt.o: afl-llvm-rt.o.c | test_deps - $(CC) $(CFLAGS) -fPIC -c $< -o $@ + $(CC) $(CFLAGS) -Wno-unused-result -fPIC -c $< -o $@ ../afl-llvm-rt-32.o: afl-llvm-rt.o.c | test_deps @printf "[*] Building 32-bit variant of the runtime (-m32)... " - @$(CC) $(CFLAGS) -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi + @$(CC) $(CFLAGS) -Wno-unused-result -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi ../afl-llvm-rt-64.o: afl-llvm-rt.o.c | test_deps @printf "[*] Building 64-bit variant of the runtime (-m64)... " - @$(CC) $(CFLAGS) -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi + @$(CC) $(CFLAGS) -Wno-unused-result -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi test_build: $(PROGS) @echo "[*] Testing the CC wrapper and instrumentation output..." @@ -323,7 +335,7 @@ all_done: test_build install: all install -d -m 755 $${DESTDIR}$(BIN_PATH) $${DESTDIR}$(HELPER_PATH) $${DESTDIR}$(DOC_PATH) $${DESTDIR}$(MISC_PATH) if [ -f ../afl-clang-fast -a -f ../libLLVMInsTrim.so -a -f ../afl-llvm-rt.o ]; then set -e; install -m 755 ../afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 ../libLLVMInsTrim.so ../afl-llvm-pass.so ../afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi - if [ -f ../afl-clang-lto -a -f ../afl-ld ]; then set -e; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto++; install -m 755 ../afl-ld $${DESTDIR}$(HELPER_PATH); ln -sf afl-ld $${DESTDIR}$(HELPER_PATH)/ld; install -m 755 ../afl-llvm-lto-instrumentation.so $${DESTDIR}$(HELPER_PATH); install -m 755 ../afl-llvm-lto-whitelist.so $${DESTDIR}$(HELPER_PATH); fi + if [ -f ../afl-clang-lto ]; then set -e; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto++; install -m 755 ../afl-llvm-lto-instrumentation.so ../afl-llvm-rt-lto*.o ../afl-llvm-lto-whitelist.so $${DESTDIR}$(HELPER_PATH); fi if [ -f ../afl-llvm-rt-32.o ]; then set -e; install -m 755 ../afl-llvm-rt-32.o $${DESTDIR}$(HELPER_PATH); fi if [ -f ../afl-llvm-rt-64.o ]; then set -e; install -m 755 ../afl-llvm-rt-64.o $${DESTDIR}$(HELPER_PATH); fi if [ -f ../compare-transform-pass.so ]; then set -e; install -m 755 ../compare-transform-pass.so $${DESTDIR}$(HELPER_PATH); fi @@ -363,4 +375,4 @@ endif clean: rm -f *.o *.so *~ a.out core core.[1-9][0-9]* .test2 test-instr .test-instr0 .test-instr1 *.dwo - rm -f $(PROGS) afl-common.o ../afl-clang-fast++ ../afl-clang-lto ../afl-clang-lto++ ../afl-clang*.8 ../ld + rm -f $(PROGS) afl-common.o ../afl-clang-fast++ ../afl-clang-lto ../afl-clang-lto++ ../afl-clang*.8 ../ld ../afl-ld ../afl-llvm-rt*.o diff --git a/llvm_mode/README.lto.md b/llvm_mode/README.lto.md index 28b3b045..9af9ffff 100644 --- a/llvm_mode/README.lto.md +++ b/llvm_mode/README.lto.md @@ -2,31 +2,32 @@ ## TLDR; -1. This compile mode is very frickle if it works it is amazing, if it fails - - well use afl-clang-fast +This version requires a current llvm 11 compiled from the github master. -2. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better +1. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better coverage than anything else that is out there in the AFL world -3. You can use it together with llvm_mode: laf-intel and whitelisting +2. You can use it together with llvm_mode: laf-intel and whitelisting features and can be combined with cmplog/Redqueen -4. It only works with llvm 9 (and likely 10+ but is not tested there yet) +3. It only works with llvm 11 (current github master state) + +4. AUTODICTIONARY feature! see below ## Introduction and problem description A big issue with how afl/afl++ works is that the basic block IDs that are -set during compilation are random - and hence natually the larger the number -of instrumented locations, the higher the number of edge collisions in the +set during compilation are random - and hence naturally the larger the number +of instrumented locations, the higher the number of edge collisions are in the map. This can result in not discovering new paths and therefore degrade the -efficiency of the fuzzing. +efficiency of the fuzzing process. -*This issue is understimated in the fuzzing community!* +*This issue is underestimated in the fuzzing community!* With a 2^16 = 64kb standard map at already 256 instrumented blocks there is on average one collision. On average a target has 10.000 to 50.000 instrumented blocks hence the real collisions are between 750-18.000! -To get to a solution that prevents any collision took several approaches +To reach a solution that prevents any collisions took several approaches and many dead ends until we got to this: * We instrument at link time when we have all files pre-compiled @@ -34,38 +35,48 @@ and many dead ends until we got to this: * Our compiler (afl-clang-lto/afl-clang-lto++) takes care of setting the correct LTO options and runs our own afl-ld linker instead of the system linker - * Our linker collects all LTO files to link and instruments them so that + * The LLVM linker collects all LTO files to link and instruments them so that we have non-colliding edge overage * We use a new (for afl) edge coverage - which is the same as in llvm -fsanitize=coverage edge coverage mode :) - * after inserting our instrumentation in all interesting edges we link - all parts of the program together to our executable The result: - * 10-15% speed gain compared to llvm_mode + * 10-20% speed gain compared to llvm_mode * guaranteed non-colliding edge coverage :-) * The compile time especially for libraries can be longer Example build output from a libtiff build: ``` -/bin/bash ../libtool --tag=CC --mode=link afl-clang-lto -g -O2 -Wall -W -o thumbnail thumbnail.o ../libtiff/libtiff.la ../port/libport.la -llzma -ljbig -ljpeg -lz -lm libtool: link: afl-clang-lto -g -O2 -Wall -W -o thumbnail thumbnail.o ../libtiff/.libs/libtiff.a ../port/.libs/libport.a -llzma -ljbig -ljpeg -lz -lm -afl-clang-lto++2.62d by Marc "vanHauser" Heuse <mh@mh-sec.de> -afl-ld++2.62d by Marc "vanHauser" Heuse <mh@mh-sec.de> (level 0) -[+] Running ar unpacker on /prg/tests/lto/tiff-4.0.4/tools/../libtiff/.libs/libtiff.a into /tmp/.afl-3914343-1583339800.dir -[+] Running ar unpacker on /prg/tests/lto/tiff-4.0.4/tools/../port/.libs/libport.a into /tmp/.afl-3914343-1583339800.dir -[+] Running bitcode linker, creating /tmp/.afl-3914343-1583339800-1.ll -[+] Performing optimization via opt, creating /tmp/.afl-3914343-1583339800-2.bc -[+] Performing instrumentation via opt, creating /tmp/.afl-3914343-1583339800-3.bc -afl-llvm-lto++2.62d by Marc "vanHauser" Heuse <mh@mh-sec.de> -[+] Instrumented 15833 locations with no collisions (on average 1767 collisions would be in afl-gcc/afl-clang-fast) (non-hardened mode). -[+] Running real linker /bin/x86_64-linux-gnu-ld -[+] Linker was successful +afl-clang-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de> in mode LTO +afl-llvm-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de> +AUTODICTIONARY: 11 strings found +[+] Instrumented 12071 locations with no collisions (on average 1046 collisions would be in afl-gcc/afl-clang-fast) (non-hardened mode). +``` + +## Building llvm 11 + +``` +$ sudo apt install binutils-dev # this is *essential*! +$ git clone https://github.com/llvm/llvm-project +$ cd llvm-project +$ mkdir build +$ cd build +$ cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/ +$ make -j $(nproc) +$ export PATH=`pwd`/bin:$PATH +$ export LLVM_CONFIG=`pwd`/bin/llcm-config +$ cd /path/to/AFLplusplus/ +$ make +$ cd llvm_mode +$ make +$ cd .. +$ make install ``` ## How to use afl-clang-lto -Just use afl-clang-lto like you did afl-clang-fast or afl-gcc. +Just use afl-clang-lto like you did with afl-clang-fast or afl-gcc. Also whitelisting (AFL_LLVM_WHITELIST -> [README.whitelist.md](README.whitelist.md)) and laf-intel/compcov (AFL_LLVM_LAF_* -> [README.laf-intel.md](README.laf-intel.md)) work. @@ -77,6 +88,13 @@ CC=afl-clang-lto CXX=afl-clang-lto++ ./configure make ``` +## AUTODICTIONARY feature + +Setting `AFL_LLVM_LTO_AUTODICTIONARY` will generate a dictionary in the +target binary based on string compare and memory compare functions. +afl-fuzz will automatically get these transmitted when starting to fuzz. +This improves coverage on a lot of targets. + ## Potential issues ### compiling libraries fails @@ -94,145 +112,16 @@ AR=llvm-ar RANLIB=llvm-ranlib CC=afl-clang-lto CXX=afl-clang-lto++ ./configure - ``` and on some target you have to to AR=/RANLIB= even for make as the configure script does not save it ... -### "linking globals named '...': symbol multiply defined" error - -The target program is using multiple global variables or functions with the -same name. This is a common error when compiling a project with LTO, and -the fix is `-Wl,--allow-multiple-definition` - however llvm-link which we -need to link all llvm IR LTO files does not support this - yet (hopefully). -Hence if you see this error either you have to remove the duplicate global -variable (think `#ifdef` ...) or you are out of luck. :-( - -### "expected top-level entity" + binary ouput error - -This happens if multiple .a archives are to be linked and they contain the -same object filenames, the first in LTO form, the other in ELF form. -This can not be fixed programmatically, but can be fixed by hand. -You can try to delete the file from either archive -(`llvm-ar d <archive>.a <file>.o`) or performing the llvm-linking, optimizing -and instrumentation by hand (see below). - -### "undefined reference to ..." - -This *can* be the opposite situation of the "expected top-level entity" error - -the library with the ELF file is before the LTO library. -However it can also be a bug in the program - try to compile it normally. If -fails then it is a bug in the program. -Solutions: You can try to delete the file from either archive, e.g. -(`llvm-ar d <archive>.a <file>.o`) or performing the llvm-linking, optimizing -and instrumentation by hand (see below). - -### "File format not recognized" - -This happens if the build system has fixed LDFLAGS, CPPFLAGS, CXXFLAGS and/or -CFLAGS. Ensure that they all contain the `-flto` flag that afl-clang-lto was -compiled with (you can see that by typing `afl-clang-lto -h` and inspecting -the last line of the help output) and add them otherwise - -### clang is hardcoded to /bin/ld - -Some clang packages have 'ld' hardcoded to /bin/ld. This is an issue as this -prevents "our" afl-ld being called. - --fuse-ld=/path/to/afl-ld should be set through makefile magic in llvm_mode - -if it is supported - however if this fails you can try: -``` -LDFLAGS=-fuse-ld=</path/to/afl-ld -``` - -As workaround attempt #2 you will have to switch /bin/ld: -``` - mv /bin/ld /bin/ld.orig - cp afl-ld /bin/ld -``` -This can result in two problems though: - - !1! - When compiling afl-ld, the build process looks at where the /bin/ld link - is going to. So when the workaround was applied and a recompiling afl-ld - is performed then the link is gone and the new afl-ld clueless where - the real ld is. - In this case set AFL_REAL_LD=/bin/ld.orig - - !2! - When you install an updated gcc/clang/... package, your OS might restore - the ld link. - -### Performing the steps by hand - -It is possible to perform all the steps afl-ld by hand to workaround issues -in the target. - -1. Recompile with AFL_DEBUG=1 and collect the afl-clang-lto command that fails - e.g.: `AFL_DEBUG=1 make 2>&1 | grep afl-clang-lto | tail -n 1` - -2. run this command prepended with AFL_DEBUG=1 and collect the afl-ld command - parameters, e.g. `AFL_DEBUG=1 afl-clang-lto[++] .... | grep /afl/ld` - -3. for every .a archive you want to instrument unpack it into a seperate - directory, e.g. - `mkdir archive1.dir ; cd archive1.dir ; llvm-link x ../<archive>.a` - -4. run `file archive*.dir/*.o` and make two lists, one containing all ELF files - and one containing all LLVM IR bitcode files. - You do the same for all .o files of the ../afl/ld command options - -5. Create a single bitcode file by using llvm-link, e.g. - `llvm-link -o all-bitcode.bc <list of all LLVM IR .o files>` - If this fails it is game over - or you modify the source code - -6. Run the optimizer on the new bitcode file: - `opt -O3 --polly -o all-optimized.bc all-bitcode.bc` - -7. Instrument the optimized bitcode file: - `opt --load=$AFL_PATH/afl-llvm-lto-instrumentation.so --disable-opt --afl-lto all-optimized.bc -o all-instrumented.bc - -8. If the parameter `--allow-multiple-definition` is not in the list, add it - as first command line option. - -9. Link everything together. - a) You use the afl-ld command and instead of e.g. `/usr/local/lib/afl/ld` - you replace that with `ld`, the real linker. - b) Every .a archive you instrumented files from you remove the <archive>.a - or -l<archive> from the command - c) If you have entries in your ELF files list (see step 4), you put them to - the command line - but them in the same order! - d) put the all-instrumented.bc before the first library or .o file - e) run the command and hope it compiles, if it doesn't you have to analyze - what the issue is and fix that in the approriate step above. - -Yes this is long and complicated. That is why there is afl-ld doing this and -that why this can easily fail and not all different ways how it *can* fail can -be implemented ... - ### compiling programs still fail afl-clang-lto is still work in progress. -Complex targets are still likely not to compile and this needs to be fixed. Please report issues at: [https://github.com/AFLplusplus/AFLplusplus/issues/226](https://github.com/AFLplusplus/AFLplusplus/issues/226) -Known issues: -* ffmpeg -* bogofilter -* libjpeg-turbo-1.3.1 - ## Upcoming Work -1. Currently the LTO whitelist feature does not allow to not instrument main, start and init functions -2. Modify the forkserver + afl-fuzz so that only the necessary map size is - loaded and used - and communicated to afl-fuzz too. - Result: faster fork in the target and faster map analysis in afl-fuzz - => more speed :-) - -## Tested and working targets - -* libpng-1.2.53 -* libxml2-2.9.2 -* tiff-4.0.4 -* unrar-nonfree-5.6.6 -* exiv 0.27 -* jpeg-6b +1. Currently the LTO whitelist feature does not allow to instrument main, + start and init functions ## History @@ -249,14 +138,20 @@ This was first implemented in January and work ... kinda. The LTO time instrumentation worked, however the "how" the basic blocks were instrumented was a problem, as reducing duplicates turned out to be very, very difficult with a program that has so many paths and therefore so many -dependencies. At lot of stratgies were implemented - and failed. +dependencies. At lot of strategies were implemented - and failed. And then sat solvers were tried, but with over 10.000 variables that turned out to be a dead-end too. + The final idea to solve this came from domenukk who proposed to insert a block into an edge and then just use incremental counters ... and this worked! After some trials and errors to implement this vanhauser-thc found out that there is actually an llvm function for this: SplitEdge() :-) + Still more problems came up though as this only works without bugs from llvm 9 onwards, and with high optimization the link optimization ruins the instrumented control flow graph. -As long as there are no larger changes in llvm this all should work well now ... + +This is all now fixed with llvm 11. The llvm's own linker is now able to +load passes and this bypasses all problems we had. + +Happy end :) diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c index e2b44def..3de5fd7d 100644 --- a/llvm_mode/afl-clang-fast.c +++ b/llvm_mode/afl-clang-fast.c @@ -159,7 +159,6 @@ static void find_obj(u8 *argv0) { static void edit_params(u32 argc, char **argv, char **envp) { u8 fortify_set = 0, asan_set = 0, x_set = 0, bit_mode = 0; - u8 has_llvm_config = 0; u8 *name; cc_params = ck_alloc((argc + 128) * sizeof(u8 *)); @@ -170,8 +169,6 @@ static void edit_params(u32 argc, char **argv, char **envp) { else ++name; - has_llvm_config = (strlen(LLVM_BINDIR) > 0); - if (instrument_mode == INSTRUMENT_LTO) if (lto_flag[0] != '-') FATAL( @@ -181,20 +178,29 @@ static void edit_params(u32 argc, char **argv, char **envp) { if (!strcmp(name, "afl-clang-fast++") || !strcmp(name, "afl-clang-lto++")) { u8 *alt_cxx = getenv("AFL_CXX"); - if (has_llvm_config) + if (USE_BINDIR) snprintf(llvm_fullpath, sizeof(llvm_fullpath), "%s/clang++", LLVM_BINDIR); else - sprintf(llvm_fullpath, "clang++"); - cc_params[0] = alt_cxx ? alt_cxx : (u8 *)llvm_fullpath; + sprintf(llvm_fullpath, CLANGPP_BIN); + cc_params[0] = alt_cxx && *alt_cxx ? alt_cxx : (u8 *)llvm_fullpath; - } else { + } else if (!strcmp(name, "afl-clang-fast") || + + !strcmp(name, "afl-clang-lto")) { u8 *alt_cc = getenv("AFL_CC"); - if (has_llvm_config) + if (USE_BINDIR) snprintf(llvm_fullpath, sizeof(llvm_fullpath), "%s/clang", LLVM_BINDIR); else - sprintf(llvm_fullpath, "clang"); - cc_params[0] = alt_cc ? alt_cc : (u8 *)llvm_fullpath; + sprintf(llvm_fullpath, CLANG_BIN); + cc_params[0] = alt_cc && *alt_cc ? alt_cc : (u8 *)llvm_fullpath; + + } else { + + fprintf(stderr, "Name of the binary: %s\n", argv[0]); + FATAL( + "Name of the binary is not a known name, expected afl-clang-fast(++) " + "or afl-clang-lto(++)"); } @@ -220,6 +226,13 @@ static void edit_params(u32 argc, char **argv, char **envp) { if (getenv("LAF_TRANSFORM_COMPARES") || getenv("AFL_LLVM_LAF_TRANSFORM_COMPARES")) { + if (!be_quiet && getenv("AFL_LLVM_LTO_AUTODICTIONARY") && + instrument_mode != INSTRUMENT_LTO) + WARNF( + "using AFL_LLVM_LAF_TRANSFORM_COMPARES together with " + "AFL_LLVM_LTO_AUTODICTIONARY makes no sense. Use only " + "AFL_LLVM_LTO_AUTODICTIONARY."); + cc_params[cc_par_cnt++] = "-Xclang"; cc_params[cc_par_cnt++] = "-load"; cc_params[cc_par_cnt++] = "-Xclang"; @@ -269,12 +282,6 @@ static void edit_params(u32 argc, char **argv, char **envp) { if (instrument_mode == INSTRUMENT_LTO) { - char *old_path = getenv("PATH"); - char *new_path = alloc_printf("%s:%s", AFL_PATH, old_path); - - setenv("PATH", new_path, 1); - setenv("AFL_LD", "1", 1); - if (getenv("AFL_LLVM_WHITELIST") != NULL) { cc_params[cc_par_cnt++] = "-Xclang"; @@ -285,13 +292,10 @@ static void edit_params(u32 argc, char **argv, char **envp) { } -#ifdef AFL_CLANG_FUSELD - cc_params[cc_par_cnt++] = alloc_printf("-fuse-ld=%s/afl-ld", AFL_PATH); -#endif - - cc_params[cc_par_cnt++] = "-B"; - cc_params[cc_par_cnt++] = AFL_PATH; - + cc_params[cc_par_cnt++] = alloc_printf("-fuse-ld=%s", AFL_REAL_LD); + cc_params[cc_par_cnt++] = "-Wl,--allow-multiple-definition"; + cc_params[cc_par_cnt++] = alloc_printf( + "-Wl,-mllvm=-load=%s/afl-llvm-lto-instrumentation.so", obj_path); cc_params[cc_par_cnt++] = lto_flag; } else { @@ -410,7 +414,11 @@ static void edit_params(u32 argc, char **argv, char **envp) { } - if (getenv("AFL_NO_BUILTIN")) { + if (getenv("AFL_NO_BUILTIN") || getenv("AFL_LLVM_LAF_TRANSFORM_COMPARES") || + getenv("LAF_TRANSFORM_COMPARES") || + (instrument_mode == INSTRUMENT_LTO && + (getenv("AFL_LLVM_LTO_AUTODICTIONARY") || + getenv("AFL_LLVM_AUTODICTIONARY")))) { cc_params[cc_par_cnt++] = "-fno-builtin-strcmp"; cc_params[cc_par_cnt++] = "-fno-builtin-strncmp"; @@ -491,21 +499,38 @@ static void edit_params(u32 argc, char **argv, char **envp) { case 0: cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt.o", obj_path); + if (instrument_mode == INSTRUMENT_LTO) + cc_params[cc_par_cnt++] = + alloc_printf("%s/afl-llvm-rt-lto.o", obj_path); break; case 32: cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-32.o", obj_path); - if (access(cc_params[cc_par_cnt - 1], R_OK)) FATAL("-m32 is not supported by your compiler"); + if (instrument_mode == INSTRUMENT_LTO) { + + cc_params[cc_par_cnt++] = + alloc_printf("%s/afl-llvm-rt-lto-32.o", obj_path); + if (access(cc_params[cc_par_cnt - 1], R_OK)) + FATAL("-m32 is not supported by your compiler"); + + } break; case 64: cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-64.o", obj_path); - if (access(cc_params[cc_par_cnt - 1], R_OK)) FATAL("-m64 is not supported by your compiler"); + if (instrument_mode == INSTRUMENT_LTO) { + + cc_params[cc_par_cnt++] = + alloc_printf("%s/afl-llvm-rt-lto-64.o", obj_path); + if (access(cc_params[cc_par_cnt - 1], R_OK)) + FATAL("-m64 is not supported by your compiler"); + + } break; @@ -539,6 +564,10 @@ int main(int argc, char **argv, char **envp) { if ((ptr = getenv("AFL_LLVM_INSTRUMENT")) != NULL) { + if (strncasecmp(ptr, "default", strlen("default")) == 0 || + strncasecmp(ptr, "afl", strlen("afl")) == 0 || + strncasecmp(ptr, "classic", strlen("classic")) == 0) + instrument_mode = INSTRUMENT_DEFAULT; if (strncasecmp(ptr, "cfg", strlen("cfg")) == 0 || strncasecmp(ptr, "instrim", strlen("instrim")) == 0) instrument_mode = INSTRUMENT_CFG; @@ -692,30 +721,30 @@ int main(int argc, char **argv, char **envp) { "Environment variables used:\n" "AFL_CC: path to the C compiler to use\n" "AFL_CXX: path to the C++ compiler to use\n" - "AFL_PATH: path to instrumenting pass and runtime " - "(afl-llvm-rt.*o)\n" - "AFL_DONT_OPTIMIZE: disable optimization instead of -O3\n" - "AFL_NO_BUILTIN: compile for use with libtokencap.so\n" - "AFL_INST_RATIO: percentage of branches to instrument\n" - "AFL_QUIET: suppress verbose output\n" "AFL_DEBUG: enable developer debugging output\n" + "AFL_DONT_OPTIMIZE: disable optimization instead of -O3\n" "AFL_HARDEN: adds code hardening to catch memory bugs\n" - "AFL_USE_ASAN: activate address sanitizer\n" - "AFL_USE_MSAN: activate memory sanitizer\n" - "AFL_USE_UBSAN: activate undefined behaviour sanitizer\n" - "AFL_USE_CFISAN: activate control flow sanitizer\n" - "AFL_LLVM_WHITELIST: enable whitelisting (selective " - "instrumentation)\n" + "AFL_INST_RATIO: percentage of branches to instrument\n" "AFL_LLVM_NOT_ZERO: use cycling trace counters that skip zero\n" "AFL_LLVM_LAF_SPLIT_COMPARES: enable cascaded comparisons\n" - "AFL_LLVM_LAF_SPLIT_SWITCHES: casc. comp. in 'switch'\n" - "AFL_LLVM_LAF_TRANSFORM_COMPARES: transform library comparison " - "function calls\n" - " to cascaded comparisons\n" "AFL_LLVM_LAF_SPLIT_FLOATS: transform floating point comp. to " "cascaded " "comp.\n" - "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n", + "AFL_LLVM_LAF_SPLIT_SWITCHES: casc. comp. in 'switch'\n" + " to cascaded comparisons\n" + "AFL_LLVM_LAF_TRANSFORM_COMPARES: transform library comparison " + "function calls\n" + "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n" + "AFL_LLVM_WHITELIST: enable whitelisting (selective " + "instrumentation)\n" + "AFL_NO_BUILTIN: compile for use with libtokencap.so\n" + "AFL_PATH: path to instrumenting pass and runtime " + "(afl-llvm-rt.*o)\n" + "AFL_QUIET: suppress verbose output\n" + "AFL_USE_ASAN: activate address sanitizer\n" + "AFL_USE_CFISAN: activate control flow sanitizer\n" + "AFL_USE_MSAN: activate memory sanitizer\n" + "AFL_USE_UBSAN: activate undefined behaviour sanitizer\n", callname, BIN_PATH, BIN_PATH); SAYF( @@ -723,24 +752,22 @@ int main(int argc, char **argv, char **envp) { "AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen mutator)\n" "AFL_LLVM_INSTRUMENT: set instrumentation mode: DEFAULT, CFG " "(INSTRIM), LTO, CTX, NGRAM-2 ... NGRAM-16\n" - "You can also use the old environment variables:" - "AFL_LLVM_CTX: use context sensitive coverage\n" - "AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation\n" - "AFL_LLVM_NGRAM_SIZE: use ngram prev_loc count coverage\n" - "AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n" - "AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed (sub " + " You can also use the old environment variables instead:" + " AFL_LLVM_CTX: use context sensitive coverage\n" + " AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation\n" + " AFL_LLVM_NGRAM_SIZE: use ngram prev_loc count coverage\n" + " AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n" + " AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed (sub " "option to INSTRIM)\n"); #ifdef AFL_CLANG_FLTO SAYF( "\nafl-clang-lto specific environment variables:\n" - "AFL_LLVM_LTO_STARTID: from which ID to start counting from for a " - "bb\n" "AFL_LLVM_LTO_DONTWRITEID: don't write the highest ID used to a " "global var\n" - "AFL_REAL_LD: use this linker instead of the compiled in path\n" - "AFL_LD_PASSTHROUGH: do not perform instrumentation (for configure " - "scripts)\n" + "AFL_LLVM_LTO_STARTID: from which ID to start counting from for a " + "bb\n" + "AFL_REAL_LD: use this lld linker instead of the compiled in path\n" "\nafl-clang-lto was built with linker target \"%s\" and LTO flags " "\"%s\"\n" "If anything fails - be sure to read README.lto.md!\n", @@ -774,6 +801,16 @@ int main(int argc, char **argv, char **envp) { } + u8 *ptr2; + if (!be_quiet && instrument_mode != INSTRUMENT_LTO && + ((ptr2 = getenv("AFL_MAP_SIZE")) || (ptr2 = getenv("AFL_MAPSIZE")))) { + + u32 map_size = atoi(ptr2); + if (map_size != MAP_SIZE) + FATAL("AFL_MAP_SIZE is not supported by afl-clang-fast"); + + } + if (debug) { SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); diff --git a/llvm_mode/afl-ld.c b/llvm_mode/afl-ld.c deleted file mode 100644 index eb46c85c..00000000 --- a/llvm_mode/afl-ld.c +++ /dev/null @@ -1,839 +0,0 @@ -/* - american fuzzy lop++ - wrapper for GNU ld - ----------------------------------------- - - Written by Marc Heuse <mh@mh-sec.de> for afl++ - - Maintained by Marc Heuse <mh@mh-sec.de>, - Heiko Eißfeldt <heiko.eissfeldt@hexco.de> - Andrea Fioraldi <andreafioraldi@gmail.com> - Dominik Maier <domenukk@gmail.com> - - Copyright 2019-2020 AFLplusplus Project. All rights reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at: - - http://www.apache.org/licenses/LICENSE-2.0 - - The sole purpose of this wrapper is to preprocess clang LTO files before - linking by ld and perform the instrumentation on the whole program. - -*/ - -#define AFL_MAIN - -#include "config.h" -#include "types.h" -#include "debug.h" -#include "alloc-inl.h" - -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> -#include <time.h> -#include <ctype.h> -#include <fcntl.h> - -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <sys/time.h> - -#include <dirent.h> - -#define MAX_PARAM_COUNT 4096 - -static u8 **ld_params, /* Parameters passed to the real 'ld' */ - **link_params, /* Parameters passed to 'llvm-link' */ - **opt_params, /* Parameters passed to 'opt' opt */ - **inst_params; /* Parameters passed to 'opt' inst */ - -static u8 *input_file; /* Originally specified input file */ -static u8 *final_file, /* Instrumented file for the real 'ld' */ - *linked_file, /* file where we link all files */ - *modified_file; /* file that was optimized before instr */ -static u8 *afl_path = AFL_PATH; -static u8 *real_ld = AFL_REAL_LD; -static u8 cwd[4096]; -static u8 *tmp_dir; -static u8 *ar_dir; -static u8 ar_dir_cnt; -static u8 *libdirs[254]; -static u8 libdir_cnt; - -static u8 be_quiet, /* Quiet mode (no stderr output) */ - debug, /* AFL_DEBUG */ - passthrough, /* AFL_LD_PASSTHROUGH - no link+optimize*/ - we_link, /* we have bc/ll -> link + optimize */ - just_version; /* Just show version? */ - -static u32 ld_param_cnt = 1, /* Number of params to 'ld' */ - link_param_cnt = 1, /* Number of params to 'llvm-link' */ - opt_param_cnt = 1, /* Number of params to 'opt' opt */ - inst_param_cnt = 1; /* Number of params to 'opt' instr */ - -/* This function wipes a directory - our AR unpack directory in this case */ -static u8 wipe_directory(u8 *path) { - - DIR * d; - struct dirent *d_ent; - - d = opendir(path); - - if (!d) return 0; - - while ((d_ent = readdir(d))) { - - if (strcmp(d_ent->d_name, ".") != 0 && strcmp(d_ent->d_name, "..") != 0) { - - u8 *fname = alloc_printf("%s/%s", path, d_ent->d_name); - if (unlink(fname)) PFATAL("Unable to delete '%s'", fname); - ck_free(fname); - - } - - } - - closedir(d); - - return !!rmdir(path); - -} - -/* remove temporary files on fatal errors */ -static void at_exit_handler(void) { - - if (!getenv("AFL_KEEP_ASSEMBLY")) { - - if (linked_file) { - - unlink(linked_file); - linked_file = NULL; - - } - - if (modified_file) { - - unlink(modified_file); - modified_file = NULL; - - } - - if (final_file) { - - unlink(final_file); - final_file = NULL; - - } - - if (ar_dir != NULL) { - - wipe_directory(ar_dir); - ar_dir = NULL; - - } - - } - -} - -/* This function checks if the parameter is a) an existing file and b) - if it is a BC or LL file, if both are true it returns 1 and 0 otherwise */ -int is_llvm_file(const char *file) { - - int fd; - u8 buf[5]; - - if ((fd = open(file, O_RDONLY)) < 0) { - - if (debug) SAYF(cMGN "[D] " cRST "File %s not found", file); - return 0; - - } - - if (read(fd, buf, 4) != 4) return 0; - buf[sizeof(buf) - 1] = 0; - - close(fd); - - if (strncmp(buf, "; Mo", 4) == 0) return 1; - - if (buf[0] == 'B' && buf[1] == 'C' && buf[2] == 0xc0 && buf[3] == 0xde) - return 1; - - return 0; - -} - -/* Return the current working directory, not thread safe ;-) */ -u8 *getthecwd() { - - static u8 fail[] = ""; - if (getcwd(cwd, sizeof(cwd)) == NULL) return fail; - return cwd; - -} - -/* Check if an ar extracted file is already in the parameter list */ -int is_duplicate(u8 **params, u32 ld_param_cnt, u8 *ar_file) { - - for (uint32_t i = 0; i < ld_param_cnt; i++) - if (params[i] != NULL) - if (strcmp(params[i], ar_file) == 0) return 1; - - return 0; - -} - -/* Examine and modify parameters to pass to 'ld', 'llvm-link' and 'llmv-ar'. - Note that the file name is always the last parameter passed by GCC, - so we exploit this property to keep the code "simple". */ -static void edit_params(int argc, char **argv) { - - u32 i, have_lto = 0, libdir_index; - u8 libdir_file[4096]; - - if (tmp_dir == NULL) { - - tmp_dir = getenv("TMPDIR"); - if (!tmp_dir) tmp_dir = getenv("TEMP"); - if (!tmp_dir) tmp_dir = getenv("TMP"); - if (!tmp_dir) tmp_dir = "/tmp"; - - } - - linked_file = - alloc_printf("%s/.afl-%u-%u-1.ll", tmp_dir, getpid(), (u32)time(NULL)); - modified_file = - alloc_printf("%s/.afl-%u-%u-2.bc", tmp_dir, getpid(), (u32)time(NULL)); - final_file = - alloc_printf("%s/.afl-%u-%u-3.bc", tmp_dir, getpid(), (u32)time(NULL)); - - ld_params = ck_alloc(4096 * sizeof(u8 *)); - link_params = ck_alloc(4096 * sizeof(u8 *)); - inst_params = ck_alloc(12 * sizeof(u8 *)); - opt_params = ck_alloc(12 * sizeof(u8 *)); - - ld_params[0] = (u8 *)real_ld; - ld_params[ld_param_cnt++] = "--allow-multiple-definition"; - - link_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "llvm-link"); - link_params[link_param_cnt++] = "-S"; // we create the linked file as .ll - link_params[link_param_cnt++] = "-o"; - link_params[link_param_cnt++] = linked_file; - - opt_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "opt"); - if (getenv("AFL_DONT_OPTIMIZE") == NULL) - opt_params[opt_param_cnt++] = "-O3"; - else - opt_params[opt_param_cnt++] = "-O0"; - - // opt_params[opt_param_cnt++] = "-S"; // only when debugging - opt_params[opt_param_cnt++] = linked_file; // input: .ll file - opt_params[opt_param_cnt++] = "-o"; - opt_params[opt_param_cnt++] = modified_file; // output: .bc file - - inst_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "opt"); - inst_params[inst_param_cnt++] = - alloc_printf("--load=%s/afl-llvm-lto-instrumentation.so", afl_path); - // inst_params[inst_param_cnt++] = "-S"; // only when debugging - inst_params[inst_param_cnt++] = "--disable-opt"; - inst_params[inst_param_cnt++] = "--afl-lto"; - inst_params[inst_param_cnt++] = modified_file; // input: .bc file - inst_params[inst_param_cnt++] = "-o"; - inst_params[inst_param_cnt++] = final_file; // output: .bc file - - // first we must collect all library search paths - for (i = 1; i < argc; i++) - if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == 'L') - libdirs[libdir_cnt++] = argv[i] + 2; - - // then we inspect all options to the target linker - for (i = 1; i < argc; i++) { - - if (ld_param_cnt >= MAX_PARAM_COUNT || link_param_cnt >= MAX_PARAM_COUNT) - FATAL( - "Too many command line parameters because of unpacking .a archives, " - "this would need to be done by hand ... sorry! :-("); - - if (strncmp(argv[i], "-flto", 5) == 0) have_lto = 1; - - if (!strcmp(argv[i], "-version")) { - - just_version = 1; - ld_params[1] = argv[i]; - ld_params[2] = NULL; - final_file = input_file; - return; - - } - - if (strcmp(argv[i], "--afl") == 0) { - - if (!be_quiet) OKF("afl++ test command line flag detected, exiting."); - exit(0); - - } - - // if a -l library is linked and no .so is found but an .a archive is there - // then the archive will be used. So we have to emulate this and check - // if an archive will be used and if yes we will instrument it too - libdir_file[0] = 0; - libdir_index = libdir_cnt; - if (strncmp(argv[i], "-l", 2) == 0 && libdir_cnt > 0 && - strncmp(argv[i], "-lgcc", 5) != 0) { - - u8 found = 0; - - for (uint32_t j = 0; j < libdir_cnt && !found; j++) { - - snprintf(libdir_file, sizeof(libdir_file), "%s/lib%s%s", libdirs[j], - argv[i] + 2, ".so"); - if (access(libdir_file, R_OK) != 0) { // no .so found? - - snprintf(libdir_file, sizeof(libdir_file), "%s/lib%s%s", libdirs[j], - argv[i] + 2, ".a"); - if (access(libdir_file, R_OK) == 0) { // but .a found? - - libdir_index = j; - found = 1; - if (debug) SAYF(cMGN "[D] " cRST "Found %s\n", libdir_file); - - } - - } else { - - found = 1; - if (debug) SAYF(cMGN "[D] " cRST "Found %s\n", libdir_file); - - } - - } - - } - - // is the parameter an .a AR archive? If so, unpack and check its files - if (libdir_index < libdir_cnt || - (argv[i][0] != '-' && strlen(argv[i]) > 2 && - argv[i][strlen(argv[i]) - 1] == 'a' && - argv[i][strlen(argv[i]) - 2] == '.')) { - - // This gets a bit odd. I encountered several .a files being linked and - // where the same "foo.o" was in both .a archives. llvm-link does not - // like this so we have to work around that ... - - u8 this_wd[4096], *this_ar; - u8 ar_params_cnt = 4; - u8 * ar_params[ar_params_cnt]; - u8 * file = argv[i]; - s32 pid, status; - DIR * arx; - struct dirent *dir_ent; - - if (libdir_index < libdir_cnt) file = libdir_file; - - if (ar_dir_cnt == 0) { // first archive, we setup up the basics - - ar_dir = alloc_printf("%s/.afl-%u-%u.dir", tmp_dir, getpid(), - (u32)time(NULL)); - if (mkdir(ar_dir, 0700) != 0) - FATAL("can not create temporary directory %s", ar_dir); - - } - - if (getcwd(this_wd, sizeof(this_wd)) == NULL) - FATAL("can not get the current working directory"); - if (chdir(ar_dir) != 0) - FATAL("can not chdir to temporary directory %s", ar_dir); - if (file[0] == '/') - this_ar = file; - else - this_ar = alloc_printf("%s/%s", this_wd, file); - ar_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "llvm-ar"); - ar_params[1] = "x"; - ar_params[2] = this_ar; - ar_params[3] = NULL; - - if (!be_quiet) OKF("Running ar unpacker on %s into %s", this_ar, ar_dir); - - if (debug) { - - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (uint32_t j = 0; j < ar_params_cnt; j++) - SAYF(" \"%s\"", ar_params[j]); - SAYF("\n"); - - } - - if (!(pid = fork())) { - - execvp(ar_params[0], (char **)ar_params); - FATAL("Oops, failed to execute '%s'", ar_params[0]); - - } - - if (pid < 0) FATAL("fork() failed"); - if (waitpid(pid, &status, 0) <= 0) FATAL("waitpid() failed"); - if (WEXITSTATUS(status) != 0) exit(WEXITSTATUS(status)); - - if (chdir(this_wd) != 0) - FATAL("can not chdir back to our working directory %s", this_wd); - - if (!(arx = opendir(ar_dir))) FATAL("can not open directory %s", ar_dir); - - while ((dir_ent = readdir(arx)) != NULL) { - - u8 *ar_file = alloc_printf("%s/%s", ar_dir, dir_ent->d_name); - - if (dir_ent->d_name[strlen(dir_ent->d_name) - 1] == 'o' && - dir_ent->d_name[strlen(dir_ent->d_name) - 2] == '.') { - - if (passthrough || is_llvm_file(ar_file) == 0) { - - if (is_duplicate(ld_params, ld_param_cnt, ar_file) == 0) { - - ld_params[ld_param_cnt++] = ar_file; - if (debug) - SAYF(cMGN "[D] " cRST "not a LTO link file: %s\n", ar_file); - - } - - } else { - - if (is_duplicate(link_params, link_param_cnt, ar_file) == 0) { - - if (we_link == 0) { // we have to honor order ... - - ld_params[ld_param_cnt++] = final_file; - we_link = 1; - - } - - link_params[link_param_cnt++] = ar_file; - if (debug) SAYF(cMGN "[D] " cRST "is a link file: %s\n", ar_file); - - } - - } - - } else - - if (dir_ent->d_name[0] != '.' && !be_quiet) - WARNF("Unusual file found in ar archive %s: %s", argv[i], ar_file); - - } - - closedir(arx); - ar_dir_cnt++; - - continue; - - } - - if (passthrough || argv[i][0] == '-' || is_llvm_file(argv[i]) == 0) { - - // -O3 fucks up the CFG and instrumentation, so we downgrade to O2 - // which is as we want things. Lets hope this is not too different - // in the various llvm versions! - if (strncmp(argv[i], "-plugin-opt=O", 13) == 0 && - !getenv("AFL_DONT_OPTIMIZE")) - ld_params[ld_param_cnt++] = "-plugin-opt=O2"; - else - ld_params[ld_param_cnt++] = argv[i]; - - } else { - - if (we_link == 0) { // we have to honor order ... - ld_params[ld_param_cnt++] = final_file; - we_link = 1; - - } - - link_params[link_param_cnt++] = argv[i]; - - } - - } - - // if (have_lto == 0) ld_params[ld_param_cnt++] = AFL_CLANG_FLTO; // maybe we - // should not ... - ld_params[ld_param_cnt] = NULL; - link_params[link_param_cnt] = NULL; - opt_params[opt_param_cnt] = NULL; - inst_params[inst_param_cnt] = NULL; - -} - -/* clean AFL_PATH from PATH */ - -void clean_path() { - - char *tmp, *newpath = NULL, *path = getenv("PATH"); - u8 done = 0; - - if (debug) - SAYF(cMGN "[D]" cRST " old PATH=%s, AFL_PATH=%s\n", path, AFL_PATH); - - // wipe AFL paths from PATH that we set - // we added two paths so we remove the two paths - while (!done) { - - if (*path == 0) - done = 1; - else if (*path++ == ':') - done = 1; - - } - - while (*path == ':') - path++; - - // AFL_PATH could be additionally in PATH so check and remove to not call our - // 'ld' - const size_t pathlen = strlen(path); - const size_t afl_pathlen = strlen(AFL_PATH); - newpath = malloc(pathlen + 1); - if (strcmp(AFL_PATH, "/bin") != 0 && strcmp(AFL_PATH, "/usr/bin") != 0 && - afl_pathlen > 1 && (tmp = strstr(path, AFL_PATH)) != NULL && // it exists - (tmp == path || - (tmp > path && - tmp[-1] == ':')) && // either starts with it or has a colon before - (tmp + afl_pathlen == path + pathlen || - (tmp + afl_pathlen < - path + (pathlen && tmp[afl_pathlen] == - ':')) // end with it or has a colon at the end - )) { - - int one_colon = 1; - - if (tmp > path) { - - memcpy(newpath, path, tmp - path); - newpath[tmp - path - 1] = 0; // remove ':' - one_colon = 0; - - } - - if (tmp + afl_pathlen < path + pathlen) tmp += afl_pathlen + one_colon; - - setenv("PATH", newpath, 1); - - } else - - setenv("PATH", path, 1); - - if (debug) SAYF(cMGN "[D]" cRST " new PATH=%s\n", getenv("PATH")); - free(newpath); - -} - -/* Main entry point */ - -int main(int argc, char **argv) { - - s32 pid, i; - int status; - u8 *ptr, exe[4096], exe2[4096], proc[32], val[2] = " "; - int have_afl_ld_caller = 0; - - if (isatty(2) && !getenv("AFL_QUIET") && !getenv("AFL_DEBUG")) { - - if (getenv("AFL_LD") != NULL) - SAYF(cCYA "afl-ld" VERSION cRST - " by Marc \"vanHauser\" Heuse <mh@mh-sec.de> (level %d)\n", - have_afl_ld_caller); - - } else - - be_quiet = 1; - - if (getenv("AFL_DEBUG") != NULL) debug = 1; - if (getenv("AFL_PATH") != NULL) afl_path = getenv("AFL_PATH"); - if (getenv("AFL_LD_PASSTHROUGH") != NULL) passthrough = 1; - if (getenv("AFL_REAL_LD") != NULL) real_ld = getenv("AFL_REAL_LD"); - if (real_ld == NULL || strlen(real_ld) < 2) real_ld = "/bin/ld"; - if (real_ld != NULL && real_ld[0] != '/') - real_ld = alloc_printf("/bin/%s", real_ld); - - if ((ptr = getenv("AFL_LD_CALLER")) != NULL) have_afl_ld_caller = atoi(ptr); - val[0] = 0x31 + have_afl_ld_caller; - setenv("AFL_LD_CALLER", val, 1); - - if (debug) { - - SAYF(cMGN "[D] " cRST - "AFL_LD=%s, set AFL_LD_CALLER=%s, have_afl_ld_caller=%d, " - "real_ld=%s\n", - getenv("AFL_LD"), val, have_afl_ld_caller, real_ld); - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (i = 0; i < argc; i++) - SAYF(" \"%s\"", argv[i]); - SAYF("\n"); - - } - - sprintf(proc, "/proc/%d/exe", getpid()); - if (readlink(proc, exe, sizeof(exe) - 1) > 0) { - - if (readlink(real_ld, exe2, sizeof(exe2) - 1) < 1) exe2[0] = 0; - exe[sizeof(exe) - 1] = 0; - exe[sizeof(exe2) - 1] = 0; - if (strcmp(exe, real_ld) == 0 || strcmp(exe, exe2) == 0) - PFATAL(cLRD "[!] " cRST - "Error: real 'ld' path points to afl-ld, set AFL_REAL_LD to " - "the real 'ld' program!"); - - } - - if (have_afl_ld_caller > 1) - PFATAL(cLRD "[!] " cRST - "Error: afl-ld calls itself in a loop, set AFL_REAL_LD to the " - "real 'ld' program!"); - - if (argc < 2) { - - SAYF( - "\n" - "This is a helper application for afl-fuzz. It is a wrapper around GNU " - "'ld',\n" - "executed by the toolchain whenever using " - "afl-clang-lto/afl-clang-lto++.\n" - "You probably don't want to run this program directly.\n\n" - - "Environment variables:\n" - " AFL_LD_PASSTHROUGH do not link+optimize == no instrumentation\n" - " AFL_REAL_LD point to the real ld if necessary\n" - - "\nafl-ld was compiled with the fixed real 'ld' path of %s and the " - "clang " - "bin path of %s\n\n", - real_ld, LLVM_BINDIR); - - exit(1); - - } - - if (getenv("AFL_LD") == NULL) { - - /* if someone install clang/ld into the same directory as afl++ then - they are out of luck ... */ - - if (have_afl_ld_caller == 1) { clean_path(); } - - if (real_ld != NULL && strlen(real_ld) > 1) execvp(real_ld, argv); - execvp("ld", argv); // fallback - PFATAL("Oops, failed to execute 'ld' - check your PATH"); - - } - - atexit(at_exit_handler); // ensure to wipe temp files if things fail - - edit_params(argc, argv); // here most of the magic happens :-) - - if (debug) - SAYF(cMGN "[D] " cRST - "param counts: ar:%u lib:%u ld:%u link:%u opt:%u instr:%u\n", - ar_dir_cnt, libdir_cnt, ld_param_cnt, link_param_cnt, opt_param_cnt, - inst_param_cnt); - - if (!just_version) { - - if (we_link == 0) { - - if (!getenv("AFL_QUIET")) - WARNF("No LTO input file found, cannot instrument!"); - - } else { - - /* first we link all files */ - if (!be_quiet) OKF("Running bitcode linker, creating %s", linked_file); - - if (debug) { - - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (i = 0; i < link_param_cnt; i++) - SAYF(" \"%s\"", link_params[i]); - SAYF("\n"); - - } - - if (!(pid = fork())) { - - execvp(link_params[0], (char **)link_params); - FATAL("Oops, failed to execute '%s'", link_params[0]); - - } - - if (pid < 0) PFATAL("fork() failed"); - if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); - if (WEXITSTATUS(status) != 0) { - - SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD - "\n[-] PROGRAM ABORT : " cRST); - SAYF( - "llvm-link failed! Probable causes:\n\n" - " #1 If the error is \"linking globals named '...': symbol " - "multiply defined\"\n" - " then there is nothing we can do - llvm-link is missing an " - "important feature\n\n" - " #2 If the error is \"expected top-level entity\" and then " - "binary output, this\n" - " is because the same file is present in different .a archives " - "in different\n" - " formats. This can be fixed by manual doing the steps afl-ld " - "is doing but\n" - " programmatically - sorry!\n\n"); - exit(WEXITSTATUS(status)); - - } - - /* then we perform an optimization on the collected objects files */ - if (!be_quiet) - OKF("Performing optimization via opt, creating %s", modified_file); - if (debug) { - - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (i = 0; i < opt_param_cnt; i++) - SAYF(" \"%s\"", opt_params[i]); - SAYF("\n"); - - } - - if (!(pid = fork())) { - - execvp(opt_params[0], (char **)opt_params); - FATAL("Oops, failed to execute '%s'", opt_params[0]); - - } - - if (pid < 0) PFATAL("fork() failed"); - if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); - if (WEXITSTATUS(status) != 0) exit(WEXITSTATUS(status)); - - /* then we run the instrumentation through the optimizer */ - if (!be_quiet) - OKF("Performing instrumentation via opt, creating %s", final_file); - if (debug) { - - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (i = 0; i < inst_param_cnt; i++) - SAYF(" \"%s\"", inst_params[i]); - SAYF("\n"); - - } - - if (!(pid = fork())) { - - execvp(inst_params[0], (char **)inst_params); - FATAL("Oops, failed to execute '%s'", inst_params[0]); - - } - - if (pid < 0) PFATAL("fork() failed"); - if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); - if (WEXITSTATUS(status) != 0) exit(WEXITSTATUS(status)); - - } - - /* next step - run the linker! :-) */ - - } - - if (!be_quiet) OKF("Running real linker %s", real_ld); - if (debug) { - - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (i = 0; i < ld_param_cnt; i++) - SAYF(" \"%s\"", ld_params[i]); - SAYF("\n"); - - } - - if (!(pid = fork())) { - - clean_path(); - - unsetenv("AFL_LD"); - - if (strlen(real_ld) > 1) execvp(real_ld, (char **)ld_params); - execvp("ld", (char **)ld_params); // fallback - FATAL("Oops, failed to execute 'ld' - check your PATH"); - - } - - if (pid < 0) PFATAL("fork() failed"); - - if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); - if (debug) SAYF(cMGN "[D] " cRST "linker result: %d\n", status); - - if (!just_version) { - - if (!getenv("AFL_KEEP_ASSEMBLY")) { - - if (linked_file) { - - unlink(linked_file); - linked_file = NULL; - - } - - if (modified_file) { - - unlink(modified_file); - modified_file = NULL; - - } - - if (final_file) { - - unlink(final_file); - final_file = NULL; - - } - - if (ar_dir != NULL) { - - wipe_directory(ar_dir); - ar_dir = NULL; - - } - - } else { - - if (!be_quiet) { - - SAYF( - "[!] afl-ld: keeping link file %s, optimized bitcode %s and " - "instrumented bitcode %s", - linked_file, modified_file, final_file); - if (ar_dir_cnt > 0 && ar_dir) - SAYF(" and ar archive unpack directory %s", ar_dir); - SAYF("\n"); - - } - - } - - if (status == 0) { - - if (!be_quiet) OKF("Linker was successful"); - - } else { - - SAYF(cLRD "[-] " cRST - "Linker failed, please investigate and send a bug report. Most " - "likely an 'ld' option is incompatible with %s. Try " - "AFL_KEEP_ASSEMBLY=1 and AFL_DEBUG=1 for replaying.\n", - AFL_CLANG_FLTO); - - } - - } - - exit(WEXITSTATUS(status)); - -} - diff --git a/llvm_mode/afl-llvm-lto-instrumentation.so.cc b/llvm_mode/afl-llvm-lto-instrumentation.so.cc index 4bc16f17..ece3201f 100644 --- a/llvm_mode/afl-llvm-lto-instrumentation.so.cc +++ b/llvm_mode/afl-llvm-lto-instrumentation.so.cc @@ -23,12 +23,6 @@ */ -// CONFIG OPTION: -// If #define USE_SPLIT is used, then the llvm::SplitEdge function is used -// instead of our own implementation. Ours looks better and will -// compile everywhere. But it is not working for complex code. yet. damn. -#define USE_SPLIT - #define AFL_LLVM_PASS #include "config.h" @@ -44,31 +38,24 @@ #include <sys/time.h> #include "llvm/Config/llvm-config.h" -#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 -typedef long double max_align_t; -#endif - +#include "llvm/ADT/Statistic.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Module.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" - -#ifdef USE_SPLIT #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemorySSAUpdater.h" -#endif +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Pass.h" -#if LLVM_VERSION_MAJOR > 3 || \ - (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/CFG.h" -#else -#include "llvm/DebugInfo.h" -#include "llvm/Support/CFG.h" -#endif +#include <set> using namespace llvm; @@ -91,7 +78,6 @@ class AFLLTOPass : public ModulePass { } -#ifdef USE_SPLIT void getAnalysisUsage(AnalysisUsage &AU) const override { ModulePass::getAnalysisUsage(AU); @@ -100,8 +86,6 @@ class AFLLTOPass : public ModulePass { } -#endif - // Calculate the number of average collisions that would occur if all // location IDs would be assigned randomly (like normal afl/afl++). // This uses the "balls in bins" algorithm. @@ -168,7 +152,7 @@ class AFLLTOPass : public ModulePass { bool runOnModule(Module &M) override; protected: - int afl_global_id = 1, debug = 0; + int afl_global_id = 1, debug = 0, autodictionary = 0; uint32_t be_quiet = 0, inst_blocks = 0, inst_funcs = 0, total_instr = 0; }; @@ -177,22 +161,19 @@ class AFLLTOPass : public ModulePass { bool AFLLTOPass::runOnModule(Module &M) { - LLVMContext &C = M.getContext(); + LLVMContext & C = M.getContext(); + std::vector<std::string> dictionary; + std::vector<CallInst *> calls; + DenseMap<Value *, std::string *> valueMap; - IntegerType * Int8Ty = IntegerType::getInt8Ty(C); - IntegerType * Int32Ty = IntegerType::getInt32Ty(C); - struct timeval tv; - struct timezone tz; - u32 rand_seed; + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); - /* Setup random() so we get Actually Random(TM) outputs from AFL_R() */ - gettimeofday(&tv, &tz); - rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); - AFL_SR(rand_seed); + if (getenv("AFL_DEBUG")) debug = 1; /* Show a banner */ - if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) { + if ((isatty(2) && !getenv("AFL_QUIET")) || debug) { SAYF(cCYA "afl-llvm-lto" VERSION cRST " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n"); @@ -201,9 +182,9 @@ bool AFLLTOPass::runOnModule(Module &M) { be_quiet = 1; -#if LLVM_VERSION_MAJOR < 9 - char *neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO"); -#endif + if (getenv("AFL_LLVM_AUTODICTIONARY") || + getenv("AFL_LLVM_LTO_AUTODICTIONARY")) + autodictionary = 1; /* Get globals for the SHM region and the previous location. Note that __afl_prev_loc is thread-local. */ @@ -224,14 +205,320 @@ bool AFLLTOPass::runOnModule(Module &M) { if (F.size() < 2) continue; if (isBlacklisted(&F)) continue; -#ifdef USE_SPLIT - // DominatorTree &DT = - // getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); LoopInfo & LI = - // getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo(); -#endif - std::vector<BasicBlock *> InsBlocks; + if (autodictionary) { + + /* Some implementation notes. + * + * We try to handle 3 cases: + * - memcmp("foo", arg, 3) <- literal string + * - static char globalvar[] = "foo"; + * memcmp(globalvar, arg, 3) <- global variable + * - char localvar[] = "foo"; + * memcmp(locallvar, arg, 3) <- local variable + * + * The local variable case is the hardest. We can only detect that + * case if there is no reassignment or change in the variable. + * And it might not work across llvm version. + * What we do is hooking the initializer function for local variables + * (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned + * variable. And if that variable is then used in a compare function + * we use that noted string. + * This seems not to work for tokens that have a size <= 4 :-( + * + * - if the compared length is smaller than the string length we + * save the full string. This is likely better for fuzzing but + * might be wrong in a few cases depending on optimizers + * + * - not using StringRef because there is a bug in the llvm 11 + * checkout I am using which sometimes points to wrong strings + * + * Over and out. Took me a full day. damn. mh/vh + */ + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + + if ((callInst = dyn_cast<CallInst>(&IN))) { + + bool isStrcmp = true; + bool isMemcmp = true; + bool isStrncmp = true; + bool isStrcasecmp = true; + bool isStrncasecmp = true; + bool isIntMemcpy = true; + bool addedNull = false; + uint8_t optLen = 0; + + Function *Callee = callInst->getCalledFunction(); + if (!Callee) continue; + if (callInst->getCallingConv() != llvm::CallingConv::C) continue; + std::string FuncName = Callee->getName().str(); + isStrcmp &= !FuncName.compare("strcmp"); + isMemcmp &= !FuncName.compare("memcmp"); + isStrncmp &= !FuncName.compare("strncmp"); + isStrcasecmp &= !FuncName.compare("strcasecmp"); + isStrncasecmp &= !FuncName.compare("strncasecmp"); + isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64"); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy) + continue; + + /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function + * prototype */ + FunctionType *FT = Callee->getFunctionType(); + + isStrcmp &= FT->getNumParams() == 2 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()); + isStrcasecmp &= FT->getNumParams() == 2 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()); + isMemcmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy() && + FT->getParamType(2)->isIntegerTy(); + isStrncmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + isStrncasecmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy) + continue; + + /* is a str{n,}{case,}cmp/memcmp, check if we have + * str{case,}cmp(x, "const") or str{case,}cmp("const", x) + * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..) + * memcmp(x, "const", ..) or memcmp("const", x, ..) */ + Value *Str1P = callInst->getArgOperand(0), + *Str2P = callInst->getArgOperand(1); + std::string Str1, Str2; + StringRef TmpStr; + bool HasStr1 = getConstantStringInfo(Str1P, TmpStr); + if (TmpStr.empty()) + HasStr1 = false; + else + Str1 = TmpStr.str(); + bool HasStr2 = getConstantStringInfo(Str2P, TmpStr); + if (TmpStr.empty()) + HasStr2 = false; + else + Str2 = TmpStr.str(); + + if (debug) + fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n", + FuncName.c_str(), Str1P, Str1P->getName().str().c_str(), + Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P, + Str2P->getName().str().c_str(), Str2.c_str(), + HasStr2 == true ? "true" : "false"); + + // we handle the 2nd parameter first because of llvm memcpy + if (!HasStr2) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str2P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr2 = true; + Str2 = Array->getAsString().str(); + + } + + } + + } + + } + + // for the internal memcpy routine we only care for the second + // parameter and are not reporting anything. + if (isIntMemcpy == true) { + + if (HasStr2 == true) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t literalLength = Str2.size(); + uint64_t optLength = ilen->getZExtValue(); + if (literalLength + 1 == optLength) { + + Str2.append("\0", 1); // add null byte + addedNull = true; + + } + + } + + valueMap[Str1P] = new std::string(Str2); + + if (debug) + fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P); + continue; + + } + + continue; + + } + + // Neither a literal nor a global variable? + // maybe it is a local variable that we saved + if (!HasStr2) { + + std::string *strng = valueMap[Str2P]; + if (strng && !strng->empty()) { + + Str2 = *strng; + HasStr2 = true; + if (debug) + fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(), + Str2P); + + } + + } + + if (!HasStr1) { + + auto Ptr = dyn_cast<ConstantExpr>(Str1P); + + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr1 = true; + Str1 = Array->getAsString().str(); + + } + + } + + } + + } + + // Neither a literal nor a global variable? + // maybe it is a local variable that we saved + if (!HasStr1) { + + std::string *strng = valueMap[Str1P]; + if (strng && !strng->empty()) { + + Str1 = *strng; + HasStr1 = true; + if (debug) + fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(), + Str1P); + + } + + } + + /* handle cases of one string is const, one string is variable */ + if (!(HasStr1 ^ HasStr2)) continue; + + std::string thestring; + + if (HasStr1) + thestring = Str1; + else + thestring = Str2; + + optLen = thestring.length(); + + if (isMemcmp || isStrncmp || isStrncasecmp) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t literalLength = optLen; + optLen = ilen->getZExtValue(); + if (literalLength + 1 == optLen) { // add null byte + thestring.append("\0", 1); + addedNull = true; + + } + + } + + } + + // add null byte if this is a string compare function and a null + // was not already added + if (addedNull == false && !isMemcmp) { + + thestring.append("\0", 1); // add null byte + optLen++; + + } + + if (!be_quiet) { + + std::string outstring; + fprintf(stderr, "%s: length %u/%u \"", FuncName.c_str(), optLen, + (unsigned int)thestring.length()); + for (uint8_t i = 0; i < thestring.length(); i++) { + + uint8_t c = thestring[i]; + if (c <= 32 || c >= 127) + fprintf(stderr, "\\x%02x", c); + else + fprintf(stderr, "%c", c); + + } + + fprintf(stderr, "\"\n"); + + } + + // we take the longer string, even if the compare was to a + // shorter part. Note that depending on the optimizer of the + // compiler this can be wrong, but it is more likely that this + // is helping the fuzzer + if (optLen != thestring.length()) optLen = thestring.length(); + if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA; + if (optLen < MIN_AUTO_EXTRA) // too short? skip + continue; + + dictionary.push_back(thestring.substr(0, optLen)); + + } + + } + + } + + } + for (auto &BB : F) { uint32_t succ = 0; @@ -274,11 +561,7 @@ bool AFLLTOPass::runOnModule(Module &M) { for (uint32_t j = 0; j < Successors.size(); j++) { -#ifdef USE_SPLIT BasicBlock *newBB = llvm::SplitEdge(origBB, Successors[j]); -#else - BasicBlock *newBB = BasicBlock::Create(C, "", &F, nullptr); -#endif if (!newBB) { @@ -287,12 +570,8 @@ bool AFLLTOPass::runOnModule(Module &M) { } -#ifdef USE_SPLIT BasicBlock::iterator IP = newBB->getFirstInsertionPt(); IRBuilder<> IRB(&(*IP)); -#else - IRBuilder<> IRB(&(*newBB)); -#endif /* Set the ID of the inserted basic block */ @@ -313,38 +592,12 @@ bool AFLLTOPass::runOnModule(Module &M) { Value *Incr = IRB.CreateAdd(Counter, One); -#if LLVM_VERSION_MAJOR < 9 - if (neverZero_counters_str != - NULL) { // with llvm 9 we make this the default as the bug in - // llvm is then fixed -#endif - auto cf = IRB.CreateICmpEQ(Incr, Zero); - auto carry = IRB.CreateZExt(cf, Int8Ty); - Incr = IRB.CreateAdd(Incr, carry); -#if LLVM_VERSION_MAJOR < 9 - - } - -#endif + auto cf = IRB.CreateICmpEQ(Incr, Zero); + auto carry = IRB.CreateZExt(cf, Int8Ty); + Incr = IRB.CreateAdd(Incr, carry); IRB.CreateStore(Incr, MapPtrIdx) ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); -#ifdef USE_SPLIT - // nothing -#else - - // Unconditional jump to the destination BB - - IRB.CreateBr(Successors[j]); - - // Replace the original destination to this newly inserted BB - - origBB->replacePhiUsesWith(Successors[j], newBB); - BasicBlock *S = Successors[j]; - S->replacePhiUsesWith(origBB, newBB); - TI->setSuccessor(j, newBB); - -#endif // done :) inst_blocks++; @@ -358,28 +611,147 @@ bool AFLLTOPass::runOnModule(Module &M) { } // save highest location ID to global variable - - if (afl_global_id > MAP_SIZE) { + // do this after each function to fail faster + if (!be_quiet && afl_global_id > MAP_SIZE) { uint32_t pow2map = 1, map = afl_global_id; while ((map = map >> 1)) pow2map++; - FATAL( - "We have %u blocks to instrument but the map size is only %u! Edit " - "config.h and set MAP_SIZE_POW2 from %u to %u, then recompile " - "afl-fuzz and llvm_mode.", - afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map); + WARNF( + "We have %u blocks to instrument but the map size is only %u. Either " + "edit config.h and set MAP_SIZE_POW2 from %u to %u, then recompile " + "afl-fuzz and llvm_mode and then make this target - or set " + "AFL_MAP_SIZE with at least size %u when running afl-fuzz with this " + "target.", + afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map, afl_global_id); } - if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) { + if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL || dictionary.size()) { + + // yes we could create our own function, insert it into ctors ... + // but this would be a pain in the butt ... so we use afl-llvm-rt-lto.o + + Function *f = M.getFunction("__afl_auto_init_globals"); + + if (!f) { + + fprintf(stderr, + "Error: init function could not be found (this should not " + "happen)\n"); + exit(-1); + + } + + BasicBlock *bb = &f->getEntryBlock(); + if (!bb) { + + fprintf(stderr, + "Error: init function does not have an EntryBlock (this should " + "not happen)\n"); + exit(-1); + + } + + BasicBlock::iterator IP = bb->getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + + if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) { - GlobalVariable *AFLFinalLoc = new GlobalVariable( - M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc", 0, - GlobalVariable::GeneralDynamicTLSModel, 0, false); - ConstantInt *const_loc = ConstantInt::get(Int32Ty, afl_global_id); - AFLFinalLoc->setAlignment(4); - AFLFinalLoc->setInitializer(const_loc); + uint32_t write_loc = afl_global_id; + + if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3); + + if (write_loc <= MAP_SIZE && write_loc <= 0x800000) { + + GlobalVariable *AFLFinalLoc = new GlobalVariable( + M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, + "__afl_final_loc", 0, GlobalVariable::GeneralDynamicTLSModel, 0, + false); + ConstantInt *const_loc = ConstantInt::get(Int32Ty, write_loc); + StoreInst * StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc); + StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } + + if (dictionary.size()) { + + size_t memlen = 0, count = 0, offset = 0; + char * ptr; + + for (auto token : dictionary) { + + memlen += token.length(); + count++; + + } + + if (!be_quiet) + printf("AUTODICTIONARY: %lu string%s found\n", count, + count == 1 ? "" : "s"); + + if (count) { + + if ((ptr = (char *)malloc(memlen + count)) == NULL) { + + fprintf(stderr, "Error: malloc for %lu bytes failed!\n", + memlen + count); + exit(-1); + + } + + count = 0; + + for (auto token : dictionary) { + + if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) { + + ptr[offset++] = (uint8_t)token.length(); + memcpy(ptr + offset, token.c_str(), token.length()); + offset += token.length(); + count++; + + } + + } + + GlobalVariable *AFLDictionaryLen = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, + "__afl_dictionary_len", 0, GlobalVariable::GeneralDynamicTLSModel, + 0, false); + ConstantInt *const_len = ConstantInt::get(Int32Ty, offset); + StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen); + StoreDictLen->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + ArrayType *ArrayTy = ArrayType::get(IntegerType::get(C, 8), offset); + GlobalVariable *AFLInternalDictionary = new GlobalVariable( + M, ArrayTy, true, GlobalValue::ExternalLinkage, + ConstantDataArray::get(C, + *(new ArrayRef<char>((char *)ptr, offset))), + "__afl_internal_dictionary", 0, + GlobalVariable::GeneralDynamicTLSModel, 0, false); + AFLInternalDictionary->setInitializer(ConstantDataArray::get( + C, *(new ArrayRef<char>((char *)ptr, offset)))); + AFLInternalDictionary->setConstant(true); + + GlobalVariable *AFLDictionary = new GlobalVariable( + M, PointerType::get(Int8Ty, 0), false, GlobalValue::ExternalLinkage, + 0, "__afl_dictionary"); + + Value *AFLDictOff = IRB.CreateGEP(AFLInternalDictionary, Zero); + Value *AFLDictPtr = + IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Ty, 0)); + StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary); + StoreDict->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } } @@ -423,5 +795,5 @@ static RegisterPass<AFLLTOPass> X("afl-lto", "afl++ LTO instrumentation pass", false, false); static RegisterStandardPasses RegisterAFLLTOPass( - PassManagerBuilder::EP_OptimizerLast, registerAFLLTOPass); + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerAFLLTOPass); diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc index b4249802..71abcd05 100644 --- a/llvm_mode/afl-llvm-pass.so.cc +++ b/llvm_mode/afl-llvm-pass.so.cc @@ -125,6 +125,7 @@ class AFLCoverage : public ModulePass { std::list<std::string> myWhitelist; uint32_t ngram_size = 0; uint32_t debug = 0; + uint32_t map_size = MAP_SIZE; char * ctx_str = NULL; }; @@ -192,6 +193,19 @@ bool AFLCoverage::runOnModule(Module &M) { be_quiet = 1; + /* + char *ptr; + if ((ptr = getenv("AFL_MAP_SIZE")) || (ptr = getenv("AFL_MAPSIZE"))) { + + map_size = atoi(ptr); + if (map_size < 8 || map_size > (1 << 29)) + FATAL("illegal AFL_MAP_SIZE %u, must be between 2^3 and 2^30", + map_size); if (map_size % 8) map_size = (((map_size >> 3) + 1) << 3); + + } + + */ + /* Decide instrumentation ratio */ char * inst_ratio_str = getenv("AFL_INST_RATIO"); @@ -365,7 +379,7 @@ bool AFLCoverage::runOnModule(Module &M) { // if yes we store a context ID for this function in the global var if (has_calls) { - ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(MAP_SIZE)); + ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size)); StoreInst * StoreCtx = IRB.CreateStore(NewCtx, AFLContext); StoreCtx->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); @@ -509,7 +523,7 @@ bool AFLCoverage::runOnModule(Module &M) { /* Make up cur_loc */ // cur_loc++; - cur_loc = AFL_R(MAP_SIZE); + cur_loc = AFL_R(map_size); /* There is a problem with Ubuntu 18.04 and llvm 6.0 (see issue #63). The inline function successors() is not inlined and also not found at runtime @@ -705,6 +719,56 @@ bool AFLCoverage::runOnModule(Module &M) { } + /* + // This is currently disabled because we not only need to create/insert a + // function (easy), but also add it as a constructor with an ID < 5 + + if (getenv("AFL_LLVM_DONTWRITEID") == NULL) { + + // yes we could create our own function, insert it into ctors ... + // but this would be a pain in the butt ... so we use afl-llvm-rt.o + + Function *f = ... + + if (!f) { + + fprintf(stderr, + "Error: init function could not be created (this should not + happen)\n"); exit(-1); + + } + + ... constructor for f = 4 + + BasicBlock *bb = &f->getEntryBlock(); + if (!bb) { + + fprintf(stderr, + "Error: init function does not have an EntryBlock (this should + not happen)\n"); exit(-1); + + } + + BasicBlock::iterator IP = bb->getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + + if (map_size <= 0x800000) { + + GlobalVariable *AFLFinalLoc = new GlobalVariable( + M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, + "__afl_final_loc", 0, GlobalVariable::GeneralDynamicTLSModel, 0, + false); + ConstantInt *const_loc = ConstantInt::get(Int32Ty, map_size); + StoreInst * StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc); + StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } + + */ + /* Say something nice. */ if (!be_quiet) { diff --git a/llvm_mode/afl-llvm-rt-lto.o.c b/llvm_mode/afl-llvm-rt-lto.o.c new file mode 100644 index 00000000..5921f968 --- /dev/null +++ b/llvm_mode/afl-llvm-rt-lto.o.c @@ -0,0 +1,23 @@ +/* + american fuzzy lop++ - LLVM instrumentation bootstrap + ----------------------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +*/ + +// to prevent the function from being removed +unsigned char __afl_lto_mode = 0; + +/* Proper initialization routine. */ + +__attribute__((constructor(0))) void __afl_auto_init_globals(void) { + + __afl_lto_mode = 1; + +} + diff --git a/llvm_mode/afl-llvm-rt.o.c b/llvm_mode/afl-llvm-rt.o.c index 3651fd97..f286e66a 100644 --- a/llvm_mode/afl-llvm-rt.o.c +++ b/llvm_mode/afl-llvm-rt.o.c @@ -50,11 +50,7 @@ Basically, we need to make sure that the forkserver is initialized after the LLVM-generated runtime initialization pass, not before. */ -#ifdef USE_TRACE_PC #define CONST_PRIO 5 -#else -#define CONST_PRIO 0 -#endif /* ^USE_TRACE_PC */ #include <sys/mman.h> #include <fcntl.h> @@ -65,17 +61,20 @@ u8 __afl_area_initial[MAP_SIZE]; u8 *__afl_area_ptr = __afl_area_initial; +u8 *__afl_dictionary; #ifdef __ANDROID__ PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX]; u32 __afl_final_loc; u32 __afl_prev_ctx; u32 __afl_cmp_counter; +u32 __afl_dictionary_len; #else __thread PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX]; __thread u32 __afl_final_loc; __thread u32 __afl_prev_ctx; __thread u32 __afl_cmp_counter; +__thread u32 __afl_dictionary_len; #endif struct cmp_map *__afl_cmp_map; @@ -100,6 +99,10 @@ static void __afl_map_shm(void) { const char * shm_file_path = id_str; int shm_fd = -1; unsigned char *shm_base = NULL; + unsigned int map_size = MAP_SIZE + + if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE) map_size = + __afl_final_loc; /* create the shared memory segment as if it was a file */ shm_fd = shm_open(shm_file_path, O_RDWR, 0600); @@ -111,7 +114,7 @@ static void __afl_map_shm(void) { } /* map the shared memory segment to the address space of the process */ - shm_base = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); + shm_base = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); if (shm_base == MAP_FAILED) { close(shm_fd); @@ -187,8 +190,15 @@ static void __afl_map_shm(void) { #ifdef __linux__ static void __afl_start_snapshots(void) { - static u8 tmp[4]; + static u8 tmp[4] = {0, 0, 0, 0}; s32 child_pid; + u32 status = 0; + u32 map_size = MAP_SIZE; + u32 already_read_first = 0; + u32 was_killed; + + if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE) + map_size = __afl_final_loc; u8 child_stopped = 0; @@ -197,16 +207,74 @@ static void __afl_start_snapshots(void) { /* Phone home and tell the parent that we're OK. If parent isn't there, assume we're not running in forkserver mode and just execute program. */ + status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT); + if (map_size <= 0x800000) + status |= (FS_OPT_SET_MAPSIZE(map_size) | FS_OPT_MAPSIZE); + if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT; + memcpy(tmp, &status, 4); + if (write(FORKSRV_FD + 1, tmp, 4) != 4) return; + if (__afl_dictionary_len > 0 && __afl_dictionary) { + + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) == + (FS_OPT_ENABLED | FS_OPT_AUTODICT)) { + + // great lets pass the dictionary through the forkserver FD + u32 len = __afl_dictionary_len, offset = 0; + s32 ret; + + if (write(FORKSRV_FD + 1, &len, 4) != 4) { + + write(2, "Error: could not send dictionary len\n", + strlen("Error: could not send dictionary len\n")); + _exit(1); + + } + + while (len != 0) { + + ret = write(FORKSRV_FD + 1, __afl_dictionary + offset, len); + + if (ret < 1) { + + write(2, "Error: could not send dictionary\n", + strlen("Error: could not send dictionary\n")); + _exit(1); + + } + + len -= ret; + offset += ret; + + } + + } else { + + // uh this forkserver master does not understand extended option passing + // or does not want the dictionary + already_read_first = 1; + + } + + } + while (1) { - u32 was_killed; int status; - /* Wait for parent by reading from the pipe. Abort if read fails. */ + if (already_read_first) { - if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + already_read_first = 0; + + } else { + + /* Wait for parent by reading from the pipe. Abort if read fails. */ + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + } /* If we stopped the child in persistent mode, but there was a race condition and afl-fuzz already issued SIGKILL, write off the old @@ -291,26 +359,92 @@ static void __afl_start_forkserver(void) { #endif - static u8 tmp[4]; - s32 child_pid; + u8 tmp[4] = {0, 0, 0, 0}; + s32 child_pid; + u32 status = 0; + u32 map_size = MAP_SIZE; + u32 already_read_first = 0; + u32 was_killed; + + if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE) + map_size = __afl_final_loc; u8 child_stopped = 0; void (*old_sigchld_handler)(int) = 0; // = signal(SIGCHLD, SIG_DFL); + if (map_size <= 0x800000) + status |= (FS_OPT_SET_MAPSIZE(map_size) | FS_OPT_MAPSIZE); + if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT; + if (status) status |= (FS_OPT_ENABLED); + memcpy(tmp, &status, 4); + /* Phone home and tell the parent that we're OK. If parent isn't there, assume we're not running in forkserver mode and just execute program. */ if (write(FORKSRV_FD + 1, tmp, 4) != 4) return; + if (__afl_dictionary_len > 0 && __afl_dictionary) { + + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) == + (FS_OPT_ENABLED | FS_OPT_AUTODICT)) { + + // great lets pass the dictionary through the forkserver FD + u32 len = __afl_dictionary_len, offset = 0; + s32 ret; + + if (write(FORKSRV_FD + 1, &len, 4) != 4) { + + write(2, "Error: could not send dictionary len\n", + strlen("Error: could not send dictionary len\n")); + _exit(1); + + } + + while (len != 0) { + + ret = write(FORKSRV_FD + 1, __afl_dictionary + offset, len); + + if (ret < 1) { + + write(2, "Error: could not send dictionary\n", + strlen("Error: could not send dictionary\n")); + _exit(1); + + } + + len -= ret; + offset += ret; + + } + + } else { + + // uh this forkserver master does not understand extended option passing + // or does not want the dictionary + already_read_first = 1; + + } + + } + while (1) { - u32 was_killed; int status; /* Wait for parent by reading from the pipe. Abort if read fails. */ - if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + if (already_read_first) { + + already_read_first = 0; + + } else { + + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + } /* If we stopped the child in persistent mode, but there was a race condition and afl-fuzz already issued SIGKILL, write off the old @@ -378,8 +512,12 @@ static void __afl_start_forkserver(void) { int __afl_persistent_loop(unsigned int max_cnt) { - static u8 first_pass = 1; - static u32 cycle_cnt; + static u8 first_pass = 1; + static u32 cycle_cnt; + unsigned int map_size = MAP_SIZE; + + if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE) + map_size = __afl_final_loc; if (first_pass) { @@ -390,7 +528,7 @@ int __afl_persistent_loop(unsigned int max_cnt) { if (is_persistent) { - memset(__afl_area_ptr, 0, MAP_SIZE); + memset(__afl_area_ptr, 0, map_size); __afl_area_ptr[0] = 1; memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T)); @@ -513,13 +651,30 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { ///// CmpLog instrumentation -void __cmplog_ins_hook1(uint8_t Arg1, uint8_t Arg2) { +void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2) { + + if (!__afl_cmp_map) return; - return; + uintptr_t k = (uintptr_t)__builtin_return_address(0); + k = (k >> 4) ^ (k << 8); + k &= CMP_MAP_W - 1; + + __afl_cmp_map->headers[k].type = CMP_TYPE_INS; + + u32 hits = __afl_cmp_map->headers[k].hits; + __afl_cmp_map->headers[k].hits = hits + 1; + // if (!__afl_cmp_map->headers[k].cnt) + // __afl_cmp_map->headers[k].cnt = __afl_cmp_counter++; + + __afl_cmp_map->headers[k].shape = 0; + + hits &= CMP_MAP_H - 1; + __afl_cmp_map->log[k][hits].v0 = arg1; + __afl_cmp_map->log[k][hits].v1 = arg2; } -void __cmplog_ins_hook2(uint16_t Arg1, uint16_t Arg2) { +void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2) { if (!__afl_cmp_map) return; @@ -531,19 +686,16 @@ void __cmplog_ins_hook2(uint16_t Arg1, uint16_t Arg2) { u32 hits = __afl_cmp_map->headers[k].hits; __afl_cmp_map->headers[k].hits = hits + 1; - // if (!__afl_cmp_map->headers[k].cnt) - // __afl_cmp_map->headers[k].cnt = __afl_cmp_counter++; __afl_cmp_map->headers[k].shape = 1; - //__afl_cmp_map->headers[k].type = CMP_TYPE_INS; hits &= CMP_MAP_H - 1; - __afl_cmp_map->log[k][hits].v0 = Arg1; - __afl_cmp_map->log[k][hits].v1 = Arg2; + __afl_cmp_map->log[k][hits].v0 = arg1; + __afl_cmp_map->log[k][hits].v1 = arg2; } -void __cmplog_ins_hook4(uint32_t Arg1, uint32_t Arg2) { +void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2) { if (!__afl_cmp_map) return; @@ -559,12 +711,12 @@ void __cmplog_ins_hook4(uint32_t Arg1, uint32_t Arg2) { __afl_cmp_map->headers[k].shape = 3; hits &= CMP_MAP_H - 1; - __afl_cmp_map->log[k][hits].v0 = Arg1; - __afl_cmp_map->log[k][hits].v1 = Arg2; + __afl_cmp_map->log[k][hits].v0 = arg1; + __afl_cmp_map->log[k][hits].v1 = arg2; } -void __cmplog_ins_hook8(uint64_t Arg1, uint64_t Arg2) { +void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2) { if (!__afl_cmp_map) return; @@ -580,8 +732,8 @@ void __cmplog_ins_hook8(uint64_t Arg1, uint64_t Arg2) { __afl_cmp_map->headers[k].shape = 7; hits &= CMP_MAP_H - 1; - __afl_cmp_map->log[k][hits].v0 = Arg1; - __afl_cmp_map->log[k][hits].v1 = Arg2; + __afl_cmp_map->log[k][hits].v0 = arg1; + __afl_cmp_map->log[k][hits].v1 = arg2; } @@ -596,28 +748,28 @@ void __cmplog_ins_hook8(uint64_t Arg1, uint64_t Arg2) { #pragma weak __sanitizer_cov_trace_cmp4 = __cmplog_ins_hook4 #pragma weak __sanitizer_cov_trace_cmp8 = __cmplog_ins_hook8 #else -void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) +void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2) __attribute__((alias("__cmplog_ins_hook1"))); -void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) +void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2) __attribute__((alias("__cmplog_ins_hook2"))); -void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) +void __sanitizer_cov_trace_const_cmp4(uint32_t arg1, uint32_t arg2) __attribute__((alias("__cmplog_ins_hook4"))); -void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) +void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2) __attribute__((alias("__cmplog_ins_hook8"))); -void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) +void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2) __attribute__((alias("__cmplog_ins_hook1"))); -void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) +void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2) __attribute__((alias("__cmplog_ins_hook2"))); -void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) +void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) __attribute__((alias("__cmplog_ins_hook4"))); -void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) +void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2) __attribute__((alias("__cmplog_ins_hook8"))); #endif /* defined(__APPLE__) */ -void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) { +void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) { - for (uint64_t i = 0; i < Cases[0]; i++) { + for (uint64_t i = 0; i < cases[0]; i++) { uintptr_t k = (uintptr_t)__builtin_return_address(0) + i; k = (k >> 4) ^ (k << 8); @@ -631,8 +783,8 @@ void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) { __afl_cmp_map->headers[k].shape = 7; hits &= CMP_MAP_H - 1; - __afl_cmp_map->log[k][hits].v0 = Val; - __afl_cmp_map->log[k][hits].v1 = Cases[i + 2]; + __afl_cmp_map->log[k][hits].v0 = val; + __afl_cmp_map->log[k][hits].v1 = cases[i + 2]; } diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc index 2ca70659..84a9b8d9 100644 --- a/llvm_mode/compare-transform-pass.so.cc +++ b/llvm_mode/compare-transform-pass.so.cc @@ -112,11 +112,12 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, const bool processStrcasecmp, const bool processStrncasecmp) { - std::vector<CallInst *> calls; - LLVMContext & C = M.getContext(); - IntegerType * Int8Ty = IntegerType::getInt8Ty(C); - IntegerType * Int32Ty = IntegerType::getInt32Ty(C); - IntegerType * Int64Ty = IntegerType::getInt64Ty(C); + DenseMap<Value *, std::string *> valueMap; + std::vector<CallInst *> calls; + LLVMContext & C = M.getContext(); + IntegerType * Int8Ty = IntegerType::getInt8Ty(C); + IntegerType * Int32Ty = IntegerType::getInt32Ty(C); + IntegerType * Int64Ty = IntegerType::getInt64Ty(C); #if LLVM_VERSION_MAJOR < 9 Constant * @@ -263,6 +264,8 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, bool isStrncmp = processStrncmp; bool isStrcasecmp = processStrcasecmp; bool isStrncasecmp = processStrncasecmp; + bool isIntMemcpy = true; + bool indirect = false; Function *Callee = callInst->getCalledFunction(); if (!Callee) continue; @@ -273,9 +276,10 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, isStrncmp &= !FuncName.compare(StringRef("strncmp")); isStrcasecmp &= !FuncName.compare(StringRef("strcasecmp")); isStrncasecmp &= !FuncName.compare(StringRef("strncasecmp")); + isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64"); if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && - !isStrncasecmp) + !isStrncasecmp && !isIntMemcpy) continue; /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function @@ -309,7 +313,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, FT->getParamType(2)->isIntegerTy(); if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && - !isStrncasecmp) + !isStrncasecmp && !isIntMemcpy) continue; /* is a str{n,}{case,}cmp/memcmp, check if we have @@ -322,6 +326,97 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, bool HasStr1 = getConstantStringInfo(Str1P, Str1); bool HasStr2 = getConstantStringInfo(Str2P, Str2); + if (isIntMemcpy && HasStr2) { + + valueMap[Str1P] = new std::string(Str2.str()); + // fprintf(stderr, "saved %s for %p\n", Str2.str().c_str(), Str1P); + continue; + + } + + // not literal? maybe global or local variable + if (!(HasStr1 ^ HasStr2)) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str2P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr2 = true; + Str2 = Array->getAsString(); + valueMap[Str2P] = new std::string(Str2.str()); + // fprintf(stderr, "glo2 %s\n", Str2.str().c_str()); + + } + + } + + } + + if (!HasStr2) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str1P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr1 = true; + Str1 = Array->getAsString(); + valueMap[Str1P] = new std::string(Str1.str()); + // fprintf(stderr, "glo1 %s\n", Str1.str().c_str()); + + } + + } + + } + + } else if (isIntMemcpy) { + + valueMap[Str1P] = new std::string(Str2.str()); + // fprintf(stderr, "saved\n"); + + } + + if ((HasStr1 ^ HasStr2)) indirect = true; + + } + + if (isIntMemcpy) continue; + + if (!(HasStr1 ^ HasStr2)) { + + // do we have a saved local variable initialization? + std::string *val = valueMap[Str1P]; + if (val && !val->empty()) { + + Str1 = StringRef(*val); + HasStr1 = true; + indirect = true; + // fprintf(stderr, "loaded1 %s\n", Str1.str().c_str()); + + } else { + + val = valueMap[Str2P]; + if (val && !val->empty()) { + + Str2 = StringRef(*val); + HasStr2 = true; + indirect = true; + // fprintf(stderr, "loaded2 %s\n", Str2.str().c_str()); + + } + + } + + } + /* handle cases of one string is const, one string is variable */ if (!(HasStr1 ^ HasStr2)) continue; @@ -334,9 +429,8 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, if (!ilen) continue; /* final precaution: if size of compare is larger than constant * string skip it*/ - uint64_t literalLength = - HasStr1 ? GetStringLength(Str1P) : GetStringLength(Str2P); - if (literalLength < ilen->getZExtValue()) continue; + uint64_t literalLength = HasStr1 ? Str1.size() : Str2.size(); + if (literalLength + 1 < ilen->getZExtValue()) continue; } @@ -363,9 +457,9 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, std::string TmpConstStr; Value * VarStr; bool HasStr1 = getConstantStringInfo(Str1P, Str1); - getConstantStringInfo(Str2P, Str2); - uint64_t constLen, sizedLen; - bool isMemcmp = + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + uint64_t constLen, sizedLen; + bool isMemcmp = !callInst->getCalledFunction()->getName().compare(StringRef("memcmp")); bool isSizedcmp = isMemcmp || !callInst->getCalledFunction()->getName().compare( @@ -389,6 +483,29 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, } + if (!(HasStr1 ^ HasStr2)) { + + // do we have a saved local or global variable initialization? + std::string *val = valueMap[Str1P]; + if (val && !val->empty()) { + + Str1 = StringRef(*val); + HasStr1 = true; + + } else { + + val = valueMap[Str2P]; + if (val && !val->empty()) { + + Str2 = StringRef(*val); + HasStr2 = true; + + } + + } + + } + if (HasStr1) { TmpConstStr = Str1.str(); |