diff options
Diffstat (limited to 'llvm_mode')
-rw-r--r-- | llvm_mode/GNUmakefile | 364 | ||||
-rw-r--r-- | llvm_mode/LLVMInsTrim.so.cc | 3 | ||||
-rw-r--r-- | llvm_mode/Makefile | 351 | ||||
-rw-r--r-- | llvm_mode/README.ctx.md | 22 | ||||
-rw-r--r-- | llvm_mode/README.instrim.md | 7 | ||||
-rw-r--r-- | llvm_mode/README.lto.md | 201 | ||||
-rw-r--r-- | llvm_mode/README.md | 42 | ||||
-rw-r--r-- | llvm_mode/README.ngram.md | 28 | ||||
-rw-r--r-- | llvm_mode/README.snapshot.md | 16 | ||||
-rw-r--r-- | llvm_mode/afl-clang-fast.c | 498 | ||||
-rw-r--r-- | llvm_mode/afl-ld.c | 839 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-lto-instrumentation.so.cc | 575 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-pass.so.cc | 279 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-rt-lto.o.c | 23 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-rt.o.c | 287 | ||||
-rw-r--r-- | llvm_mode/compare-transform-pass.so.cc | 143 | ||||
-rw-r--r-- | llvm_mode/llvm-ngram-coverage.h | 18 | ||||
-rw-r--r-- | llvm_mode/split-compares-pass.so.cc | 4 |
18 files changed, 2014 insertions, 1686 deletions
diff --git a/llvm_mode/GNUmakefile b/llvm_mode/GNUmakefile new file mode 100644 index 00000000..d6a00580 --- /dev/null +++ b/llvm_mode/GNUmakefile @@ -0,0 +1,364 @@ +# +# american fuzzy lop++ - LLVM instrumentation +# ----------------------------------------- +# +# Written by Laszlo Szekeres <lszekeres@google.com> and +# Michal Zalewski +# +# LLVM integration design comes from Laszlo Szekeres. +# +# Copyright 2015, 2016 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +# For Heiko: +#TEST_MMAP=1 +HASH=\# + +PREFIX ?= /usr/local +HELPER_PATH ?= $(PREFIX)/lib/afl +BIN_PATH ?= $(PREFIX)/bin +DOC_PATH ?= $(PREFIX)/share/doc/afl +MISC_PATH ?= $(PREFIX)/share/afl +MAN_PATH ?= $(PREFIX)/man/man8 + +VERSION = $(shell grep '^$(HASH)define VERSION ' ../config.h | cut -d '"' -f2) + +ifeq "$(shell uname)" "OpenBSD" + LLVM_CONFIG ?= $(BIN_PATH)/llvm-config + HAS_OPT = $(shell test -x $(BIN_PATH)/opt && echo 0 || echo 1) + ifeq "$(HAS_OPT)" "1" + $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 11) -> e.g. "pkg_add llvm-7.0.1p9") + endif +else + LLVM_CONFIG ?= llvm-config +endif + +LLVMVER = $(shell $(LLVM_CONFIG) --version 2>/dev/null ) +LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 ) +LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 ) +LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//') +LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) +LLVM_STDCXX = gnu++11 +LLVM_APPLE = $(shell clang -v 2>&1 | grep -iq apple && echo 1 || echo 0) +LLVM_LTO = 0 + +ifeq "$(LLVMVER)" "" + $(warning [!] 
llvm_mode needs llvm-config, which was not found) +endif + +ifeq "$(LLVM_UNSUPPORTED)" "1" + $(warning llvm_mode only supports llvm versions 3.8.0 up to 11) +endif + +ifeq "$(LLVM_MAJOR)" "9" + $(info [+] llvm_mode detected llvm 9, enabling neverZero implementation) +endif + +ifeq "$(LLVM_NEW_API)" "1" + $(info [+] llvm_mode detected llvm 10+, enabling neverZero implementation and c++14) + LLVM_STDCXX = c++14 +endif + +ifeq "$(LLVM_MAJOR)" "11" + $(info [+] llvm_mode detected llvm 11, enabling afl-clang-lto LTO implementation) + LLVM_LTO = 1 +endif + +ifeq "$(LLVM_LTO)" "0" + $(info [+] llvm_mode detected llvm < 11, afl-clang-lto LTO will not be build.) +endif + +ifeq "$(LLVM_APPLE)" "1" + $(warning llvm_mode will not compile with Xcode clang...) +endif + +# We were using llvm-config --bindir to get the location of clang, but +# this seems to be busted on some distros, so using the one in $PATH is +# probably better. + +CC = $(LLVM_BINDIR)/clang +CXX = $(LLVM_BINDIR)/clang++ + +ifeq "$(shell test -e $(CC) || echo 1 )" "1" + # llvm-config --bindir may not providing a valid path, so ... + ifeq "$(shell test -e '$(BIN_DIR)/clang' && echo 1)" "1" + # we found one in the local install directory, lets use these + CC = $(BIN_DIR)/clang + CXX = $(BIN_DIR)/clang++ + else + # hope for the best + $(warning we have trouble finding clang/clang++ - llvm-config is not helping us) + CC = clang + CXX = clang++ + endif +endif + +# sanity check. +# Are versions of clang --version and llvm-config --version equal? 
+CLANGVER = $(shell $(CC) --version | sed -E -ne '/^.*version\ ([0-9]\.[0-9]\.[0-9]).*/s//\1/p') + +ifneq "$(CLANGVER)" "$(LLVMVER)" + CC = $(shell $(LLVM_CONFIG) --bindir)/clang + CXX = $(shell $(LLVM_CONFIG) --bindir)/clang++ +endif + +# After we set CC/CXX we can start makefile magic tests + +ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" + CFLAGS_OPT = -march=native +endif + +ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" + AFL_CLANG_FLTO ?= -flto=full +else + ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=thin -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" + AFL_CLANG_FLTO ?= -flto=thin + else + ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" + AFL_CLANG_FLTO ?= -flto + endif + endif +endif + +ifeq "$(LLVM_LTO)" "1" + ifneq "$(AFL_CLANG_FLTO)" "" + ifeq "$(AFL_REAL_LD)" "" + ifneq "$(shell readlink $(LLVM_BINDIR)/ld.lld 2>&1)" "" + AFL_REAL_LD = $(LLVM_BINDIR)/ld.lld + else + $(warn ld.lld not found, can not enable LTO mode) + LLVM_LTO = 0 + endif + endif + endif +endif + +AFL_CLANG_FUSELD= +ifneq "$(AFL_CLANG_FLTO)" "" +ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -fuse-ld=`command -v ld` -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" + AFL_CLANG_FUSELD=1 +endif +endif + +CFLAGS ?= -O3 -funroll-loops -D_FORTIFY_SOURCE=2 +override CFLAGS += -Wall \ + -g -Wno-pointer-sign -I ../include/ \ + -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \ + -DLLVM_BINDIR=\"$(LLVM_BINDIR)\" -DVERSION=\"$(VERSION)\" \ + -DLLVM_VERSION=\"$(LLVMVER)\" -DAFL_CLANG_FLTO=\"$(AFL_CLANG_FLTO)\" \ + -DAFL_REAL_LD=\"$(AFL_REAL_LD)\" -DAFL_CLANG_FUSELD=\"$(AFL_CLANG_FUSELD)\" -Wno-unused-function +ifdef AFL_TRACE_PC + $(info Compile option AFL_TRACE_PC is deprecated, 
just set AFL_LLVM_INSTRUMENT=PCGUARD to activate when compiling targets ) +endif + +CXXFLAGS ?= -O3 -funroll-loops -D_FORTIFY_SOURCE=2 +override CXXFLAGS += -Wall -g -I ../include/ \ + -DVERSION=\"$(VERSION)\" -Wno-variadic-macros + +CLANG_CFL = `$(LLVM_CONFIG) --cxxflags` -Wl,-znodelete -fno-rtti -fpic $(CXXFLAGS) +CLANG_LFL = `$(LLVM_CONFIG) --ldflags` $(LDFLAGS) + + +# User teor2345 reports that this is required to make things work on MacOS X. +ifeq "$(shell uname)" "Darwin" + CLANG_LFL += -Wl,-flat_namespace -Wl,-undefined,suppress +endif + +ifeq "$(shell uname)" "OpenBSD" + CLANG_LFL += `$(LLVM_CONFIG) --libdir`/libLLVM.so +endif + +ifeq "$(shell echo '$(HASH)include <sys/ipc.h>@$(HASH)include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(CC) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 ; rm -f .test2 )" "1" + SHMAT_OK=1 +else + SHMAT_OK=0 + CFLAGS+=-DUSEMMAP=1 + LDFLAGS += -lrt +endif + +ifeq "$(TEST_MMAP)" "1" + SHMAT_OK=0 + CFLAGS+=-DUSEMMAP=1 + LDFLAGS += -lrt +endif + + PROGS = ../afl-clang-fast ../afl-llvm-pass.so ../afl-llvm-lto-whitelist.so ../afl-llvm-lto-instrumentation.so ../libLLVMInsTrim.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so ../cmplog-routines-pass.so ../cmplog-instructions-pass.so + +# If prerequisites are not given, warn, do not build anything, and exit with code 0 +ifeq "$(LLVMVER)" "" + NO_BUILD = 1 +endif + +ifneq "$(LLVM_UNSUPPORTED)$(LLVM_APPLE)" "00" + NO_BUILD = 1 +endif + +ifeq "$(NO_BUILD)" "1" + TARGETS = no_build +else + TARGETS = test_shm test_deps $(PROGS) afl-clang-fast.8 test_build all_done +endif + +LLVM_MIN_4_0_1 = $(shell awk 'function tonum(ver, a) {split(ver,a,"."); return a[1]*1000000+a[2]*1000+a[3]} BEGIN { exit tonum(ARGV[1]) >= tonum(ARGV[2]) }' $(LLVMVER) 4.0.1; echo $$?) 
+ +all: $(TARGETS) + +ifeq "$(SHMAT_OK)" "1" + +test_shm: + @echo "[+] shmat seems to be working." + @rm -f .test2 + +else + +test_shm: + @echo "[-] shmat seems not to be working, switching to mmap implementation" + +endif + +no_build: + @printf "%b\\n" "\\033[0;31mPrerequisites are not met, skipping build llvm_mode\\033[0m" + +test_deps: + @echo "[*] Checking for working 'llvm-config'..." + ifneq "$(LLVM_APPLE)" "1" + @type $(LLVM_CONFIG) >/dev/null 2>&1 || ( echo "[-] Oops, can't find 'llvm-config'. Install clang or set \$$LLVM_CONFIG or \$$PATH beforehand."; echo " (Sometimes, the binary will be named llvm-config-3.5 or something like that.)"; exit 1 ) + endif + @echo "[*] Checking for working '$(CC)'..." + @type $(CC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(CC)'. Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 ) + @echo "[*] Checking for matching versions of '$(CC)' and '$(LLVM_CONFIG)'" +ifneq "$(CLANGVER)" "$(LLVMVER)" + @echo "[!] WARNING: we have llvm-config version $(LLVMVER) and a clang version $(CLANGVER)" + @echo "[!] Retrying with the clang compiler from llvm: CC=`llvm-config --bindir`/clang" +else + @echo "[*] We have llvm-config version $(LLVMVER) with a clang version $(CLANGVER), good." +endif + @echo "[*] Checking for '../afl-showmap'..." + @test -f ../afl-showmap || ( echo "[-] Oops, can't find '../afl-showmap'. Be sure to compile AFL first."; exit 1 ) + @echo "[+] All set and ready to build." 
+ +afl-common.o: ../src/afl-common.c + $(CC) $(CFLAGS) -c $< -o $@ $(LDFLAGS) + +../afl-clang-fast: afl-clang-fast.c afl-common.o | test_deps + $(CC) $(CFLAGS) $< afl-common.o -o $@ $(LDFLAGS) -DCFLAGS_OPT=\"$(CFLAGS_OPT)\" + ln -sf afl-clang-fast ../afl-clang-fast++ +ifneq "$(AFL_CLANG_FLTO)" "" +ifeq "$(LLVM_LTO)" "1" + ln -sf afl-clang-fast ../afl-clang-lto + ln -sf afl-clang-fast ../afl-clang-lto++ +endif +endif + +../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps + -$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) + +../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps +ifeq "$(LLVM_MIN_4_0_1)" "0" + $(info [!] N-gram branch coverage instrumentation is not available for llvm version $(LLVMVER)) +endif + $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) + +../afl-llvm-lto-whitelist.so: afl-llvm-lto-whitelist.so.cc +ifeq "$(LLVM_LTO)" "1" + $(CXX) $(CLANG_CFL) -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) +endif + +../afl-llvm-lto-instrumentation.so: afl-llvm-lto-instrumentation.so.cc +ifeq "$(LLVM_LTO)" "1" + $(CXX) $(CLANG_CFL) -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) + $(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto.o + @$(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -m64 -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto-64.o 2>/dev/null; if [ "$$?" = "0" ]; then : ; fi + @$(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -m32 -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto-32.o 2>/dev/null; if [ "$$?" 
= "0" ]; then : ; fi +endif + +# laf +../split-switches-pass.so: split-switches-pass.so.cc | test_deps + $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) +../compare-transform-pass.so: compare-transform-pass.so.cc | test_deps + $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) +../split-compares-pass.so: split-compares-pass.so.cc | test_deps + $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) +# /laf + +../cmplog-routines-pass.so: cmplog-routines-pass.cc | test_deps + $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) + +../cmplog-instructions-pass.so: cmplog-instructions-pass.cc | test_deps + $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) + +../afl-llvm-rt.o: afl-llvm-rt.o.c | test_deps + $(CC) $(CFLAGS) -Wno-unused-result -fPIC -c $< -o $@ + +../afl-llvm-rt-32.o: afl-llvm-rt.o.c | test_deps + @printf "[*] Building 32-bit variant of the runtime (-m32)... " + @$(CC) $(CFLAGS) -Wno-unused-result -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi + +../afl-llvm-rt-64.o: afl-llvm-rt.o.c | test_deps + @printf "[*] Building 64-bit variant of the runtime (-m64)... " + @$(CC) $(CFLAGS) -Wno-unused-result -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi + +test_build: $(PROGS) + @echo "[*] Testing the CC wrapper and instrumentation output..." + unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. 
AFL_CC=$(CC) AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS) + ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null + echo 1 | ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr1 ./test-instr + @rm -f test-instr + @cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please post to https://github.com/AFLplusplus/AFLplusplus/issues to troubleshoot the issue."; echo; exit 1; fi + @echo "[+] All right, the instrumentation seems to be working!" + +all_done: test_build + @echo "[+] All done! You can now use '../afl-clang-fast' to compile programs." + +.NOTPARALLEL: clean + +install: all + install -d -m 755 $${DESTDIR}$(BIN_PATH) $${DESTDIR}$(HELPER_PATH) $${DESTDIR}$(DOC_PATH) $${DESTDIR}$(MISC_PATH) + if [ -f ../afl-clang-fast -a -f ../libLLVMInsTrim.so -a -f ../afl-llvm-rt.o ]; then set -e; install -m 755 ../afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 ../libLLVMInsTrim.so ../afl-llvm-pass.so ../afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi + if [ -f ../afl-clang-lto ]; then set -e; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto++; install -m 755 ../afl-llvm-lto-instrumentation.so ../afl-llvm-rt-lto*.o ../afl-llvm-lto-whitelist.so $${DESTDIR}$(HELPER_PATH); fi + if [ -f ../afl-llvm-rt-32.o ]; then set -e; install -m 755 ../afl-llvm-rt-32.o $${DESTDIR}$(HELPER_PATH); fi + if [ -f ../afl-llvm-rt-64.o ]; then set -e; install -m 755 ../afl-llvm-rt-64.o $${DESTDIR}$(HELPER_PATH); fi + if [ -f ../compare-transform-pass.so ]; then set -e; install -m 755 ../compare-transform-pass.so $${DESTDIR}$(HELPER_PATH); fi + if [ -f 
../split-compares-pass.so ]; then set -e; install -m 755 ../split-compares-pass.so $${DESTDIR}$(HELPER_PATH); fi + if [ -f ../split-switches-pass.so ]; then set -e; install -m 755 ../split-switches-pass.so $${DESTDIR}$(HELPER_PATH); fi + if [ -f ../cmplog-instructions-pass.so ]; then set -e; install -m 755 ../cmplog-*-pass.so $${DESTDIR}$(HELPER_PATH); fi + set -e; if [ -f ../afl-clang-fast ] ; then ln -sf ../afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang ; ln -sf ../afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang++ ; else ln -sf ../afl-gcc $${DESTDIR}$(BIN_PATH)/afl-clang ; ln -sf ../afl-gcc $${DESTDIR}$(BIN_PATH)/afl-clang++; fi + install -m 644 README.*.md $${DESTDIR}$(DOC_PATH)/ + install -m 644 -T README.md $${DESTDIR}$(DOC_PATH)/README.llvm_mode.md + +vpath % .. +%.8: % + @echo .TH $* 8 `date "+%Y-%m-%d"` "afl++" > ../$@ + @echo .SH NAME >> ../$@ + @echo .B $* >> ../$@ + @echo >> ../$@ + @echo .SH SYNOPSIS >> ../$@ + @../$* -h 2>&1 | head -n 3 | tail -n 1 | sed 's/^\.\///' >> ../$@ + @echo >> ../$@ + @echo .SH OPTIONS >> ../$@ + @echo .nf >> ../$@ + @../$* -h 2>&1 | tail -n +4 >> ../$@ + @echo >> ../$@ + @echo .SH AUTHOR >> ../$@ + @echo "afl++ was written by Michal \"lcamtuf\" Zalewski and is maintained by Marc \"van Hauser\" Heuse <mh@mh-sec.de>, Heiko \"hexcoder-\" Eissfeldt <heiko.eissfeldt@hexco.de>, Andrea Fioraldi <andreafioraldi@gmail.com> and Dominik Maier <domenukk@gmail.com>" >> ../$@ + @echo The homepage of afl++ is: https://github.com/AFLplusplus/AFLplusplus >> ../$@ + @echo >> ../$@ + @echo .SH LICENSE >> ../$@ + @echo Apache License Version 2.0, January 2004 >> ../$@ + ln -sf afl-clang-fast.8 ../afl-clang-fast++.8 +ifneq "$(AFL_CLANG_FLTO)" "" +ifeq "$(LLVM_LTO)" "1" + ln -sf afl-clang-fast.8 ../afl-clang-lto.8 + ln -sf afl-clang-fast.8 ../afl-clang-lto++.8 +endif +endif + +clean: + rm -f *.o *.so *~ a.out core core.[1-9][0-9]* .test2 test-instr .test-instr0 .test-instr1 *.dwo + rm -f $(PROGS) afl-common.o ../afl-clang-fast++ ../afl-clang-lto 
../afl-clang-lto++ ../afl-clang*.8 ../ld ../afl-ld ../afl-llvm-rt*.o diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index a94eb907..8b23942c 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -509,10 +509,11 @@ struct InsTrim : public ModulePass { if (!be_quiet) { char modeline[100]; - snprintf(modeline, sizeof(modeline), "%s%s%s%s", + snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", getenv("AFL_HARDEN") ? "hardened" : "non-hardened", getenv("AFL_USE_ASAN") ? ", ASAN" : "", getenv("AFL_USE_MSAN") ? ", MSAN" : "", + getenv("AFL_USE_CFISAN") ? ", CFISAN" : "", getenv("AFL_USE_UBSAN") ? ", UBSAN" : ""); OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", total_instr, diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index 5ce0e579..0b306dde 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -1,349 +1,2 @@ -# -# american fuzzy lop++ - LLVM instrumentation -# ----------------------------------------- -# -# Written by Laszlo Szekeres <lszekeres@google.com> and -# Michal Zalewski -# -# LLVM integration design comes from Laszlo Szekeres. -# -# Copyright 2015, 2016 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# - -# For Heiko: -#TEST_MMAP=1 -HASH=\# - -PREFIX ?= /usr/local -HELPER_PATH = $(PREFIX)/lib/afl -BIN_PATH = $(PREFIX)/bin - -VERSION = $(shell grep '^$(HASH)define VERSION ' ../config.h | cut -d '"' -f2) - -ifeq "$(shell uname)" "OpenBSD" - LLVM_CONFIG ?= $(BIN_PATH)/llvm-config - HAS_OPT = $(shell test -x $(BIN_PATH)/opt && echo 0 || echo 1) - ifeq "$(HAS_OPT)" "1" - $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 11) -> e.g. 
"pkg_add llvm-7.0.1p9") - endif -else - LLVM_CONFIG ?= llvm-config -endif - -LLVMVER = $(shell $(LLVM_CONFIG) --version 2>/dev/null ) -LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 ) -LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 ) -LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//') -LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) -LLVM_STDCXX = gnu++11 -LLVM_APPLE = $(shell clang -v 2>&1 | grep -iq apple && echo 1 || echo 0) -LLVM_LTO = 0 - -ifeq "$(LLVMVER)" "" - $(warning [!] llvm_mode needs llvm-config, which was not found) -endif - -ifeq "$(LLVM_UNSUPPORTED)" "1" - $(warning llvm_mode only supports llvm versions 3.8.0 up to 11) -endif - -ifeq "$(LLVM_MAJOR)" "9" - $(info [+] llvm_mode detected llvm 9, enabling neverZero implementation) - $(info [+] llvm_mode detected llvm 9, enabling afl-clang-lto LTO implementation) - LLVM_LTO = 1 -endif - -ifeq "$(LLVM_NEW_API)" "1" - $(info [+] llvm_mode detected llvm 10+, enabling neverZero implementation and c++14) - $(info [+] llvm_mode detected llvm 9, enabling afl-clang-lto LTO implementation) - LLVM_STDCXX = c++14 - LLVM_LTO = 1 -endif - -ifeq "$(LLVM_LTO)" "0" - $(info [+] llvm_mode detected llvm < 9, afl-clang-lto LTO will not be build.) -endif - -ifeq "$(LLVM_APPLE)" "1" - $(warning llvm_mode will not compile with Xcode clang...) -endif - -# We were using llvm-config --bindir to get the location of clang, but -# this seems to be busted on some distros, so using the one in $PATH is -# probably better. - -CC = $(LLVM_BINDIR)/clang -CXX = $(LLVM_BINDIR)/clang++ - -ifeq "$(shell test -e $(CC) || echo 1 )" "1" - # llvm-config --bindir may not providing a valid path, so ... 
- ifeq "$(shell test -e '$(BIN_DIR)/clang' && echo 1)" "1" - # we found one in the local install directory, lets use these - CC = $(BIN_DIR)/clang - CXX = $(BIN_DIR)/clang++ - else - # hope for the best - $(warning we have trouble finding clang/clang++ - llvm-config is not helping us) - CC = clang - CXX = clang++ - endif -endif - -# sanity check. -# Are versions of clang --version and llvm-config --version equal? -CLANGVER = $(shell $(CC) --version | sed -E -ne '/^.*version\ ([0-9]\.[0-9]\.[0-9]).*/s//\1/p') - -ifneq "$(CLANGVER)" "$(LLVMVER)" - CC = $(shell $(LLVM_CONFIG) --bindir)/clang - CXX = $(shell $(LLVM_CONFIG) --bindir)/clang++ -endif - -# After we set CC/CXX we can start makefile magic tests - -ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" - CFLAGS_OPT = -march=native -endif - -ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" - AFL_CLANG_FLTO ?= -flto=full -else - ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=thin -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" - AFL_CLANG_FLTO ?= -flto=thin - else - ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" - AFL_CLANG_FLTO ?= -flto - endif - endif -endif - -ifneq "$(AFL_CLANG_FLTO)" "" -ifeq "$(AFL_REAL_LD)" "" - AFL_REAL_LD = $(shell readlink /bin/ld 2>/dev/null) - ifeq "$(AFL_REAL_LD)" "" - AFL_REAL_LD = $(shell readlink /usr/bin/ld 2>/dev/null) - endif -endif -endif - -CFLAGS ?= -O3 -funroll-loops -override CFLAGS = -Wall \ - -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign -I ../include/ \ - -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \ - -DLLVM_BINDIR=\"$(LLVM_BINDIR)\" -DVERSION=\"$(VERSION)\" \ - -DLLVM_VERSION=\"$(LLVMVER)\" -DAFL_CLANG_FLTO=\"$(AFL_CLANG_FLTO)\" \ - -DAFL_REAL_LD=\"$(AFL_REAL_LD)\" 
-Wno-unused-function -ifdef AFL_TRACE_PC - CFLAGS += -DUSE_TRACE_PC=1 -endif - -CXXFLAGS ?= -O3 -funroll-loops -override CXXFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -I ../include/ \ - -DVERSION=\"$(VERSION)\" -Wno-variadic-macros - -CLANG_CFL = `$(LLVM_CONFIG) --cxxflags` -Wl,-znodelete -fno-rtti -fpic $(CXXFLAGS) -CLANG_LFL = `$(LLVM_CONFIG) --ldflags` $(LDFLAGS) - - -# User teor2345 reports that this is required to make things work on MacOS X. -ifeq "$(shell uname)" "Darwin" - CLANG_LFL += -Wl,-flat_namespace -Wl,-undefined,suppress -endif - -ifeq "$(shell uname)" "OpenBSD" - CLANG_LFL += `$(LLVM_CONFIG) --libdir`/libLLVM.so -endif - -ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -fuse-ld=`type ld | awk '{print $$NF}'` -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" - CFLAGS += -DAFL_CLANG_FUSELD=1 -endif - -ifeq "$(shell echo '$(HASH)include <sys/ipc.h>@$(HASH)include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(CC) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 ; rm -f .test2 )" "1" - SHMAT_OK=1 -else - SHMAT_OK=0 - CFLAGS+=-DUSEMMAP=1 - LDFLAGS += -lrt -endif - -ifeq "$(TEST_MMAP)" "1" - SHMAT_OK=0 - CFLAGS+=-DUSEMMAP=1 - LDFLAGS += -lrt -endif - -ifndef AFL_TRACE_PC - PROGS = ../afl-clang-fast ../afl-ld ../afl-llvm-pass.so ../afl-llvm-lto-whitelist.so ../afl-llvm-lto-instrumentation.so ../libLLVMInsTrim.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so ../cmplog-routines-pass.so ../cmplog-instructions-pass.so -else - PROGS = ../afl-clang-fast ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so ../cmplog-routines-pass.so ../cmplog-instructions-pass.so -endif - -# If prerequisites are not given, warn, do not build anything, and exit with code 0 -ifeq 
"$(LLVMVER)" "" - NO_BUILD = 1 -endif - -ifneq "$(LLVM_UNSUPPORTED)$(LLVM_APPLE)" "00" - NO_BUILD = 1 -endif - -ifeq "$(NO_BUILD)" "1" - TARGETS = no_build -else - TARGETS = test_shm test_deps $(PROGS) afl-clang-fast.8 test_build all_done -endif - -all: $(TARGETS) - -ifeq "$(SHMAT_OK)" "1" - -test_shm: - @echo "[+] shmat seems to be working." - @rm -f .test2 - -else - -test_shm: - @echo "[-] shmat seems not to be working, switching to mmap implementation" - -endif - -no_build: - @printf "%b\\n" "\\033[0;31mPrerequisites are not met, skipping build llvm_mode\\033[0m" - -test_deps: -ifndef AFL_TRACE_PC - @echo "[*] Checking for working 'llvm-config'..." - ifneq "$(LLVM_APPLE)" "1" - @type $(LLVM_CONFIG) >/dev/null 2>&1 || ( echo "[-] Oops, can't find 'llvm-config'. Install clang or set \$$LLVM_CONFIG or \$$PATH beforehand."; echo " (Sometimes, the binary will be named llvm-config-3.5 or something like that.)"; exit 1 ) - endif -else - @echo "[!] Note: using -fsanitize=trace-pc mode (this will fail with older LLVM)." -endif - @echo "[*] Checking for working '$(CC)'..." - @type $(CC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(CC)'. Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 ) - @echo "[*] Checking for matching versions of '$(CC)' and '$(LLVM_CONFIG)'" -ifneq "$(CLANGVER)" "$(LLVMVER)" - @echo "[!] WARNING: we have llvm-config version $(LLVMVER) and a clang version $(CLANGVER)" - @echo "[!] Retrying with the clang compiler from llvm: CC=`llvm-config --bindir`/clang" -else - @echo "[*] We have llvm-config version $(LLVMVER) with a clang version $(CLANGVER), good." -endif - @echo "[*] Checking for '../afl-showmap'..." - @test -f ../afl-showmap || ( echo "[-] Oops, can't find '../afl-showmap'. Be sure to compile AFL first."; exit 1 ) - @echo "[+] All set and ready to build." 
- -afl-common.o: ../src/afl-common.c - $(CC) $(CFLAGS) -c $< -o $@ $(LDFLAGS) - -../afl-clang-fast: afl-clang-fast.c afl-common.o | test_deps - $(CC) $(CFLAGS) $< afl-common.o -o $@ $(LDFLAGS) -DCFLAGS_OPT=\"$(CFLAGS_OPT)\" - ln -sf afl-clang-fast ../afl-clang-fast++ -ifneq "$(AFL_CLANG_FLTO)" "" -ifeq "$(LLVM_LTO)" "1" - ln -sf afl-clang-fast ../afl-clang-lto - ln -sf afl-clang-fast ../afl-clang-lto++ -endif -endif - -../afl-ld: afl-ld.c -ifneq "$(AFL_CLANG_FLTO)" "" -ifeq "$(LLVM_LTO)" "1" - $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) - ln -sf afl-ld ../ld - @rm -f .test-instr - @-export AFL_QUIET=1 AFL_PATH=.. PATH="..:$(PATH)" ; ../afl-clang-lto -Wl,--afl -o .test-instr ../test-instr.c && echo "[+] afl-clang-lto and afl-ld seem to work fine :)" || echo "[!] WARNING: clang seems to have a hardcoded "'/bin/ld'" - check README.lto" - @rm -f .test-instr -endif -endif -../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps - -$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) - -../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps - $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) - -../afl-llvm-lto-whitelist.so: afl-llvm-lto-whitelist.so.cc -ifeq "$(LLVM_LTO)" "1" - $(CXX) $(CLANG_CFL) -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) -endif - -../afl-llvm-lto-instrumentation.so: afl-llvm-lto-instrumentation.so.cc MarkNodes.cc -ifeq "$(LLVM_LTO)" "1" - $(CXX) $(CLANG_CFL) -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) -endif - -# laf -../split-switches-pass.so: split-switches-pass.so.cc | test_deps - $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) -../compare-transform-pass.so: compare-transform-pass.so.cc | test_deps - $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) -../split-compares-pass.so: split-compares-pass.so.cc | test_deps - $(CXX) $(CLANG_CFL) -shared $< 
-o $@ $(CLANG_LFL) -# /laf - -../cmplog-routines-pass.so: cmplog-routines-pass.cc | test_deps - $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) - -../cmplog-instructions-pass.so: cmplog-instructions-pass.cc | test_deps - $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) - -../afl-llvm-rt.o: afl-llvm-rt.o.c | test_deps - $(CC) $(CFLAGS) -fPIC -c $< -o $@ - -../afl-llvm-rt-32.o: afl-llvm-rt.o.c | test_deps - @printf "[*] Building 32-bit variant of the runtime (-m32)... " - @$(CC) $(CFLAGS) -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi - -../afl-llvm-rt-64.o: afl-llvm-rt.o.c | test_deps - @printf "[*] Building 64-bit variant of the runtime (-m64)... " - @$(CC) $(CFLAGS) -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi - -test_build: $(PROGS) - @echo "[*] Testing the CC wrapper and instrumentation output..." - unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_CC=$(CC) AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS) - ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null - echo 1 | ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr1 ./test-instr - @rm -f test-instr - @cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please post to https://github.com/AFLplusplus/AFLplusplus/issues to troubleshoot the issue."; echo; exit 1; fi - @echo "[+] All right, the instrumentation seems to be working!" - -all_done: test_build - @echo "[+] All done! You can now use '../afl-clang-fast' to compile programs." - -.NOTPARALLEL: clean - -vpath % .. 
-%.8: % - @echo .TH $* 8 `date "+%Y-%m-%d"` "afl++" > ../$@ - @echo .SH NAME >> ../$@ - @echo .B $* >> ../$@ - @echo >> ../$@ - @echo .SH SYNOPSIS >> ../$@ - @../$* -h 2>&1 | head -n 3 | tail -n 1 | sed 's/^\.\///' >> ../$@ - @echo >> ../$@ - @echo .SH OPTIONS >> ../$@ - @echo .nf >> ../$@ - @../$* -h 2>&1 | tail -n +4 >> ../$@ - @echo >> ../$@ - @echo .SH AUTHOR >> ../$@ - @echo "afl++ was written by Michal \"lcamtuf\" Zalewski and is maintained by Marc \"van Hauser\" Heuse <mh@mh-sec.de>, Heiko \"hexcoder-\" Eissfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com>" >> ../$@ - @echo The homepage of afl++ is: https://github.com/AFLplusplus/AFLplusplus >> ../$@ - @echo >> ../$@ - @echo .SH LICENSE >> ../$@ - @echo Apache License Version 2.0, January 2004 >> ../$@ - ln -sf afl-clang-fast.8 ../afl-clang-fast++.8 -ifneq "$(AFL_CLANG_FLTO)" "" -ifeq "$(LLVM_LTO)" "0" - ln -sf afl-clang-fast.8 ../afl-clang-lto.8 - ln -sf afl-clang-fast.8 ../afl-clang-lto++.8 -endif -endif - -clean: - rm -f *.o *.so *~ a.out core core.[1-9][0-9]* .test2 test-instr .test-instr0 .test-instr1 *.dwo - rm -f $(PROGS) afl-common.o ../afl-clang-fast++ ../afl-clang-lto ../afl-clang-lto++ ../afl-clang*.8 +all: + @echo please use GNU make, thanks! diff --git a/llvm_mode/README.ctx.md b/llvm_mode/README.ctx.md new file mode 100644 index 00000000..14255313 --- /dev/null +++ b/llvm_mode/README.ctx.md @@ -0,0 +1,22 @@ +# AFL Context Sensitive Branch Coverage + +## What is this? + +This is an LLVM-based implementation of the context sensitive branch coverage. + +Basically every function gets its own ID and that ID is combined with the +edges of the called functions. + +So if both function A and function B call a function C, the coverage +collected in C will be different.
+ +In math the coverage is collected as follows: +`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1` + +## Usage + +Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable. + +It is highly recommended to increase the MAP_SIZE_POW2 definition in +config.h to at least 18 and maybe up to 20 for this as otherwise too +many map collisions occur. diff --git a/llvm_mode/README.instrim.md b/llvm_mode/README.instrim.md index 5c3f32c8..b905af11 100644 --- a/llvm_mode/README.instrim.md +++ b/llvm_mode/README.instrim.md @@ -5,13 +5,12 @@ InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing ## Introduction InsTrim uses CFG and markers to instrument just what is necessary in the -binary in llvm_mode. It is about 20-25% faster but as a cost has a lower -path discovery. +binary in llvm_mode. It is about 10-15% faster without disadvantages. ## Usage -Set the environment variable `AFL_LLVM_INSTRIM=1` during compilation of -the target. +Set the environment variable `AFL_LLVM_INSTRUMENT=CFG` or `AFL_LLVM_INSTRIM=1` +during compilation of the target. There is also an advanced mode which instruments loops in a way so that afl-fuzz can see which loop path has been selected but not being able to diff --git a/llvm_mode/README.lto.md b/llvm_mode/README.lto.md index 28b3b045..51b50544 100644 --- a/llvm_mode/README.lto.md +++ b/llvm_mode/README.lto.md @@ -2,16 +2,17 @@ ## TLDR; -1. This compile mode is very frickle if it works it is amazing, if it fails - - well use afl-clang-fast +This version requires a current llvm 11 compiled from the github master. -2. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better +1. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better coverage than anything else that is out there in the AFL world -3. You can use it together with llvm_mode: laf-intel and whitelisting +2. 
You can use it together with llvm_mode: laf-intel and whitelisting features and can be combined with cmplog/Redqueen -4. It only works with llvm 9 (and likely 10+ but is not tested there yet) +3. It only works with llvm 11 (current github master state) + +4. AUTODICTIONARY feature! see below ## Introduction and problem description @@ -34,33 +35,43 @@ and many dead ends until we got to this: * Our compiler (afl-clang-lto/afl-clang-lto++) takes care of setting the correct LTO options and runs our own afl-ld linker instead of the system linker - * Our linker collects all LTO files to link and instruments them so that + * The LLVM linker collects all LTO files to link and instruments them so that we have non-colliding edge overage * We use a new (for afl) edge coverage - which is the same as in llvm -fsanitize=coverage edge coverage mode :) - * after inserting our instrumentation in all interesting edges we link - all parts of the program together to our executable The result: - * 10-15% speed gain compared to llvm_mode + * 10-20% speed gain compared to llvm_mode * guaranteed non-colliding edge coverage :-) * The compile time especially for libraries can be longer Example build output from a libtiff build: ``` -/bin/bash ../libtool --tag=CC --mode=link afl-clang-lto -g -O2 -Wall -W -o thumbnail thumbnail.o ../libtiff/libtiff.la ../port/libport.la -llzma -ljbig -ljpeg -lz -lm libtool: link: afl-clang-lto -g -O2 -Wall -W -o thumbnail thumbnail.o ../libtiff/.libs/libtiff.a ../port/.libs/libport.a -llzma -ljbig -ljpeg -lz -lm -afl-clang-lto++2.62d by Marc "vanHauser" Heuse <mh@mh-sec.de> -afl-ld++2.62d by Marc "vanHauser" Heuse <mh@mh-sec.de> (level 0) -[+] Running ar unpacker on /prg/tests/lto/tiff-4.0.4/tools/../libtiff/.libs/libtiff.a into /tmp/.afl-3914343-1583339800.dir -[+] Running ar unpacker on /prg/tests/lto/tiff-4.0.4/tools/../port/.libs/libport.a into /tmp/.afl-3914343-1583339800.dir -[+] Running bitcode linker, creating /tmp/.afl-3914343-1583339800-1.ll -[+] 
Performing optimization via opt, creating /tmp/.afl-3914343-1583339800-2.bc -[+] Performing instrumentation via opt, creating /tmp/.afl-3914343-1583339800-3.bc -afl-llvm-lto++2.62d by Marc "vanHauser" Heuse <mh@mh-sec.de> -[+] Instrumented 15833 locations with no collisions (on average 1767 collisions would be in afl-gcc/afl-clang-fast) (non-hardened mode). -[+] Running real linker /bin/x86_64-linux-gnu-ld -[+] Linker was successful +afl-clang-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de> in mode LTO +afl-llvm-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de> +AUTODICTIONARY: 11 strings found +[+] Instrumented 12071 locations with no collisions (on average 1046 collisions would be in afl-gcc/afl-clang-fast) (non-hardened mode). +``` + +## Building llvm 11 + +``` +$ sudo apt install binutils-dev # this is *essential*! +$ git clone https://github.com/llvm/llvm-project +$ cd llvm-project +$ mkdir build +$ cd build +$ cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/ +$ make -j $(nproc) +$ export PATH=`pwd`/bin:$PATH +$ export LLVM_CONFIG=`pwd`/bin/llvm-config +$ cd /path/to/AFLplusplus/ +$ make +$ cd llvm_mode +$ make +$ cd .. +$ make install ``` ## How to use afl-clang-lto @@ -77,6 +88,13 @@ CC=afl-clang-lto CXX=afl-clang-lto++ ./configure make ``` +## AUTODICTIONARY feature + +Setting `AFL_LLVM_LTO_AUTODICTIONARY` will generate a dictionary in the +target binary based on string compare and memory compare functions. +afl-fuzz will automatically get these transmitted when starting to fuzz. +This improves coverage on a lot of targets. + ## Potential issues ### compiling libraries fails @@ -94,145 +112,16 @@ AR=llvm-ar RANLIB=llvm-ranlib CC=afl-clang-lto CXX=afl-clang-lto++ ./configure - ``` and on some targets you have to set AR=/RANLIB= even for make as the configure script does not save it ... 
-### "linking globals named '...': symbol multiply defined" error - -The target program is using multiple global variables or functions with the -same name. This is a common error when compiling a project with LTO, and -the fix is `-Wl,--allow-multiple-definition` - however llvm-link which we -need to link all llvm IR LTO files does not support this - yet (hopefully). -Hence if you see this error either you have to remove the duplicate global -variable (think `#ifdef` ...) or you are out of luck. :-( - -### "expected top-level entity" + binary ouput error - -This happens if multiple .a archives are to be linked and they contain the -same object filenames, the first in LTO form, the other in ELF form. -This can not be fixed programmatically, but can be fixed by hand. -You can try to delete the file from either archive -(`llvm-ar d <archive>.a <file>.o`) or performing the llvm-linking, optimizing -and instrumentation by hand (see below). - -### "undefined reference to ..." - -This *can* be the opposite situation of the "expected top-level entity" error - -the library with the ELF file is before the LTO library. -However it can also be a bug in the program - try to compile it normally. If -fails then it is a bug in the program. -Solutions: You can try to delete the file from either archive, e.g. -(`llvm-ar d <archive>.a <file>.o`) or performing the llvm-linking, optimizing -and instrumentation by hand (see below). - -### "File format not recognized" - -This happens if the build system has fixed LDFLAGS, CPPFLAGS, CXXFLAGS and/or -CFLAGS. Ensure that they all contain the `-flto` flag that afl-clang-lto was -compiled with (you can see that by typing `afl-clang-lto -h` and inspecting -the last line of the help output) and add them otherwise - -### clang is hardcoded to /bin/ld - -Some clang packages have 'ld' hardcoded to /bin/ld. This is an issue as this -prevents "our" afl-ld being called. 
- --fuse-ld=/path/to/afl-ld should be set through makefile magic in llvm_mode - -if it is supported - however if this fails you can try: -``` -LDFLAGS=-fuse-ld=</path/to/afl-ld -``` - -As workaround attempt #2 you will have to switch /bin/ld: -``` - mv /bin/ld /bin/ld.orig - cp afl-ld /bin/ld -``` -This can result in two problems though: - - !1! - When compiling afl-ld, the build process looks at where the /bin/ld link - is going to. So when the workaround was applied and a recompiling afl-ld - is performed then the link is gone and the new afl-ld clueless where - the real ld is. - In this case set AFL_REAL_LD=/bin/ld.orig - - !2! - When you install an updated gcc/clang/... package, your OS might restore - the ld link. - -### Performing the steps by hand - -It is possible to perform all the steps afl-ld by hand to workaround issues -in the target. - -1. Recompile with AFL_DEBUG=1 and collect the afl-clang-lto command that fails - e.g.: `AFL_DEBUG=1 make 2>&1 | grep afl-clang-lto | tail -n 1` - -2. run this command prepended with AFL_DEBUG=1 and collect the afl-ld command - parameters, e.g. `AFL_DEBUG=1 afl-clang-lto[++] .... | grep /afl/ld` - -3. for every .a archive you want to instrument unpack it into a seperate - directory, e.g. - `mkdir archive1.dir ; cd archive1.dir ; llvm-link x ../<archive>.a` - -4. run `file archive*.dir/*.o` and make two lists, one containing all ELF files - and one containing all LLVM IR bitcode files. - You do the same for all .o files of the ../afl/ld command options - -5. Create a single bitcode file by using llvm-link, e.g. - `llvm-link -o all-bitcode.bc <list of all LLVM IR .o files>` - If this fails it is game over - or you modify the source code - -6. Run the optimizer on the new bitcode file: - `opt -O3 --polly -o all-optimized.bc all-bitcode.bc` - -7. Instrument the optimized bitcode file: - `opt --load=$AFL_PATH/afl-llvm-lto-instrumentation.so --disable-opt --afl-lto all-optimized.bc -o all-instrumented.bc - -8. 
If the parameter `--allow-multiple-definition` is not in the list, add it - as first command line option. - -9. Link everything together. - a) You use the afl-ld command and instead of e.g. `/usr/local/lib/afl/ld` - you replace that with `ld`, the real linker. - b) Every .a archive you instrumented files from you remove the <archive>.a - or -l<archive> from the command - c) If you have entries in your ELF files list (see step 4), you put them to - the command line - but them in the same order! - d) put the all-instrumented.bc before the first library or .o file - e) run the command and hope it compiles, if it doesn't you have to analyze - what the issue is and fix that in the approriate step above. - -Yes this is long and complicated. That is why there is afl-ld doing this and -that why this can easily fail and not all different ways how it *can* fail can -be implemented ... - ### compiling programs still fail afl-clang-lto is still work in progress. -Complex targets are still likely not to compile and this needs to be fixed. Please report issues at: [https://github.com/AFLplusplus/AFLplusplus/issues/226](https://github.com/AFLplusplus/AFLplusplus/issues/226) -Known issues: -* ffmpeg -* bogofilter -* libjpeg-turbo-1.3.1 - ## Upcoming Work -1. Currently the LTO whitelist feature does not allow to not instrument main, start and init functions -2. Modify the forkserver + afl-fuzz so that only the necessary map size is - loaded and used - and communicated to afl-fuzz too. - Result: faster fork in the target and faster map analysis in afl-fuzz - => more speed :-) - -## Tested and working targets - -* libpng-1.2.53 -* libxml2-2.9.2 -* tiff-4.0.4 -* unrar-nonfree-5.6.6 -* exiv 0.27 -* jpeg-6b +1. Currently the LTO whitelist feature does not allow to not instrument main, + start and init functions ## History @@ -252,11 +141,17 @@ very difficult with a program that has so many paths and therefore so many dependencies. At lot of stratgies were implemented - and failed. 
And then sat solvers were tried, but with over 10.000 variables that turned out to be a dead-end too. + The final idea to solve this came from domenukk who proposed to insert a block into an edge and then just use incremental counters ... and this worked! After some trials and errors to implement this vanhauser-thc found out that there is actually an llvm function for this: SplitEdge() :-) + Still more problems came up though as this only works without bugs from llvm 9 onwards, and with high optimization the link optimization ruins the instrumented control flow graph. -As long as there are no larger changes in llvm this all should work well now ... + +This is all now fixed with llvm 11. The llvm's own linker is now able to +load passes and this bypasses all problems we had. + +Happy end :) diff --git a/llvm_mode/README.md b/llvm_mode/README.md index e6c47c9c..607350fb 100644 --- a/llvm_mode/README.md +++ b/llvm_mode/README.md @@ -92,13 +92,33 @@ which C/C++ files to actually instrument. See [README.whitelist](README.whitelis For splitting memcmp, strncmp, etc. please see [README.laf-intel](README.laf-intel.md) -Then there is an optimized instrumentation strategy that uses CFGs and -markers to just instrument what is needed. This increases speed by 20-25% -however has a lower path discovery. -If you want to use this, set AFL_LLVM_INSTRIM=1 +Then there are different ways of instrumenting the target: + +1. There is an optimized instrumentation strategy that uses CFGs and +markers to just instrument what is needed. This increases speed by 10-15% +without any disadvantages +If you want to use this, set AFL_LLVM_INSTRUMENT=CFG or AFL_LLVM_INSTRIM=1 See [README.instrim](README.instrim.md) -A new instrumentation called CmpLog is also available as an alternative to +2. An even better instrumentation strategy uses LTO and link time +instrumentation. Note that not all targets can compile in this mode, however +if it works it is the best option you can use. 
+Simply use afl-clang-lto/afl-clang-lto++ to use this option. +See [README.lto](README.lto.md) + +3. Alternatively you can choose a completely different coverage method: + +3a. N-GRAM coverage - which combines the previous visited edges with the +current one. This explodes the map but on the other hand has proven to be +effective for fuzzing. +See [README.ngram](README.ngram.md) + +3b. Context sensitive coverage - which combines the visited edges with an +individual caller ID (the function that called the current one) +See [README.ctx](README.ctx.md) + +Then - additionally to one of the instrumentation options above - there is +a very effective new instrumentation option called CmpLog as an alternative to laf-intel that allow AFL++ to apply mutations similar to Redqueen. See [README.cmplog](README.cmplog.md) @@ -109,12 +129,18 @@ is not optimal and was only fixed in llvm 9. You can set this with AFL_LLVM_NOT_ZERO=1 See [README.neverzero](README.neverzero.md) -## 4) Gotchas, feedback, bugs +## 4) Snapshot feature + +To speed up fuzzing you can use a linux loadable kernel module which enables +a snapshot feature. +See [README.snapshot](README.snapshot.md) + +## 5) Gotchas, feedback, bugs This is an early-stage mechanism, so field reports are welcome. You can send bug reports to <afl-users@googlegroups.com>. -## 5) Bonus feature #1: deferred initialization +## 6) Bonus feature #1: deferred initialization AFL tries to optimize performance by executing the targeted binary just once, stopping it just before main(), and then cloning this "master" process to get @@ -162,7 +188,7 @@ will keep working normally when compiled with a tool other than afl-clang-fast. Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will *not* generate a deferred-initialization binary) - and you should be all set! 
-## 6) Bonus feature #2: persistent mode +## 7) Bonus feature #2: persistent mode Some libraries provide APIs that are stateless, or whose state can be reset in between processing different input files. When such a reset is performed, a diff --git a/llvm_mode/README.ngram.md b/llvm_mode/README.ngram.md new file mode 100644 index 00000000..de3ba432 --- /dev/null +++ b/llvm_mode/README.ngram.md @@ -0,0 +1,28 @@ +# AFL N-Gram Branch Coverage + +## Source + +This is an LLVM-based implementation of the n-gram branch coverage proposed in +the paper ["Be Sensitive and Collaborative: Analyzing Impact of Coverage Metrics +in Greybox Fuzzing"](https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf), +by Jinghan Wang, et al. + +Note that the original implementation (available +[here](https://github.com/bitsecurerlab/afl-sensitive)) +is built on top of AFL's QEMU mode. +This is essentially a port that uses LLVM vectorized instructions to achieve +the same results when compiling source code. + +In math the branch coverage is performed as follows: +`map[current_location ^ prev_location[0] >> 1 ^ prev_location[1] >> 1 ^ ... up to n-1] += 1` + +## Usage + +The size of `n` (i.e., the number of branches to remember) is an option +that is specified either in the `AFL_LLVM_INSTRUMENT=NGRAM-{value}` or the +`AFL_LLVM_NGRAM_SIZE` environment variable. +Good values are 2, 4 or 8, valid are 2-16. + +It is highly recommended to increase the MAP_SIZE_POW2 definition in +config.h to at least 18 and maybe up to 20 for this as otherwise too +many map collisions occur. diff --git a/llvm_mode/README.snapshot.md b/llvm_mode/README.snapshot.md new file mode 100644 index 00000000..9c12a8ba --- /dev/null +++ b/llvm_mode/README.snapshot.md @@ -0,0 +1,16 @@ +# AFL++ snapshot feature + +Snapshotting is a feature that makes a snapshot from a process and then +restores its state, which is faster than forking it again. 
+ +All targets compiled with llvm_mode are automatically enabled for the +snapshot feature. + +To use the snapshot feature for fuzzing compile and load this kernel +module: [https://github.com/AFLplusplus/AFL-Snapshot-LKM](https://github.com/AFLplusplus/AFL-Snapshot-LKM) + +Note that it has little value for persistent (__AFL_LOOP) fuzzing. + +## Notes + +Snapshot does not work with multithreaded targets yet. Still in WIP, it is now usable only for single threaded applications. diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c index 313a2533..b7ef1858 100644 --- a/llvm_mode/afl-clang-fast.c +++ b/llvm_mode/afl-clang-fast.c @@ -29,11 +29,13 @@ #include "types.h" #include "debug.h" #include "alloc-inl.h" +#include "llvm-ngram-coverage.h" #include <stdio.h> #include <unistd.h> #include <stdlib.h> #include <string.h> +#include <strings.h> #include <limits.h> #include <assert.h> @@ -41,14 +43,33 @@ static u8 * obj_path; /* Path to runtime libraries */ static u8 **cc_params; /* Parameters passed to the real CC */ static u32 cc_par_cnt = 1; /* Param count, including argv0 */ static u8 llvm_fullpath[PATH_MAX]; -static u8 lto_mode; +static u8 instrument_mode; static u8 * lto_flag = AFL_CLANG_FLTO; static u8 * march_opt = CFLAGS_OPT; static u8 debug; static u8 cwd[4096]; static u8 cmplog_mode; u8 use_stdin = 0; /* dummy */ -u8 be_quiet = 0; + +enum { + + INSTRUMENT_CLASSIC = 0, + INSTRUMENT_AFL = 0, + INSTRUMENT_DEFAULT = 0, + INSTRUMENT_PCGUARD = 1, + INSTRUMENT_INSTRIM = 2, + INSTRUMENT_CFG = 2, + INSTRUMENT_LTO = 3, + INSTRUMENT_CTX = 4, + INSTRUMENT_NGRAM = 5 // + ngram value of 2-16 = 7 - 21 + +}; + +char instrument_mode_string[6][16] = { + + "DEFAULT", "PCGUARD", "CFG", "LTO", "CTX", + +}; u8 *getthecwd() { @@ -135,9 +156,9 @@ static void find_obj(u8 *argv0) { /* Copy argv to cc_params, making the necessary edits. 
*/ -static void edit_params(u32 argc, char **argv) { +static void edit_params(u32 argc, char **argv, char **envp) { - u8 fortify_set = 0, asan_set = 0, x_set = 0, maybe_linking = 1, bit_mode = 0; + u8 fortify_set = 0, asan_set = 0, x_set = 0, bit_mode = 0; u8 has_llvm_config = 0; u8 *name; @@ -151,20 +172,11 @@ static void edit_params(u32 argc, char **argv) { has_llvm_config = (strlen(LLVM_BINDIR) > 0); - if (!strncmp(name, "afl-clang-lto", strlen("afl-clang-lto"))) { - -#ifdef USE_TRACE_PC - FATAL("afl-clang-lto does not work with TRACE_PC mode"); -#endif + if (instrument_mode == INSTRUMENT_LTO) if (lto_flag[0] != '-') FATAL( - "afl-clang-lto not possible because Makefile magic did not identify " - "the correct -flto flag"); - if (getenv("AFL_LLVM_INSTRIM") != NULL) - FATAL("afl-clang-lto does not work with InsTrim mode"); - lto_mode = 1; - - } + "Using afl-clang-lto is not possible because Makefile magic did not " + "identify the correct -flto flag"); if (!strcmp(name, "afl-clang-fast++") || !strcmp(name, "afl-clang-lto++")) { @@ -173,7 +185,7 @@ static void edit_params(u32 argc, char **argv) { snprintf(llvm_fullpath, sizeof(llvm_fullpath), "%s/clang++", LLVM_BINDIR); else sprintf(llvm_fullpath, "clang++"); - cc_params[0] = alt_cxx ? alt_cxx : (u8 *)llvm_fullpath; + cc_params[0] = alt_cxx && *alt_cxx ? alt_cxx : (u8 *)llvm_fullpath; } else { @@ -182,7 +194,7 @@ static void edit_params(u32 argc, char **argv) { snprintf(llvm_fullpath, sizeof(llvm_fullpath), "%s/clang", LLVM_BINDIR); else sprintf(llvm_fullpath, "clang"); - cc_params[0] = alt_cc ? alt_cc : (u8 *)llvm_fullpath; + cc_params[0] = alt_cc && *alt_cc ? 
alt_cc : (u8 *)llvm_fullpath; } @@ -255,23 +267,7 @@ static void edit_params(u32 argc, char **argv) { } -#ifdef USE_TRACE_PC - - cc_params[cc_par_cnt++] = - "-fsanitize-coverage=trace-pc-guard"; // edge coverage by default - // cc_params[cc_par_cnt++] = "-mllvm"; - // cc_params[cc_par_cnt++] = - // "-fsanitize-coverage=trace-cmp,trace-div,trace-gep"; - // cc_params[cc_par_cnt++] = "-sanitizer-coverage-block-threshold=0"; -#else - - if (lto_mode) { - - char *old_path = getenv("PATH"); - char *new_path = alloc_printf("%s:%s", AFL_PATH, old_path); - - setenv("PATH", new_path, 1); - setenv("AFL_LD", "1", 1); + if (instrument_mode == INSTRUMENT_LTO) { if (getenv("AFL_LLVM_WHITELIST") != NULL) { @@ -283,43 +279,38 @@ static void edit_params(u32 argc, char **argv) { } -#ifdef AFL_CLANG_FUSELD - cc_params[cc_par_cnt++] = alloc_printf("-fuse-ld=%s/afl-ld", AFL_PATH); -#endif - - cc_params[cc_par_cnt++] = "-B"; - cc_params[cc_par_cnt++] = AFL_PATH; - + cc_params[cc_par_cnt++] = alloc_printf("-fuse-ld=%s", AFL_REAL_LD); + cc_params[cc_par_cnt++] = "-Wl,--allow-multiple-definition"; + cc_params[cc_par_cnt++] = alloc_printf( + "-Wl,-mllvm=-load=%s/afl-llvm-lto-instrumentation.so", obj_path); cc_params[cc_par_cnt++] = lto_flag; - } else + } else { - if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") || - getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) { + if (instrument_mode == INSTRUMENT_PCGUARD) { - cc_params[cc_par_cnt++] = - "-fsanitize-coverage=trace-pc-guard"; // edge coverage by default + cc_params[cc_par_cnt++] = + "-fsanitize-coverage=trace-pc-guard"; // edge coverage by default - } else { + } else { - cc_params[cc_par_cnt++] = "-Xclang"; - cc_params[cc_par_cnt++] = "-load"; - cc_params[cc_par_cnt++] = "-Xclang"; - if (getenv("AFL_LLVM_INSTRIM") != NULL || getenv("INSTRIM_LIB") != NULL) - cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path); - else - cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); + 
cc_params[cc_par_cnt++] = "-Xclang"; + cc_params[cc_par_cnt++] = "-load"; + cc_params[cc_par_cnt++] = "-Xclang"; + if (instrument_mode == INSTRUMENT_CFG) + cc_params[cc_par_cnt++] = + alloc_printf("%s/libLLVMInsTrim.so", obj_path); + else + cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); - } + } -#endif /* ^USE_TRACE_PC */ + } cc_params[cc_par_cnt++] = "-Qunused-arguments"; /* Detect stray -v calls from ./configure scripts. */ - if (argc == 1 && !strcmp(argv[1], "-v")) maybe_linking = 0; - while (--argc) { u8 *cur = *(++argv); @@ -330,16 +321,11 @@ static void edit_params(u32 argc, char **argv) { if (!strcmp(cur, "-x")) x_set = 1; - if (!strcmp(cur, "-c") || !strcmp(cur, "-S") || !strcmp(cur, "-E")) - maybe_linking = 0; - if (!strcmp(cur, "-fsanitize=address") || !strcmp(cur, "-fsanitize=memory")) asan_set = 1; if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1; - if (!strcmp(cur, "-shared")) maybe_linking = 0; - if (!strcmp(cur, "-Wl,-z,defs") || !strcmp(cur, "-Wl,--no-undefined")) continue; @@ -389,14 +375,21 @@ static void edit_params(u32 argc, char **argv) { } -#ifdef USE_TRACE_PC + if (getenv("AFL_USE_CFISAN")) { - if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") || - getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) - if (getenv("AFL_INST_RATIO")) - FATAL("AFL_INST_RATIO not available at compile time with 'trace-pc'."); + if (instrument_mode != INSTRUMENT_LTO) { + + uint32_t i = 0, found = 0; + while (envp[i] != NULL && !found) + if (strncmp("-flto", envp[i++], 5) == 0) found = 1; + if (!found) cc_params[cc_par_cnt++] = "-flto"; + + } + + cc_params[cc_par_cnt++] = "-fsanitize=cfi"; + cc_params[cc_par_cnt++] = "-fvisibility=hidden"; -#endif /* USE_TRACE_PC */ + } if (!getenv("AFL_DONT_OPTIMIZE")) { @@ -408,7 +401,11 @@ static void edit_params(u32 argc, char **argv) { } - if (getenv("AFL_NO_BUILTIN")) { + if (getenv("AFL_NO_BUILTIN") || getenv("AFL_LLVM_LAF_TRANSFORM_COMPARES") || + getenv("LAF_TRANSFORM_COMPARES") || + 
(instrument_mode == INSTRUMENT_LTO && + (getenv("AFL_LLVM_LTO_AUTODICTIONARY") || + getenv("AFL_LLVM_AUTODICTIONARY")))) { cc_params[cc_par_cnt++] = "-fno-builtin-strcmp"; cc_params[cc_par_cnt++] = "-fno-builtin-strncmp"; @@ -477,44 +474,57 @@ static void edit_params(u32 argc, char **argv) { #endif /* ^__APPLE__ */ "_I(); } while (0)"; - if (maybe_linking) { + if (x_set) { - if (x_set) { + cc_params[cc_par_cnt++] = "-x"; + cc_params[cc_par_cnt++] = "none"; - cc_params[cc_par_cnt++] = "-x"; - cc_params[cc_par_cnt++] = "none"; - - } + } #ifndef __ANDROID__ - switch (bit_mode) { - - case 0: - cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt.o", obj_path); - break; - - case 32: - cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-32.o", obj_path); - + switch (bit_mode) { + + case 0: + cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt.o", obj_path); + if (instrument_mode == INSTRUMENT_LTO) + cc_params[cc_par_cnt++] = + alloc_printf("%s/afl-llvm-rt-lto.o", obj_path); + break; + + case 32: + cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-32.o", obj_path); + if (access(cc_params[cc_par_cnt - 1], R_OK)) + FATAL("-m32 is not supported by your compiler"); + if (instrument_mode == INSTRUMENT_LTO) { + + cc_params[cc_par_cnt++] = + alloc_printf("%s/afl-llvm-rt-lto-32.o", obj_path); if (access(cc_params[cc_par_cnt - 1], R_OK)) FATAL("-m32 is not supported by your compiler"); - break; + } - case 64: - cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-64.o", obj_path); + break; + case 64: + cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-64.o", obj_path); + if (access(cc_params[cc_par_cnt - 1], R_OK)) + FATAL("-m64 is not supported by your compiler"); + if (instrument_mode == INSTRUMENT_LTO) { + + cc_params[cc_par_cnt++] = + alloc_printf("%s/afl-llvm-rt-lto-64.o", obj_path); if (access(cc_params[cc_par_cnt - 1], R_OK)) FATAL("-m64 is not supported by your compiler"); - break; - - } + } -#endif + break; } +#endif + cc_params[cc_par_cnt] = NULL; } 
@@ -524,7 +534,7 @@ static void edit_params(u32 argc, char **argv) { int main(int argc, char **argv, char **envp) { int i; - char *callname = "afl-clang-fast"; + char *callname = "afl-clang-fast", *ptr; if (getenv("AFL_DEBUG")) { @@ -535,87 +545,222 @@ int main(int argc, char **argv, char **envp) { be_quiet = 1; - if (strstr(argv[0], "afl-clang-lto") != NULL) callname = "afl-clang-lto"; +#ifdef USE_TRACE_PC + instrument_mode = INSTRUMENT_PCGUARD; +#endif - if (argc < 2 || strcmp(argv[1], "-h") == 0) { + if ((ptr = getenv("AFL_LLVM_INSTRUMENT")) != NULL) { + + if (strncasecmp(ptr, "cfg", strlen("cfg")) == 0 || + strncasecmp(ptr, "instrim", strlen("instrim")) == 0) + instrument_mode = INSTRUMENT_CFG; + else if (strncasecmp(ptr, "pc-guard", strlen("pc-guard")) == 0 || + strncasecmp(ptr, "pcguard", strlen("pcgard")) == 0) + instrument_mode = INSTRUMENT_PCGUARD; + else if (strncasecmp(ptr, "lto", strlen("lto")) == 0) + instrument_mode = INSTRUMENT_LTO; + else if (strncasecmp(ptr, "ctx", strlen("ctx")) == 0) { + + instrument_mode = INSTRUMENT_CTX; + setenv("AFL_LLVM_CTX", "1", 1); + + } else if (strncasecmp(ptr, "ngram", strlen("ngram")) == 0) { + + ptr += strlen("ngram"); + while (*ptr && (*ptr < '0' || *ptr > '9')) + ptr++; + if (!*ptr) + if ((ptr = getenv("AFL_LLVM_NGRAM_SIZE")) != NULL) + FATAL( + "you must set the NGRAM size with (e.g. 
for value 2) " + "AFL_LLVM_INSTRUMENT=ngram-2"); + instrument_mode = INSTRUMENT_NGRAM + atoi(ptr); + if (instrument_mode < INSTRUMENT_NGRAM + 2 || + instrument_mode > INSTRUMENT_NGRAM + NGRAM_SIZE_MAX) + FATAL( + "NGRAM instrumentation mode must be between 2 and NGRAM_SIZE_MAX " + "(%u)", + NGRAM_SIZE_MAX); + + ptr = alloc_printf("%u", instrument_mode - INSTRUMENT_NGRAM); + setenv("AFL_LLVM_NGRAM_SIZE", ptr, 1); + + } else if (strncasecmp(ptr, "classic", strlen("classic")) != 0 || + + strncasecmp(ptr, "default", strlen("default")) != 0 || + strncasecmp(ptr, "afl", strlen("afl")) != 0) + FATAL("unknown AFL_LLVM_INSTRUMENT value: %s", ptr); -#ifdef USE_TRACE_PC - printf(cCYA "afl-clang-fast" VERSION cRST - " [tpcg] by <lszekeres@google.com>\n") -#else - if (strstr(argv[0], "afl-clang-lto") == NULL) + } + + if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") || + getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) { + + if (instrument_mode == 0) + instrument_mode = INSTRUMENT_PCGUARD; + else if (instrument_mode != INSTRUMENT_PCGUARD) + FATAL("you can not set AFL_LLVM_INSTRUMENT and AFL_TRACE_PC together"); + + } + + if (getenv("AFL_LLVM_INSTRIM") || getenv("INSTRIM") || + getenv("INSTRIM_LIB")) { + + if (instrument_mode == 0) + instrument_mode = INSTRUMENT_CFG; + else if (instrument_mode != INSTRUMENT_CFG) + FATAL( + "you can not set AFL_LLVM_INSTRUMENT and AFL_LLVM_INSTRIM together"); + + } + + if (getenv("AFL_LLVM_CTX")) { + + if (instrument_mode == 0) + instrument_mode = INSTRUMENT_CTX; + else if (instrument_mode != INSTRUMENT_CTX) + FATAL("you can not set AFL_LLVM_INSTRUMENT and AFL_LLVM_CTX together"); + + } + + if (getenv("AFL_LLVM_NGRAM_SIZE")) { + + if (instrument_mode == 0) { + + instrument_mode = INSTRUMENT_NGRAM + atoi(getenv("AFL_LLVM_NGRAM_SIZE")); + if (instrument_mode < INSTRUMENT_NGRAM + 2 || + instrument_mode > INSTRUMENT_NGRAM + NGRAM_SIZE_MAX) + FATAL( + "NGRAM instrumentation mode must be between 2 and NGRAM_SIZE_MAX " + "(%u)", + 
NGRAM_SIZE_MAX); + + } else if (instrument_mode != INSTRUMENT_NGRAM) + + FATAL( + "you can not set AFL_LLVM_INSTRUMENT and AFL_LLVM_NGRAM_SIZE " + "together"); + + } + + if (instrument_mode < INSTRUMENT_NGRAM) + ptr = instrument_mode_string[instrument_mode]; + else + ptr = alloc_printf("NGRAM-%u", instrument_mode - INSTRUMENT_NGRAM); + + if (strstr(argv[0], "afl-clang-lto") != NULL) { + + if (instrument_mode == 0 || instrument_mode == INSTRUMENT_LTO) { - printf(cCYA "afl-clang-fast" VERSION cRST " by <lszekeres@google.com>\n"); + callname = "afl-clang-lto"; + instrument_mode = INSTRUMENT_LTO; + ptr = instrument_mode_string[instrument_mode]; - else { + } else { - printf(cCYA "afl-clang-lto" VERSION cRST - " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n"); + if (!be_quiet) + WARNF("afl-clang-lto called with mode %s, using that mode instead", + ptr); } -#endif /* ^USE_TRACE_PC */ - - SAYF( - "\n" - "%s[++] [options]\n" - "\n" - "This is a helper application for afl-fuzz. It serves as a drop-in " - "replacement\n" - "for clang, letting you recompile third-party code with the " - "required " - "runtime\n" - "instrumentation. 
A common use pattern would be one of the " - "following:\n\n" - - " CC=%s/afl-clang-fast ./configure\n" - " CXX=%s/afl-clang-fast++ ./configure\n\n" - - "In contrast to the traditional afl-clang tool, this version is " - "implemented as\n" - "an LLVM pass and tends to offer improved performance with slow " - "programs.\n\n" - - "Environment variables used:\n" - "AFL_CC: path to the C compiler to use\n" - "AFL_CXX: path to the C++ compiler to use\n" - "AFL_PATH: path to instrumenting pass and runtime " - "(afl-llvm-rt.*o)\n" - "AFL_DONT_OPTIMIZE: disable optimization instead of -O3\n" - "AFL_NO_BUILTIN: compile for use with libtokencap.so\n" - "AFL_INST_RATIO: percentage of branches to instrument\n" - "AFL_QUIET: suppress verbose output\n" - "AFL_DEBUG: enable developer debugging output\n" - "AFL_HARDEN: adds code hardening to catch memory bugs\n" - "AFL_USE_ASAN: activate address sanitizer\n" - "AFL_USE_MSAN: activate memory sanitizer\n" - "AFL_USE_UBSAN: activate undefined behaviour sanitizer\n" - "AFL_LLVM_WHITELIST: enable whitelisting (selective " - "instrumentation)\n" - "AFL_LLVM_NOT_ZERO: use cycling trace counters that skip zero\n" - "AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation\n" - "AFL_LLVM_LAF_SPLIT_COMPARES: enable cascaded comparisons\n" - "AFL_LLVM_LAF_SPLIT_SWITCHES: casc. comp. in 'switch'\n" - "AFL_LLVM_LAF_TRANSFORM_COMPARES: transform library comparison " - "function calls\n" - " to cascaded comparisons\n" - "AFL_LLVM_LAF_SPLIT_FLOATS: transform floating point comp. 
to " - "cascaded " - "comp.\n" - "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n" - "AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n" - "AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed\n" - "AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen mutator)\n" - "\nafl-clang-fast was built for llvm %s with the llvm binary path " - "of " - "\"%s\".\n", - callname, BIN_PATH, BIN_PATH, LLVM_VERSION, LLVM_BINDIR); - - if (strcmp(callname, "afl-clang-lto") == 0) - SAYF( - "Compiled with linker target \"%s\" and LTO flags \"%s\"\n\n" - "If anything fails - be sure to read README.lto.md!\n\n", - AFL_REAL_LD, AFL_CLANG_FLTO); + } + +#ifndef AFL_CLANG_FLTO + if (instrument_mode == INSTRUMENT_LTO) + FATAL("instrumentation mode LTO specified but LLVM support not available"); +#endif + + if (argc < 2 || strcmp(argv[1], "-h") == 0) { + + if (instrument_mode != INSTRUMENT_LTO) + printf("afl-clang-fast" VERSION " by <lszekeres@google.com> in %s mode\n", + ptr); + else + printf("afl-clang-lto" VERSION + " by Marc \"vanHauser\" Heuse <mh@mh-sec.de> in %s mode\n", + ptr); + + SAYF( + "\n" + "%s[++] [options]\n" + "\n" + "This is a helper application for afl-fuzz. It serves as a drop-in " + "replacement\n" + "for clang, letting you recompile third-party code with the " + "required " + "runtime\n" + "instrumentation. 
A common use pattern would be one of the " + "following:\n\n" + + " CC=%s/afl-clang-fast ./configure\n" + " CXX=%s/afl-clang-fast++ ./configure\n\n" + + "In contrast to the traditional afl-clang tool, this version is " + "implemented as\n" + "an LLVM pass and tends to offer improved performance with slow " + "programs.\n\n" + + "Environment variables used:\n" + "AFL_CC: path to the C compiler to use\n" + "AFL_CXX: path to the C++ compiler to use\n" + "AFL_PATH: path to instrumenting pass and runtime " + "(afl-llvm-rt.*o)\n" + "AFL_DONT_OPTIMIZE: disable optimization instead of -O3\n" + "AFL_NO_BUILTIN: compile for use with libtokencap.so\n" + "AFL_INST_RATIO: percentage of branches to instrument\n" + "AFL_QUIET: suppress verbose output\n" + "AFL_DEBUG: enable developer debugging output\n" + "AFL_HARDEN: adds code hardening to catch memory bugs\n" + "AFL_USE_ASAN: activate address sanitizer\n" + "AFL_USE_MSAN: activate memory sanitizer\n" + "AFL_USE_UBSAN: activate undefined behaviour sanitizer\n" + "AFL_USE_CFISAN: activate control flow sanitizer\n" + "AFL_LLVM_WHITELIST: enable whitelisting (selective " + "instrumentation)\n" + "AFL_LLVM_NOT_ZERO: use cycling trace counters that skip zero\n" + "AFL_LLVM_LAF_SPLIT_COMPARES: enable cascaded comparisons\n" + "AFL_LLVM_LAF_SPLIT_SWITCHES: casc. comp. in 'switch'\n" + "AFL_LLVM_LAF_TRANSFORM_COMPARES: transform library comparison " + "function calls\n" + " to cascaded comparisons\n" + "AFL_LLVM_LAF_SPLIT_FLOATS: transform floating point comp. to " + "cascaded " + "comp.\n" + "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n", + callname, BIN_PATH, BIN_PATH); + + SAYF( + "\nafl-clang-fast specific environment variables:\n" + "AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen mutator)\n" + "AFL_LLVM_INSTRUMENT: set instrumentation mode: DEFAULT, CFG " + "(INSTRIM), LTO, CTX, NGRAM-2 ... 
NGRAM-16\n" + "You can also use the old environment variables:" + "AFL_LLVM_CTX: use context sensitive coverage\n" + "AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation\n" + "AFL_LLVM_NGRAM_SIZE: use ngram prev_loc count coverage\n" + "AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n" + "AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed (sub " + "option to INSTRIM)\n"); + +#ifdef AFL_CLANG_FLTO + SAYF( + "\nafl-clang-lto specific environment variables:\n" + "AFL_LLVM_LTO_STARTID: from which ID to start counting from for a " + "bb\n" + "AFL_LLVM_LTO_DONTWRITEID: don't write the highest ID used to a " + "global var\n" + "AFL_REAL_LD: use this lld linker instead of the compiled in path\n" + "\nafl-clang-lto was built with linker target \"%s\" and LTO flags " + "\"%s\"\n" + "If anything fails - be sure to read README.lto.md!\n", + AFL_REAL_LD, AFL_CLANG_FLTO); +#endif + + SAYF( + "\nafl-clang-fast was built for llvm %s with the llvm binary path " + "of \"%s\".\n", + LLVM_VERSION, LLVM_BINDIR); SAYF("\n"); @@ -625,22 +770,17 @@ int main(int argc, char **argv, char **envp) { getenv("AFL_DEBUG") != NULL) { -#ifdef USE_TRACE_PC - SAYF(cCYA "afl-clang-fast" VERSION cRST - " [tpcg] by <lszekeres@google.com>\n"); -#warning \ - "You do not need to specifically compile with USE_TRACE_PC anymore, setting the environment variable AFL_LLVM_USE_TRACE_PC is enough." 
-#else - if (strstr(argv[0], "afl-clang-lto") == NULL) + if (instrument_mode != INSTRUMENT_LTO) - SAYF(cCYA "afl-clang-fast" VERSION cRST " by <lszekeres@google.com>\n"); + SAYF(cCYA "afl-clang-fast" VERSION cRST + " by <lszekeres@google.com> in %s mode\n", + ptr); else SAYF(cCYA "afl-clang-lto" VERSION cRST - " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n"); - -#endif /* ^USE_TRACE_PC */ + " by Marc \"vanHauser\" Heuse <mh@mh-sec.de> in mode %s\n", + ptr); } @@ -663,7 +803,7 @@ int main(int argc, char **argv, char **envp) { find_obj(argv[0]); #endif - edit_params(argc, argv); + edit_params(argc, argv, envp); if (debug) { diff --git a/llvm_mode/afl-ld.c b/llvm_mode/afl-ld.c deleted file mode 100644 index eb46c85c..00000000 --- a/llvm_mode/afl-ld.c +++ /dev/null @@ -1,839 +0,0 @@ -/* - american fuzzy lop++ - wrapper for GNU ld - ----------------------------------------- - - Written by Marc Heuse <mh@mh-sec.de> for afl++ - - Maintained by Marc Heuse <mh@mh-sec.de>, - Heiko Eißfeldt <heiko.eissfeldt@hexco.de> - Andrea Fioraldi <andreafioraldi@gmail.com> - Dominik Maier <domenukk@gmail.com> - - Copyright 2019-2020 AFLplusplus Project. All rights reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at: - - http://www.apache.org/licenses/LICENSE-2.0 - - The sole purpose of this wrapper is to preprocess clang LTO files before - linking by ld and perform the instrumentation on the whole program. 
- -*/ - -#define AFL_MAIN - -#include "config.h" -#include "types.h" -#include "debug.h" -#include "alloc-inl.h" - -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> -#include <time.h> -#include <ctype.h> -#include <fcntl.h> - -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <sys/time.h> - -#include <dirent.h> - -#define MAX_PARAM_COUNT 4096 - -static u8 **ld_params, /* Parameters passed to the real 'ld' */ - **link_params, /* Parameters passed to 'llvm-link' */ - **opt_params, /* Parameters passed to 'opt' opt */ - **inst_params; /* Parameters passed to 'opt' inst */ - -static u8 *input_file; /* Originally specified input file */ -static u8 *final_file, /* Instrumented file for the real 'ld' */ - *linked_file, /* file where we link all files */ - *modified_file; /* file that was optimized before instr */ -static u8 *afl_path = AFL_PATH; -static u8 *real_ld = AFL_REAL_LD; -static u8 cwd[4096]; -static u8 *tmp_dir; -static u8 *ar_dir; -static u8 ar_dir_cnt; -static u8 *libdirs[254]; -static u8 libdir_cnt; - -static u8 be_quiet, /* Quiet mode (no stderr output) */ - debug, /* AFL_DEBUG */ - passthrough, /* AFL_LD_PASSTHROUGH - no link+optimize*/ - we_link, /* we have bc/ll -> link + optimize */ - just_version; /* Just show version? 
*/ - -static u32 ld_param_cnt = 1, /* Number of params to 'ld' */ - link_param_cnt = 1, /* Number of params to 'llvm-link' */ - opt_param_cnt = 1, /* Number of params to 'opt' opt */ - inst_param_cnt = 1; /* Number of params to 'opt' instr */ - -/* This function wipes a directory - our AR unpack directory in this case */ -static u8 wipe_directory(u8 *path) { - - DIR * d; - struct dirent *d_ent; - - d = opendir(path); - - if (!d) return 0; - - while ((d_ent = readdir(d))) { - - if (strcmp(d_ent->d_name, ".") != 0 && strcmp(d_ent->d_name, "..") != 0) { - - u8 *fname = alloc_printf("%s/%s", path, d_ent->d_name); - if (unlink(fname)) PFATAL("Unable to delete '%s'", fname); - ck_free(fname); - - } - - } - - closedir(d); - - return !!rmdir(path); - -} - -/* remove temporary files on fatal errors */ -static void at_exit_handler(void) { - - if (!getenv("AFL_KEEP_ASSEMBLY")) { - - if (linked_file) { - - unlink(linked_file); - linked_file = NULL; - - } - - if (modified_file) { - - unlink(modified_file); - modified_file = NULL; - - } - - if (final_file) { - - unlink(final_file); - final_file = NULL; - - } - - if (ar_dir != NULL) { - - wipe_directory(ar_dir); - ar_dir = NULL; - - } - - } - -} - -/* This function checks if the parameter is a) an existing file and b) - if it is a BC or LL file, if both are true it returns 1 and 0 otherwise */ -int is_llvm_file(const char *file) { - - int fd; - u8 buf[5]; - - if ((fd = open(file, O_RDONLY)) < 0) { - - if (debug) SAYF(cMGN "[D] " cRST "File %s not found", file); - return 0; - - } - - if (read(fd, buf, 4) != 4) return 0; - buf[sizeof(buf) - 1] = 0; - - close(fd); - - if (strncmp(buf, "; Mo", 4) == 0) return 1; - - if (buf[0] == 'B' && buf[1] == 'C' && buf[2] == 0xc0 && buf[3] == 0xde) - return 1; - - return 0; - -} - -/* Return the current working directory, not thread safe ;-) */ -u8 *getthecwd() { - - static u8 fail[] = ""; - if (getcwd(cwd, sizeof(cwd)) == NULL) return fail; - return cwd; - -} - -/* Check if an ar extracted file 
is already in the parameter list */ -int is_duplicate(u8 **params, u32 ld_param_cnt, u8 *ar_file) { - - for (uint32_t i = 0; i < ld_param_cnt; i++) - if (params[i] != NULL) - if (strcmp(params[i], ar_file) == 0) return 1; - - return 0; - -} - -/* Examine and modify parameters to pass to 'ld', 'llvm-link' and 'llmv-ar'. - Note that the file name is always the last parameter passed by GCC, - so we exploit this property to keep the code "simple". */ -static void edit_params(int argc, char **argv) { - - u32 i, have_lto = 0, libdir_index; - u8 libdir_file[4096]; - - if (tmp_dir == NULL) { - - tmp_dir = getenv("TMPDIR"); - if (!tmp_dir) tmp_dir = getenv("TEMP"); - if (!tmp_dir) tmp_dir = getenv("TMP"); - if (!tmp_dir) tmp_dir = "/tmp"; - - } - - linked_file = - alloc_printf("%s/.afl-%u-%u-1.ll", tmp_dir, getpid(), (u32)time(NULL)); - modified_file = - alloc_printf("%s/.afl-%u-%u-2.bc", tmp_dir, getpid(), (u32)time(NULL)); - final_file = - alloc_printf("%s/.afl-%u-%u-3.bc", tmp_dir, getpid(), (u32)time(NULL)); - - ld_params = ck_alloc(4096 * sizeof(u8 *)); - link_params = ck_alloc(4096 * sizeof(u8 *)); - inst_params = ck_alloc(12 * sizeof(u8 *)); - opt_params = ck_alloc(12 * sizeof(u8 *)); - - ld_params[0] = (u8 *)real_ld; - ld_params[ld_param_cnt++] = "--allow-multiple-definition"; - - link_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "llvm-link"); - link_params[link_param_cnt++] = "-S"; // we create the linked file as .ll - link_params[link_param_cnt++] = "-o"; - link_params[link_param_cnt++] = linked_file; - - opt_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "opt"); - if (getenv("AFL_DONT_OPTIMIZE") == NULL) - opt_params[opt_param_cnt++] = "-O3"; - else - opt_params[opt_param_cnt++] = "-O0"; - - // opt_params[opt_param_cnt++] = "-S"; // only when debugging - opt_params[opt_param_cnt++] = linked_file; // input: .ll file - opt_params[opt_param_cnt++] = "-o"; - opt_params[opt_param_cnt++] = modified_file; // output: .bc file - - inst_params[0] = alloc_printf("%s/%s", 
LLVM_BINDIR, "opt"); - inst_params[inst_param_cnt++] = - alloc_printf("--load=%s/afl-llvm-lto-instrumentation.so", afl_path); - // inst_params[inst_param_cnt++] = "-S"; // only when debugging - inst_params[inst_param_cnt++] = "--disable-opt"; - inst_params[inst_param_cnt++] = "--afl-lto"; - inst_params[inst_param_cnt++] = modified_file; // input: .bc file - inst_params[inst_param_cnt++] = "-o"; - inst_params[inst_param_cnt++] = final_file; // output: .bc file - - // first we must collect all library search paths - for (i = 1; i < argc; i++) - if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == 'L') - libdirs[libdir_cnt++] = argv[i] + 2; - - // then we inspect all options to the target linker - for (i = 1; i < argc; i++) { - - if (ld_param_cnt >= MAX_PARAM_COUNT || link_param_cnt >= MAX_PARAM_COUNT) - FATAL( - "Too many command line parameters because of unpacking .a archives, " - "this would need to be done by hand ... sorry! :-("); - - if (strncmp(argv[i], "-flto", 5) == 0) have_lto = 1; - - if (!strcmp(argv[i], "-version")) { - - just_version = 1; - ld_params[1] = argv[i]; - ld_params[2] = NULL; - final_file = input_file; - return; - - } - - if (strcmp(argv[i], "--afl") == 0) { - - if (!be_quiet) OKF("afl++ test command line flag detected, exiting."); - exit(0); - - } - - // if a -l library is linked and no .so is found but an .a archive is there - // then the archive will be used. So we have to emulate this and check - // if an archive will be used and if yes we will instrument it too - libdir_file[0] = 0; - libdir_index = libdir_cnt; - if (strncmp(argv[i], "-l", 2) == 0 && libdir_cnt > 0 && - strncmp(argv[i], "-lgcc", 5) != 0) { - - u8 found = 0; - - for (uint32_t j = 0; j < libdir_cnt && !found; j++) { - - snprintf(libdir_file, sizeof(libdir_file), "%s/lib%s%s", libdirs[j], - argv[i] + 2, ".so"); - if (access(libdir_file, R_OK) != 0) { // no .so found? 
- - snprintf(libdir_file, sizeof(libdir_file), "%s/lib%s%s", libdirs[j], - argv[i] + 2, ".a"); - if (access(libdir_file, R_OK) == 0) { // but .a found? - - libdir_index = j; - found = 1; - if (debug) SAYF(cMGN "[D] " cRST "Found %s\n", libdir_file); - - } - - } else { - - found = 1; - if (debug) SAYF(cMGN "[D] " cRST "Found %s\n", libdir_file); - - } - - } - - } - - // is the parameter an .a AR archive? If so, unpack and check its files - if (libdir_index < libdir_cnt || - (argv[i][0] != '-' && strlen(argv[i]) > 2 && - argv[i][strlen(argv[i]) - 1] == 'a' && - argv[i][strlen(argv[i]) - 2] == '.')) { - - // This gets a bit odd. I encountered several .a files being linked and - // where the same "foo.o" was in both .a archives. llvm-link does not - // like this so we have to work around that ... - - u8 this_wd[4096], *this_ar; - u8 ar_params_cnt = 4; - u8 * ar_params[ar_params_cnt]; - u8 * file = argv[i]; - s32 pid, status; - DIR * arx; - struct dirent *dir_ent; - - if (libdir_index < libdir_cnt) file = libdir_file; - - if (ar_dir_cnt == 0) { // first archive, we setup up the basics - - ar_dir = alloc_printf("%s/.afl-%u-%u.dir", tmp_dir, getpid(), - (u32)time(NULL)); - if (mkdir(ar_dir, 0700) != 0) - FATAL("can not create temporary directory %s", ar_dir); - - } - - if (getcwd(this_wd, sizeof(this_wd)) == NULL) - FATAL("can not get the current working directory"); - if (chdir(ar_dir) != 0) - FATAL("can not chdir to temporary directory %s", ar_dir); - if (file[0] == '/') - this_ar = file; - else - this_ar = alloc_printf("%s/%s", this_wd, file); - ar_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "llvm-ar"); - ar_params[1] = "x"; - ar_params[2] = this_ar; - ar_params[3] = NULL; - - if (!be_quiet) OKF("Running ar unpacker on %s into %s", this_ar, ar_dir); - - if (debug) { - - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (uint32_t j = 0; j < ar_params_cnt; j++) - SAYF(" \"%s\"", ar_params[j]); - SAYF("\n"); - - } - - if (!(pid = fork())) { - - 
execvp(ar_params[0], (char **)ar_params); - FATAL("Oops, failed to execute '%s'", ar_params[0]); - - } - - if (pid < 0) FATAL("fork() failed"); - if (waitpid(pid, &status, 0) <= 0) FATAL("waitpid() failed"); - if (WEXITSTATUS(status) != 0) exit(WEXITSTATUS(status)); - - if (chdir(this_wd) != 0) - FATAL("can not chdir back to our working directory %s", this_wd); - - if (!(arx = opendir(ar_dir))) FATAL("can not open directory %s", ar_dir); - - while ((dir_ent = readdir(arx)) != NULL) { - - u8 *ar_file = alloc_printf("%s/%s", ar_dir, dir_ent->d_name); - - if (dir_ent->d_name[strlen(dir_ent->d_name) - 1] == 'o' && - dir_ent->d_name[strlen(dir_ent->d_name) - 2] == '.') { - - if (passthrough || is_llvm_file(ar_file) == 0) { - - if (is_duplicate(ld_params, ld_param_cnt, ar_file) == 0) { - - ld_params[ld_param_cnt++] = ar_file; - if (debug) - SAYF(cMGN "[D] " cRST "not a LTO link file: %s\n", ar_file); - - } - - } else { - - if (is_duplicate(link_params, link_param_cnt, ar_file) == 0) { - - if (we_link == 0) { // we have to honor order ... - - ld_params[ld_param_cnt++] = final_file; - we_link = 1; - - } - - link_params[link_param_cnt++] = ar_file; - if (debug) SAYF(cMGN "[D] " cRST "is a link file: %s\n", ar_file); - - } - - } - - } else - - if (dir_ent->d_name[0] != '.' && !be_quiet) - WARNF("Unusual file found in ar archive %s: %s", argv[i], ar_file); - - } - - closedir(arx); - ar_dir_cnt++; - - continue; - - } - - if (passthrough || argv[i][0] == '-' || is_llvm_file(argv[i]) == 0) { - - // -O3 fucks up the CFG and instrumentation, so we downgrade to O2 - // which is as we want things. Lets hope this is not too different - // in the various llvm versions! - if (strncmp(argv[i], "-plugin-opt=O", 13) == 0 && - !getenv("AFL_DONT_OPTIMIZE")) - ld_params[ld_param_cnt++] = "-plugin-opt=O2"; - else - ld_params[ld_param_cnt++] = argv[i]; - - } else { - - if (we_link == 0) { // we have to honor order ... 
- ld_params[ld_param_cnt++] = final_file; - we_link = 1; - - } - - link_params[link_param_cnt++] = argv[i]; - - } - - } - - // if (have_lto == 0) ld_params[ld_param_cnt++] = AFL_CLANG_FLTO; // maybe we - // should not ... - ld_params[ld_param_cnt] = NULL; - link_params[link_param_cnt] = NULL; - opt_params[opt_param_cnt] = NULL; - inst_params[inst_param_cnt] = NULL; - -} - -/* clean AFL_PATH from PATH */ - -void clean_path() { - - char *tmp, *newpath = NULL, *path = getenv("PATH"); - u8 done = 0; - - if (debug) - SAYF(cMGN "[D]" cRST " old PATH=%s, AFL_PATH=%s\n", path, AFL_PATH); - - // wipe AFL paths from PATH that we set - // we added two paths so we remove the two paths - while (!done) { - - if (*path == 0) - done = 1; - else if (*path++ == ':') - done = 1; - - } - - while (*path == ':') - path++; - - // AFL_PATH could be additionally in PATH so check and remove to not call our - // 'ld' - const size_t pathlen = strlen(path); - const size_t afl_pathlen = strlen(AFL_PATH); - newpath = malloc(pathlen + 1); - if (strcmp(AFL_PATH, "/bin") != 0 && strcmp(AFL_PATH, "/usr/bin") != 0 && - afl_pathlen > 1 && (tmp = strstr(path, AFL_PATH)) != NULL && // it exists - (tmp == path || - (tmp > path && - tmp[-1] == ':')) && // either starts with it or has a colon before - (tmp + afl_pathlen == path + pathlen || - (tmp + afl_pathlen < - path + (pathlen && tmp[afl_pathlen] == - ':')) // end with it or has a colon at the end - )) { - - int one_colon = 1; - - if (tmp > path) { - - memcpy(newpath, path, tmp - path); - newpath[tmp - path - 1] = 0; // remove ':' - one_colon = 0; - - } - - if (tmp + afl_pathlen < path + pathlen) tmp += afl_pathlen + one_colon; - - setenv("PATH", newpath, 1); - - } else - - setenv("PATH", path, 1); - - if (debug) SAYF(cMGN "[D]" cRST " new PATH=%s\n", getenv("PATH")); - free(newpath); - -} - -/* Main entry point */ - -int main(int argc, char **argv) { - - s32 pid, i; - int status; - u8 *ptr, exe[4096], exe2[4096], proc[32], val[2] = " "; - int 
have_afl_ld_caller = 0; - - if (isatty(2) && !getenv("AFL_QUIET") && !getenv("AFL_DEBUG")) { - - if (getenv("AFL_LD") != NULL) - SAYF(cCYA "afl-ld" VERSION cRST - " by Marc \"vanHauser\" Heuse <mh@mh-sec.de> (level %d)\n", - have_afl_ld_caller); - - } else - - be_quiet = 1; - - if (getenv("AFL_DEBUG") != NULL) debug = 1; - if (getenv("AFL_PATH") != NULL) afl_path = getenv("AFL_PATH"); - if (getenv("AFL_LD_PASSTHROUGH") != NULL) passthrough = 1; - if (getenv("AFL_REAL_LD") != NULL) real_ld = getenv("AFL_REAL_LD"); - if (real_ld == NULL || strlen(real_ld) < 2) real_ld = "/bin/ld"; - if (real_ld != NULL && real_ld[0] != '/') - real_ld = alloc_printf("/bin/%s", real_ld); - - if ((ptr = getenv("AFL_LD_CALLER")) != NULL) have_afl_ld_caller = atoi(ptr); - val[0] = 0x31 + have_afl_ld_caller; - setenv("AFL_LD_CALLER", val, 1); - - if (debug) { - - SAYF(cMGN "[D] " cRST - "AFL_LD=%s, set AFL_LD_CALLER=%s, have_afl_ld_caller=%d, " - "real_ld=%s\n", - getenv("AFL_LD"), val, have_afl_ld_caller, real_ld); - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (i = 0; i < argc; i++) - SAYF(" \"%s\"", argv[i]); - SAYF("\n"); - - } - - sprintf(proc, "/proc/%d/exe", getpid()); - if (readlink(proc, exe, sizeof(exe) - 1) > 0) { - - if (readlink(real_ld, exe2, sizeof(exe2) - 1) < 1) exe2[0] = 0; - exe[sizeof(exe) - 1] = 0; - exe[sizeof(exe2) - 1] = 0; - if (strcmp(exe, real_ld) == 0 || strcmp(exe, exe2) == 0) - PFATAL(cLRD "[!] " cRST - "Error: real 'ld' path points to afl-ld, set AFL_REAL_LD to " - "the real 'ld' program!"); - - } - - if (have_afl_ld_caller > 1) - PFATAL(cLRD "[!] " cRST - "Error: afl-ld calls itself in a loop, set AFL_REAL_LD to the " - "real 'ld' program!"); - - if (argc < 2) { - - SAYF( - "\n" - "This is a helper application for afl-fuzz. 
It is a wrapper around GNU " - "'ld',\n" - "executed by the toolchain whenever using " - "afl-clang-lto/afl-clang-lto++.\n" - "You probably don't want to run this program directly.\n\n" - - "Environment variables:\n" - " AFL_LD_PASSTHROUGH do not link+optimize == no instrumentation\n" - " AFL_REAL_LD point to the real ld if necessary\n" - - "\nafl-ld was compiled with the fixed real 'ld' path of %s and the " - "clang " - "bin path of %s\n\n", - real_ld, LLVM_BINDIR); - - exit(1); - - } - - if (getenv("AFL_LD") == NULL) { - - /* if someone install clang/ld into the same directory as afl++ then - they are out of luck ... */ - - if (have_afl_ld_caller == 1) { clean_path(); } - - if (real_ld != NULL && strlen(real_ld) > 1) execvp(real_ld, argv); - execvp("ld", argv); // fallback - PFATAL("Oops, failed to execute 'ld' - check your PATH"); - - } - - atexit(at_exit_handler); // ensure to wipe temp files if things fail - - edit_params(argc, argv); // here most of the magic happens :-) - - if (debug) - SAYF(cMGN "[D] " cRST - "param counts: ar:%u lib:%u ld:%u link:%u opt:%u instr:%u\n", - ar_dir_cnt, libdir_cnt, ld_param_cnt, link_param_cnt, opt_param_cnt, - inst_param_cnt); - - if (!just_version) { - - if (we_link == 0) { - - if (!getenv("AFL_QUIET")) - WARNF("No LTO input file found, cannot instrument!"); - - } else { - - /* first we link all files */ - if (!be_quiet) OKF("Running bitcode linker, creating %s", linked_file); - - if (debug) { - - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (i = 0; i < link_param_cnt; i++) - SAYF(" \"%s\"", link_params[i]); - SAYF("\n"); - - } - - if (!(pid = fork())) { - - execvp(link_params[0], (char **)link_params); - FATAL("Oops, failed to execute '%s'", link_params[0]); - - } - - if (pid < 0) PFATAL("fork() failed"); - if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); - if (WEXITSTATUS(status) != 0) { - - SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD - "\n[-] PROGRAM ABORT : " cRST); - SAYF( - "llvm-link failed! 
Probable causes:\n\n" - " #1 If the error is \"linking globals named '...': symbol " - "multiply defined\"\n" - " then there is nothing we can do - llvm-link is missing an " - "important feature\n\n" - " #2 If the error is \"expected top-level entity\" and then " - "binary output, this\n" - " is because the same file is present in different .a archives " - "in different\n" - " formats. This can be fixed by manual doing the steps afl-ld " - "is doing but\n" - " programmatically - sorry!\n\n"); - exit(WEXITSTATUS(status)); - - } - - /* then we perform an optimization on the collected objects files */ - if (!be_quiet) - OKF("Performing optimization via opt, creating %s", modified_file); - if (debug) { - - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (i = 0; i < opt_param_cnt; i++) - SAYF(" \"%s\"", opt_params[i]); - SAYF("\n"); - - } - - if (!(pid = fork())) { - - execvp(opt_params[0], (char **)opt_params); - FATAL("Oops, failed to execute '%s'", opt_params[0]); - - } - - if (pid < 0) PFATAL("fork() failed"); - if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); - if (WEXITSTATUS(status) != 0) exit(WEXITSTATUS(status)); - - /* then we run the instrumentation through the optimizer */ - if (!be_quiet) - OKF("Performing instrumentation via opt, creating %s", final_file); - if (debug) { - - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (i = 0; i < inst_param_cnt; i++) - SAYF(" \"%s\"", inst_params[i]); - SAYF("\n"); - - } - - if (!(pid = fork())) { - - execvp(inst_params[0], (char **)inst_params); - FATAL("Oops, failed to execute '%s'", inst_params[0]); - - } - - if (pid < 0) PFATAL("fork() failed"); - if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); - if (WEXITSTATUS(status) != 0) exit(WEXITSTATUS(status)); - - } - - /* next step - run the linker! 
:-) */ - - } - - if (!be_quiet) OKF("Running real linker %s", real_ld); - if (debug) { - - SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd()); - for (i = 0; i < ld_param_cnt; i++) - SAYF(" \"%s\"", ld_params[i]); - SAYF("\n"); - - } - - if (!(pid = fork())) { - - clean_path(); - - unsetenv("AFL_LD"); - - if (strlen(real_ld) > 1) execvp(real_ld, (char **)ld_params); - execvp("ld", (char **)ld_params); // fallback - FATAL("Oops, failed to execute 'ld' - check your PATH"); - - } - - if (pid < 0) PFATAL("fork() failed"); - - if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed"); - if (debug) SAYF(cMGN "[D] " cRST "linker result: %d\n", status); - - if (!just_version) { - - if (!getenv("AFL_KEEP_ASSEMBLY")) { - - if (linked_file) { - - unlink(linked_file); - linked_file = NULL; - - } - - if (modified_file) { - - unlink(modified_file); - modified_file = NULL; - - } - - if (final_file) { - - unlink(final_file); - final_file = NULL; - - } - - if (ar_dir != NULL) { - - wipe_directory(ar_dir); - ar_dir = NULL; - - } - - } else { - - if (!be_quiet) { - - SAYF( - "[!] afl-ld: keeping link file %s, optimized bitcode %s and " - "instrumented bitcode %s", - linked_file, modified_file, final_file); - if (ar_dir_cnt > 0 && ar_dir) - SAYF(" and ar archive unpack directory %s", ar_dir); - SAYF("\n"); - - } - - } - - if (status == 0) { - - if (!be_quiet) OKF("Linker was successful"); - - } else { - - SAYF(cLRD "[-] " cRST - "Linker failed, please investigate and send a bug report. Most " - "likely an 'ld' option is incompatible with %s. 
Try " - "AFL_KEEP_ASSEMBLY=1 and AFL_DEBUG=1 for replaying.\n", - AFL_CLANG_FLTO); - - } - - } - - exit(WEXITSTATUS(status)); - -} - diff --git a/llvm_mode/afl-llvm-lto-instrumentation.so.cc b/llvm_mode/afl-llvm-lto-instrumentation.so.cc index febb8950..c5e7a2b7 100644 --- a/llvm_mode/afl-llvm-lto-instrumentation.so.cc +++ b/llvm_mode/afl-llvm-lto-instrumentation.so.cc @@ -23,12 +23,6 @@ */ -// CONFIG OPTION: -// If #define USE_SPLIT is used, then the llvm::SplitEdge function is used -// instead of our own implementation. Ours looks better and will -// compile everywhere. But it is not working for complex code. yet. damn. -#define USE_SPLIT - #define AFL_LLVM_PASS #include "config.h" @@ -44,31 +38,24 @@ #include <sys/time.h> #include "llvm/Config/llvm-config.h" -#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 -typedef long double max_align_t; -#endif - +#include "llvm/ADT/Statistic.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Module.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" - -#ifdef USE_SPLIT #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemorySSAUpdater.h" -#endif +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Pass.h" -#if LLVM_VERSION_MAJOR > 3 || \ - (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/CFG.h" -#else -#include "llvm/DebugInfo.h" -#include "llvm/Support/CFG.h" -#endif +#include <set> using namespace llvm; @@ -91,7 +78,6 @@ class AFLLTOPass : public ModulePass { } -#ifdef USE_SPLIT void getAnalysisUsage(AnalysisUsage &AU) const override { ModulePass::getAnalysisUsage(AU); @@ -100,8 +86,6 @@ class AFLLTOPass : public ModulePass { } -#endif - // Calculate the number of 
average collisions that would occur if all // location IDs would be assigned randomly (like normal afl/afl++). // This uses the "balls in bins" algorithm. @@ -168,7 +152,7 @@ class AFLLTOPass : public ModulePass { bool runOnModule(Module &M) override; protected: - int afl_global_id = 1, debug = 0; + int afl_global_id = 1, debug = 0, autodictionary = 0; uint32_t be_quiet = 0, inst_blocks = 0, inst_funcs = 0, total_instr = 0; }; @@ -177,22 +161,19 @@ class AFLLTOPass : public ModulePass { bool AFLLTOPass::runOnModule(Module &M) { - LLVMContext &C = M.getContext(); + LLVMContext & C = M.getContext(); + std::vector<std::string> dictionary; + std::vector<CallInst *> calls; + DenseMap<Value *, std::string *> valueMap; - IntegerType * Int8Ty = IntegerType::getInt8Ty(C); - IntegerType * Int32Ty = IntegerType::getInt32Ty(C); - struct timeval tv; - struct timezone tz; - u32 rand_seed; + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); - /* Setup random() so we get Actually Random(TM) outputs from AFL_R() */ - gettimeofday(&tv, &tz); - rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); - AFL_SR(rand_seed); + if (getenv("AFL_DEBUG")) debug = 1; /* Show a banner */ - if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) { + if ((isatty(2) && !getenv("AFL_QUIET")) || debug) { SAYF(cCYA "afl-llvm-lto" VERSION cRST " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n"); @@ -201,9 +182,9 @@ bool AFLLTOPass::runOnModule(Module &M) { be_quiet = 1; -#if LLVM_VERSION_MAJOR < 9 - char *neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO"); -#endif + if (getenv("AFL_LLVM_AUTODICTIONARY") || + getenv("AFL_LLVM_LTO_AUTODICTIONARY")) + autodictionary = 1; /* Get globals for the SHM region and the previous location. Note that __afl_prev_loc is thread-local. 
*/ @@ -224,14 +205,320 @@ bool AFLLTOPass::runOnModule(Module &M) { if (F.size() < 2) continue; if (isBlacklisted(&F)) continue; -#ifdef USE_SPLIT - // DominatorTree &DT = - // getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); LoopInfo & LI = - // getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo(); -#endif - std::vector<BasicBlock *> InsBlocks; + if (autodictionary) { + + /* Some implementation notes. + * + * We try to handle 3 cases: + * - memcmp("foo", arg, 3) <- literal string + * - static char globalvar[] = "foo"; + * memcmp(globalvar, arg, 3) <- global variable + * - char localvar[] = "foo"; + * memcmp(locallvar, arg, 3) <- local variable + * + * The local variable case is the hardest. We can only detect that + * case if there is no reassignment or change in the variable. + * And it might not work across llvm version. + * What we do is hooking the initializer function for local variables + * (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned + * variable. And if that variable is then used in a compare function + * we use that noted string. + * This seems not to work for tokens that have a size <= 4 :-( + * + * - if the compared length is smaller than the string length we + * save the full string. This is likely better for fuzzing but + * might be wrong in a few cases depending on optimizers + * + * - not using StringRef because there is a bug in the llvm 11 + * checkout I am using which sometimes points to wrong strings + * + * Over and out. Took me a full day. damn. 
mh/vh + */ + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + + if ((callInst = dyn_cast<CallInst>(&IN))) { + + bool isStrcmp = true; + bool isMemcmp = true; + bool isStrncmp = true; + bool isStrcasecmp = true; + bool isStrncasecmp = true; + bool isIntMemcpy = true; + bool addedNull = false; + uint8_t optLen = 0; + + Function *Callee = callInst->getCalledFunction(); + if (!Callee) continue; + if (callInst->getCallingConv() != llvm::CallingConv::C) continue; + std::string FuncName = Callee->getName().str(); + isStrcmp &= !FuncName.compare("strcmp"); + isMemcmp &= !FuncName.compare("memcmp"); + isStrncmp &= !FuncName.compare("strncmp"); + isStrcasecmp &= !FuncName.compare("strcasecmp"); + isStrncasecmp &= !FuncName.compare("strncasecmp"); + isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64"); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy) + continue; + + /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function + * prototype */ + FunctionType *FT = Callee->getFunctionType(); + + isStrcmp &= FT->getNumParams() == 2 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()); + isStrcasecmp &= FT->getNumParams() == 2 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()); + isMemcmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy() && + FT->getParamType(2)->isIntegerTy(); + isStrncmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + isStrncasecmp &= FT->getNumParams() == 3 && + 
FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy) + continue; + + /* is a str{n,}{case,}cmp/memcmp, check if we have + * str{case,}cmp(x, "const") or str{case,}cmp("const", x) + * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..) + * memcmp(x, "const", ..) or memcmp("const", x, ..) */ + Value *Str1P = callInst->getArgOperand(0), + *Str2P = callInst->getArgOperand(1); + std::string Str1, Str2; + StringRef TmpStr; + bool HasStr1 = getConstantStringInfo(Str1P, TmpStr); + if (TmpStr.empty()) + HasStr1 = false; + else + Str1 = TmpStr.str(); + bool HasStr2 = getConstantStringInfo(Str2P, TmpStr); + if (TmpStr.empty()) + HasStr2 = false; + else + Str2 = TmpStr.str(); + + if (debug) + fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n", + FuncName.c_str(), Str1P, Str1P->getName().str().c_str(), + Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P, + Str2P->getName().str().c_str(), Str2.c_str(), + HasStr2 == true ? "true" : "false"); + + // we handle the 2nd parameter first because of llvm memcpy + if (!HasStr2) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str2P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr2 = true; + Str2 = Array->getAsString().str(); + + } + + } + + } + + } + + // for the internal memcpy routine we only care for the second + // parameter and are not reporting anything. 
+ if (isIntMemcpy == true) { + + if (HasStr2 == true) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t literalLength = Str2.size(); + uint64_t optLength = ilen->getZExtValue(); + if (literalLength + 1 == optLength) { + + Str2.append("\0", 1); // add null byte + addedNull = true; + + } + + } + + valueMap[Str1P] = new std::string(Str2); + + if (debug) + fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P); + continue; + + } + + continue; + + } + + // Neither a literal nor a global variable? + // maybe it is a local variable that we saved + if (!HasStr2) { + + std::string *strng = valueMap[Str2P]; + if (strng && !strng->empty()) { + + Str2 = *strng; + HasStr2 = true; + if (debug) + fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(), + Str2P); + + } + + } + + if (!HasStr1) { + + auto Ptr = dyn_cast<ConstantExpr>(Str1P); + + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr1 = true; + Str1 = Array->getAsString().str(); + + } + + } + + } + + } + + // Neither a literal nor a global variable? 
+ // maybe it is a local variable that we saved + if (!HasStr1) { + + std::string *strng = valueMap[Str1P]; + if (strng && !strng->empty()) { + + Str1 = *strng; + HasStr1 = true; + if (debug) + fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(), + Str1P); + + } + + } + + /* handle cases of one string is const, one string is variable */ + if (!(HasStr1 ^ HasStr2)) continue; + + std::string thestring; + + if (HasStr1) + thestring = Str1; + else + thestring = Str2; + + optLen = thestring.length(); + + if (isMemcmp || isStrncmp || isStrncasecmp) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t literalLength = optLen; + optLen = ilen->getZExtValue(); + if (literalLength + 1 == optLen) { // add null byte + thestring.append("\0", 1); + addedNull = true; + + } + + } + + } + + // add null byte if this is a string compare function and a null + // was not already added + if (addedNull == false && !isMemcmp) { + + thestring.append("\0", 1); // add null byte + optLen++; + + } + + if (!be_quiet) { + + std::string outstring; + fprintf(stderr, "%s: length %u/%u \"", FuncName.c_str(), optLen, + (unsigned int)thestring.length()); + for (uint8_t i = 0; i < thestring.length(); i++) { + + uint8_t c = thestring[i]; + if (c <= 32 || c >= 127) + fprintf(stderr, "\\x%02x", c); + else + fprintf(stderr, "%c", c); + + } + + fprintf(stderr, "\"\n"); + + } + + // we take the longer string, even if the compare was to a + // shorter part. Note that depending on the optimizer of the + // compiler this can be wrong, but it is more likely that this + // is helping the fuzzer + if (optLen != thestring.length()) optLen = thestring.length(); + if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA; + if (optLen < MIN_AUTO_EXTRA) // too short? 
skip + continue; + + dictionary.push_back(thestring.substr(0, optLen)); + + } + + } + + } + + } + for (auto &BB : F) { uint32_t succ = 0; @@ -274,11 +561,7 @@ bool AFLLTOPass::runOnModule(Module &M) { for (uint32_t j = 0; j < Successors.size(); j++) { -#ifdef USE_SPLIT BasicBlock *newBB = llvm::SplitEdge(origBB, Successors[j]); -#else - BasicBlock *newBB = BasicBlock::Create(C, "", &F, nullptr); -#endif if (!newBB) { @@ -287,12 +570,8 @@ bool AFLLTOPass::runOnModule(Module &M) { } -#ifdef USE_SPLIT BasicBlock::iterator IP = newBB->getFirstInsertionPt(); IRBuilder<> IRB(&(*IP)); -#else - IRBuilder<> IRB(&(*newBB)); -#endif /* Set the ID of the inserted basic block */ @@ -313,38 +592,12 @@ bool AFLLTOPass::runOnModule(Module &M) { Value *Incr = IRB.CreateAdd(Counter, One); -#if LLVM_VERSION_MAJOR < 9 - if (neverZero_counters_str != - NULL) { // with llvm 9 we make this the default as the bug in - // llvm is then fixed -#endif - auto cf = IRB.CreateICmpEQ(Incr, Zero); - auto carry = IRB.CreateZExt(cf, Int8Ty); - Incr = IRB.CreateAdd(Incr, carry); -#if LLVM_VERSION_MAJOR < 9 - - } - -#endif + auto cf = IRB.CreateICmpEQ(Incr, Zero); + auto carry = IRB.CreateZExt(cf, Int8Ty); + Incr = IRB.CreateAdd(Incr, carry); IRB.CreateStore(Incr, MapPtrIdx) ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); -#ifdef USE_SPLIT - // nothing -#else - - // Unconditional jump to the destination BB - - IRB.CreateBr(Successors[j]); - - // Replace the original destination to this newly inserted BB - - origBB->replacePhiUsesWith(Successors[j], newBB); - BasicBlock *S = Successors[j]; - S->replacePhiUsesWith(origBB, newBB); - TI->setSuccessor(j, newBB); - -#endif // done :) inst_blocks++; @@ -355,31 +608,148 @@ bool AFLLTOPass::runOnModule(Module &M) { } + // save highest location ID to global variable + // do this after each function to fail faster + if (afl_global_id > MAP_SIZE) { + + uint32_t pow2map = 1, map = afl_global_id; + while ((map = map >> 1)) + pow2map++; + FATAL( + 
"We have %u blocks to instrument but the map size is only %u! Edit " + "config.h and set MAP_SIZE_POW2 from %u to %u, then recompile " + "afl-fuzz and llvm_mode.", + afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map); + + } + } - // save highest location ID to global variable + if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL || dictionary.size()) { - if (afl_global_id > MAP_SIZE) { + // yes we could create our own function, insert it into ctors ... + // but this would be a pain in the butt ... so we use afl-llvm-rt-lto.o - uint32_t pow2map = 1, map = afl_global_id; - while ((map = map >> 1)) - pow2map++; - FATAL( - "We have %u blocks to instrument but the map size is only %u! Edit " - "config.h and set MAP_SIZE_POW2 from %u to %u, then recompile " - "afl-fuzz and llvm_mode.", - afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map); + Function *f = M.getFunction("__afl_auto_init_globals"); - } + if (!f) { + + fprintf(stderr, + "Error: init function could not be found (this hould not " + "happen)\n"); + exit(-1); + + } + + BasicBlock *bb = &f->getEntryBlock(); + if (!bb) { + + fprintf(stderr, + "Error: init function does not have an EntryBlock (this should " + "not happen)\n"); + exit(-1); + + } + + BasicBlock::iterator IP = bb->getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); - if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) { + if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) { - GlobalVariable *AFLFinalLoc = new GlobalVariable( - M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc", 0, - GlobalVariable::GeneralDynamicTLSModel, 0, false); - ConstantInt *const_loc = ConstantInt::get(Int32Ty, afl_global_id); - AFLFinalLoc->setAlignment(4); - AFLFinalLoc->setInitializer(const_loc); + uint32_t write_loc = afl_global_id; + + if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3); + + if (write_loc <= MAP_SIZE && write_loc <= 0x800000) { + + GlobalVariable *AFLFinalLoc = new GlobalVariable( + M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, + 
"__afl_final_loc", 0, GlobalVariable::GeneralDynamicTLSModel, 0, + false); + ConstantInt *const_loc = ConstantInt::get(Int32Ty, write_loc); + StoreInst * StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc); + StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } + + if (dictionary.size()) { + + size_t memlen = 0, count = 0, offset = 0; + char * ptr; + + for (auto token : dictionary) { + + memlen += token.length(); + count++; + + } + + if (!be_quiet) + printf("AUTODICTIONARY: %lu string%s found\n", count, + count == 1 ? "" : "s"); + + if (count) { + + if ((ptr = (char *)malloc(memlen + count)) == NULL) { + + fprintf(stderr, "Error: malloc for %lu bytes failed!\n", + memlen + count); + exit(-1); + + } + + count = 0; + + for (auto token : dictionary) { + + if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) { + + ptr[offset++] = (uint8_t)token.length(); + memcpy(ptr + offset, token.c_str(), token.length()); + offset += token.length(); + count++; + + } + + } + + GlobalVariable *AFLDictionaryLen = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, + "__afl_dictionary_len", 0, GlobalVariable::GeneralDynamicTLSModel, + 0, false); + ConstantInt *const_len = ConstantInt::get(Int32Ty, offset); + StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen); + StoreDictLen->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + ArrayType *ArrayTy = ArrayType::get(IntegerType::get(C, 8), offset); + GlobalVariable *AFLInternalDictionary = new GlobalVariable( + M, ArrayTy, true, GlobalValue::ExternalLinkage, + ConstantDataArray::get(C, + *(new ArrayRef<char>((char *)ptr, offset))), + "__afl_internal_dictionary", 0, + GlobalVariable::GeneralDynamicTLSModel, 0, false); + AFLInternalDictionary->setInitializer(ConstantDataArray::get( + C, *(new ArrayRef<char>((char *)ptr, offset)))); + AFLInternalDictionary->setConstant(true); + + GlobalVariable *AFLDictionary = new 
GlobalVariable( + M, PointerType::get(Int8Ty, 0), false, GlobalValue::ExternalLinkage, + 0, "__afl_dictionary"); + + Value *AFLDictOff = IRB.CreateGEP(AFLInternalDictionary, Zero); + Value *AFLDictPtr = + IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Ty, 0)); + StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary); + StoreDict->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } } @@ -392,10 +762,11 @@ bool AFLLTOPass::runOnModule(Module &M) { else { char modeline[100]; - snprintf(modeline, sizeof(modeline), "%s%s%s%s", + snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", getenv("AFL_HARDEN") ? "hardened" : "non-hardened", getenv("AFL_USE_ASAN") ? ", ASAN" : "", getenv("AFL_USE_MSAN") ? ", MSAN" : "", + getenv("AFL_USE_CFISAN") ? ", CFISAN" : "", getenv("AFL_USE_UBSAN") ? ", UBSAN" : ""); OKF("Instrumented %u locations with no collisions (on average %llu " "collisions would be in afl-gcc/afl-clang-fast) (%s mode).", @@ -422,5 +793,5 @@ static RegisterPass<AFLLTOPass> X("afl-lto", "afl++ LTO instrumentation pass", false, false); static RegisterStandardPasses RegisterAFLLTOPass( - PassManagerBuilder::EP_OptimizerLast, registerAFLLTOPass); + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerAFLLTOPass); diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc index 133c64b4..b4249802 100644 --- a/llvm_mode/afl-llvm-pass.so.cc +++ b/llvm_mode/afl-llvm-pass.so.cc @@ -2,12 +2,15 @@ american fuzzy lop++ - LLVM-mode instrumentation pass --------------------------------------------------- - Written by Laszlo Szekeres <lszekeres@google.com> and + Written by Laszlo Szekeres <lszekeres@google.com>, + Adrian Herrera <adrian.herrera@anu.edu.au>, Michal Zalewski LLVM integration design comes from Laszlo Szekeres. C bits copied-and-pasted from afl-as.c are Michal's fault. + NGRAM previous location coverage comes from Adrian Herrera. + Copyright 2015, 2016 Google Inc. All rights reserved. 
Copyright 2019-2020 AFLplusplus Project. All rights reserved. @@ -27,7 +30,6 @@ #include "config.h" #include "debug.h" - #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -47,6 +49,7 @@ typedef long double max_align_t; #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #if LLVM_VERSION_MAJOR > 3 || \ @@ -58,6 +61,8 @@ typedef long double max_align_t; #include "llvm/Support/CFG.h" #endif +#include "llvm-ngram-coverage.h" + using namespace llvm; namespace { @@ -118,6 +123,9 @@ class AFLCoverage : public ModulePass { protected: std::list<std::string> myWhitelist; + uint32_t ngram_size = 0; + uint32_t debug = 0; + char * ctx_str = NULL; }; @@ -125,12 +133,40 @@ class AFLCoverage : public ModulePass { char AFLCoverage::ID = 0; +/* needed up to 3.9.0 */ +#if LLVM_VERSION_MAJOR == 3 && \ + (LLVM_VERSION_MINOR < 9 || \ + (LLVM_VERSION_MINOR == 9 && LLVM_VERSION_PATCH < 1)) +uint64_t PowerOf2Ceil(unsigned in) { + + uint64_t in64 = in - 1; + in64 |= (in64 >> 1); + in64 |= (in64 >> 2); + in64 |= (in64 >> 4); + in64 |= (in64 >> 8); + in64 |= (in64 >> 16); + in64 |= (in64 >> 32); + return in64 + 1; + +} + +#endif + +/* #if LLVM_VERSION_STRING >= "4.0.1" */ +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_PATCH >= 1) +#define AFL_HAVE_VECTOR_INTRINSICS 1 +#endif bool AFLCoverage::runOnModule(Module &M) { LLVMContext &C = M.getContext(); - IntegerType * Int8Ty = IntegerType::getInt8Ty(C); - IntegerType * Int32Ty = IntegerType::getInt32Ty(C); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); +#ifdef AFL_HAVE_VECTOR_INTRINSICS + IntegerType *IntLocTy = + IntegerType::getIntNTy(C, sizeof(PREV_LOC_T) * CHAR_BIT); +#endif struct timeval tv; struct timezone tz; u32 rand_seed; @@ -145,9 +181,12 @@ bool AFLCoverage::runOnModule(Module &M) { char be_quiet = 0; + if 
(getenv("AFL_DEBUG")) debug = 1; + if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) { - SAYF(cCYA "afl-llvm-pass" VERSION cRST " by <lszekeres@google.com>\n"); + SAYF(cCYA "afl-llvm-pass" VERSION cRST + " by <lszekeres@google.com> and <adrian.herrera@anu.edu.au>\n"); } else @@ -170,32 +209,171 @@ bool AFLCoverage::runOnModule(Module &M) { char *neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO"); #endif + unsigned PrevLocSize; + + char *ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE"); + if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE"); + ctx_str = getenv("AFL_LLVM_CTX"); + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + /* Decide previous location vector size (must be a power of two) */ + VectorType *PrevLocTy; + + if (ngram_size_str) + if (sscanf(ngram_size_str, "%u", &ngram_size) != 1 || ngram_size < 2 || + ngram_size > NGRAM_SIZE_MAX) + FATAL( + "Bad value of AFL_NGRAM_SIZE (must be between 2 and NGRAM_SIZE_MAX " + "(%u))", + NGRAM_SIZE_MAX); + + if (ngram_size == 1) ngram_size = 0; + if (ngram_size) + PrevLocSize = ngram_size - 1; + else +#else + if (ngram_size_str) + FATAL("Sorry, NGRAM branch coverage is not supported with llvm version %s!", + LLVM_VERSION_STRING); +#endif + PrevLocSize = 1; + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + uint64_t PrevLocVecSize = PowerOf2Ceil(PrevLocSize); + if (ngram_size) PrevLocTy = VectorType::get(IntLocTy, PrevLocVecSize); +#endif + + if (ctx_str && ngram_size_str) + FATAL("you must decide between NGRAM and CTX instrumentation"); + /* Get globals for the SHM region and the previous location. Note that __afl_prev_loc is thread-local. 
*/ GlobalVariable *AFLMapPtr = new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); + GlobalVariable *AFLPrevLoc; + GlobalVariable *AFLContext; + if (ctx_str) #ifdef __ANDROID__ - GlobalVariable *AFLPrevLoc = new GlobalVariable( - M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc"); + AFLContext = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx"); #else - GlobalVariable *AFLPrevLoc = new GlobalVariable( + AFLContext = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx", 0, + GlobalVariable::GeneralDynamicTLSModel, 0, false); +#endif + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + if (ngram_size) +#ifdef __ANDROID__ + AFLPrevLoc = new GlobalVariable( + M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage, + /* Initializer */ nullptr, "__afl_prev_loc"); +#else + AFLPrevLoc = new GlobalVariable( + M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage, + /* Initializer */ nullptr, "__afl_prev_loc", + /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel, + /* AddressSpace */ 0, /* IsExternallyInitialized */ false); +#endif + else +#endif +#ifdef __ANDROID__ + AFLPrevLoc = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc"); +#else + AFLPrevLoc = new GlobalVariable( M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0, GlobalVariable::GeneralDynamicTLSModel, 0, false); #endif + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + /* Create the vector shuffle mask for updating the previous block history. + Note that the first element of the vector will store cur_loc, so just set + it to undef to allow the optimizer to do its thing. 
*/ + + SmallVector<Constant *, 32> PrevLocShuffle = {UndefValue::get(Int32Ty)}; + + for (unsigned I = 0; I < PrevLocSize - 1; ++I) + PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, I)); + + for (unsigned I = PrevLocSize; I < PrevLocVecSize; ++I) + PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize)); + + Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle); +#endif + + // other constants we need ConstantInt *Zero = ConstantInt::get(Int8Ty, 0); ConstantInt *One = ConstantInt::get(Int8Ty, 1); + LoadInst *PrevCtx; // CTX sensitive coverage + /* Instrument all the things! */ int inst_blocks = 0; for (auto &F : M) { + int has_calls = 0; + if (debug) + fprintf(stderr, "FUNCTION: %s (%zu)\n", F.getName().str().c_str(), + F.size()); + if (isBlacklisted(&F)) continue; + // AllocaInst *CallingContext = nullptr; + + if (ctx_str && F.size() > 1) { // Context sensitive coverage + // load the context ID of the previous function and write to to a local + // variable on the stack + auto bb = &F.getEntryBlock(); + BasicBlock::iterator IP = bb->getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + PrevCtx = IRB.CreateLoad(AFLContext); + PrevCtx->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + // does the function have calls? and is any of the calls larger than one + // basic block? 
+ has_calls = 0; + for (auto &BB : F) { + + if (has_calls) break; + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + if ((callInst = dyn_cast<CallInst>(&IN))) { + + Function *Callee = callInst->getCalledFunction(); + if (!Callee || Callee->size() < 2) + continue; + else { + + has_calls = 1; + break; + + } + + } + + } + + } + + // if yes we store a context ID for this function in the global var + if (has_calls) { + + ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(MAP_SIZE)); + StoreInst * StoreCtx = IRB.CreateStore(NewCtx, AFLContext); + StoreCtx->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } + for (auto &BB : F) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); @@ -310,6 +488,22 @@ bool AFLCoverage::runOnModule(Module &M) { } + // in CTX mode we have to restore the original context for the caller - + // she might be calling other functions which need the correct CTX + if (ctx_str && has_calls) { + + Instruction *Inst = BB.getTerminator(); + if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) { + + IRBuilder<> Post_IRB(Inst); + StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext); + RestoreCtx->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } + if (AFL_R(100) >= inst_ratio) continue; /* Make up cur_loc */ @@ -356,20 +550,50 @@ bool AFLCoverage::runOnModule(Module &M) { // fprintf(stderr, " == %d\n", more_than_one); if (more_than_one != 1) continue; #endif - ConstantInt *CurLoc = ConstantInt::get(Int32Ty, cur_loc); + + ConstantInt *CurLoc; + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + if (ngram_size) + CurLoc = ConstantInt::get(IntLocTy, cur_loc); + else +#endif + CurLoc = ConstantInt::get(Int32Ty, cur_loc); /* Load prev_loc */ LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc); PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); + Value *PrevLocTrans; + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + /* 
"For efficiency, we propose to hash the tuple as a key into the + hit_count map as (prev_block_trans << 1) ^ curr_block_trans, where + prev_block_trans = (block_trans_1 ^ ... ^ block_trans_(n-1)" */ + + if (ngram_size) + PrevLocTrans = IRB.CreateXorReduce(PrevLoc); + else +#endif + if (ctx_str) + PrevLocTrans = IRB.CreateZExt(IRB.CreateXor(PrevLoc, PrevCtx), Int32Ty); + else + PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); /* Load SHM pointer */ LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - Value *MapPtrIdx = - IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, CurLoc)); + + Value *MapPtrIdx; +#ifdef AFL_HAVE_VECTOR_INTRINSICS + if (ngram_size) + MapPtrIdx = IRB.CreateGEP( + MapPtr, + IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, CurLoc), Int32Ty)); + else +#endif + MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocTrans, CurLoc)); /* Update bitmap */ @@ -449,11 +673,31 @@ bool AFLCoverage::runOnModule(Module &M) { IRB.CreateStore(Incr, MapPtrIdx) ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - /* Set prev_loc to cur_loc >> 1 */ + /* Update prev_loc history vector (by placing cur_loc at the head of the + vector and shuffle the other elements back by one) */ + + StoreInst *Store; + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + if (ngram_size) { + + Value *ShuffledPrevLoc = IRB.CreateShuffleVector( + PrevLoc, UndefValue::get(PrevLocTy), PrevLocShuffleMask); + Value *UpdatedPrevLoc = IRB.CreateInsertElement( + ShuffledPrevLoc, IRB.CreateLShr(CurLoc, (uint64_t)1), (uint64_t)0); + + Store = IRB.CreateStore(UpdatedPrevLoc, AFLPrevLoc); + Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - StoreInst *Store = - IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc); - Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + } else + +#endif + { + + Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), + AFLPrevLoc); 
+ + } inst_blocks++; @@ -470,10 +714,11 @@ bool AFLCoverage::runOnModule(Module &M) { else { char modeline[100]; - snprintf(modeline, sizeof(modeline), "%s%s%s%s", + snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", getenv("AFL_HARDEN") ? "hardened" : "non-hardened", getenv("AFL_USE_ASAN") ? ", ASAN" : "", getenv("AFL_USE_MSAN") ? ", MSAN" : "", + getenv("AFL_USE_CFISAN") ? ", CFISAN" : "", getenv("AFL_USE_UBSAN") ? ", UBSAN" : ""); OKF("Instrumented %u locations (%s mode, ratio %u%%).", inst_blocks, modeline, inst_ratio); diff --git a/llvm_mode/afl-llvm-rt-lto.o.c b/llvm_mode/afl-llvm-rt-lto.o.c new file mode 100644 index 00000000..5921f968 --- /dev/null +++ b/llvm_mode/afl-llvm-rt-lto.o.c @@ -0,0 +1,23 @@ +/* + american fuzzy lop++ - LLVM instrumentation bootstrap + ----------------------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +*/ + +// to prevent the function from being removed +unsigned char __afl_lto_mode = 0; + +/* Proper initialization routine. */ + +__attribute__((constructor(0))) void __afl_auto_init_globals(void) { + + __afl_lto_mode = 1; + +} + diff --git a/llvm_mode/afl-llvm-rt.o.c b/llvm_mode/afl-llvm-rt.o.c index 5f9a5534..3ad9eab4 100644 --- a/llvm_mode/afl-llvm-rt.o.c +++ b/llvm_mode/afl-llvm-rt.o.c @@ -26,6 +26,7 @@ #include "config.h" #include "types.h" #include "cmplog.h" +#include "llvm-ngram-coverage.h" #include <stdio.h> #include <stdlib.h> @@ -41,15 +42,15 @@ #include <sys/wait.h> #include <sys/types.h> +#ifdef __linux__ +#include "snapshot-inl.h" +#endif + /* This is a somewhat ugly hack for the experimental 'trace-pc-guard' mode. Basically, we need to make sure that the forkserver is initialized after the LLVM-generated runtime initialization pass, not before. 
*/ -#ifdef USE_TRACE_PC #define CONST_PRIO 5 -#else -#define CONST_PRIO 0 -#endif /* ^USE_TRACE_PC */ #include <sys/mman.h> #include <fcntl.h> @@ -60,17 +61,23 @@ u8 __afl_area_initial[MAP_SIZE]; u8 *__afl_area_ptr = __afl_area_initial; +u8 *__afl_dictionary; #ifdef __ANDROID__ -u32 __afl_prev_loc; -u32 __afl_final_loc; +PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX]; +u32 __afl_final_loc; +u32 __afl_prev_ctx; +u32 __afl_cmp_counter; +u32 __afl_dictionary_len; #else -__thread u32 __afl_prev_loc; -__thread u32 __afl_final_loc; +__thread PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX]; +__thread u32 __afl_final_loc; +__thread u32 __afl_prev_ctx; +__thread u32 __afl_cmp_counter; +__thread u32 __afl_dictionary_len; #endif struct cmp_map *__afl_cmp_map; -__thread u32 __afl_cmp_counter; /* Running in persistent mode? */ @@ -92,6 +99,10 @@ static void __afl_map_shm(void) { const char * shm_file_path = id_str; int shm_fd = -1; unsigned char *shm_base = NULL; + unsigned int map_size = MAP_SIZE + + if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE) map_size = + __afl_final_loc; /* create the shared memory segment as if it was a file */ shm_fd = shm_open(shm_file_path, O_RDWR, 0600); @@ -103,7 +114,7 @@ static void __afl_map_shm(void) { } /* map the shared memory segment to the address space of the process */ - shm_base = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); + shm_base = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); if (shm_base == MAP_FAILED) { close(shm_fd); @@ -176,30 +187,264 @@ static void __afl_map_shm(void) { } +#ifdef __linux__ +static void __afl_start_snapshots(void) { + + static u8 tmp[4] = {0, 0, 0, 0}; + s32 child_pid; + u32 status = 0; + u32 map_size = MAP_SIZE; + u32 already_read_first = 0; + u32 was_killed; + + if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE) + map_size = __afl_final_loc; + + u8 child_stopped = 0; + + void (*old_sigchld_handler)(int) = 0; // = signal(SIGCHLD, SIG_DFL); + + /* Phone home and tell 
the parent that we're OK. If parent isn't there, + assume we're not running in forkserver mode and just execute program. */ + + status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT); + if (map_size <= 0x800000) + status |= (FS_OPT_SET_MAPSIZE(map_size) | FS_OPT_MAPSIZE); + if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT; + memcpy(tmp, &status, 4); + + if (write(FORKSRV_FD + 1, tmp, 4) != 4) return; + + if (__afl_dictionary_len > 0 && __afl_dictionary) { + + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) == + (FS_OPT_ENABLED | FS_OPT_AUTODICT)) { + + // great lets pass the dictionary through the forkserver FD + u32 len = __afl_dictionary_len, offset = 0; + s32 ret; + + if (write(FORKSRV_FD + 1, &len, 4) != 4) { + + write(2, "Error: could not send dictionary len\n", + strlen("Error: could not send dictionary len\n")); + _exit(1); + + } + + while (len != 0) { + + ret = write(FORKSRV_FD + 1, __afl_dictionary + offset, len); + + if (ret < 1) { + + write(2, "Error: could not send dictionary\n", + strlen("Error: could not send dictionary\n")); + _exit(1); + + } + + len -= ret; + offset += ret; + + } + + } else { + + // uh this forkserver master does not understand extended option passing + // or does not want the dictionary + already_read_first = 1; + + } + + } + + while (1) { + + int status; + + if (already_read_first) { + + already_read_first = 0; + + } else { + + /* Wait for parent by reading from the pipe. Abort if read fails. */ + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + } + + /* If we stopped the child in persistent mode, but there was a race + condition and afl-fuzz already issued SIGKILL, write off the old + process. */ + + if (child_stopped && was_killed) { + + child_stopped = 0; + if (waitpid(child_pid, &status, 0) < 0) _exit(1); + + } + + if (!child_stopped) { + + /* Once woken up, create a clone of our process. 
*/ + + child_pid = fork(); + if (child_pid < 0) _exit(1); + + /* In child process: close fds, resume execution. */ + + if (!child_pid) { + + signal(SIGCHLD, old_sigchld_handler); + + close(FORKSRV_FD); + close(FORKSRV_FD + 1); + + if (!afl_snapshot_do()) { raise(SIGSTOP); } + + __afl_area_ptr[0] = 1; + memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T)); + + return; + + } + + } else { + + /* Special handling for persistent mode: if the child is alive but + currently stopped, simply restart it with SIGCONT. */ + + kill(child_pid, SIGCONT); + child_stopped = 0; + + } + + /* In parent process: write PID to pipe, then wait for child. */ + + if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) _exit(1); + + if (waitpid(child_pid, &status, WUNTRACED) < 0) _exit(1); + + /* In persistent mode, the child stops itself with SIGSTOP to indicate + a successful run. In this case, we want to wake it up without forking + again. */ + + if (WIFSTOPPED(status)) child_stopped = 1; + + /* Relay wait status to pipe, then loop back. */ + + if (write(FORKSRV_FD + 1, &status, 4) != 4) _exit(1); + + } + +} + +#endif + /* Fork server logic. 
*/ static void __afl_start_forkserver(void) { - static u8 tmp[4]; - s32 child_pid; +#ifdef __linux__ + if (!is_persistent && !__afl_cmp_map && !getenv("AFL_NO_SNAPSHOT") && + afl_snapshot_init() >= 0) { + + __afl_start_snapshots(); + return; + + } + +#endif + + u8 tmp[4] = {0, 0, 0, 0}; + s32 child_pid; + u32 status = 0; + u32 map_size = MAP_SIZE; + u32 already_read_first = 0; + u32 was_killed; + + if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE) + map_size = __afl_final_loc; u8 child_stopped = 0; void (*old_sigchld_handler)(int) = 0; // = signal(SIGCHLD, SIG_DFL); + if (map_size <= 0x800000) + status |= (FS_OPT_SET_MAPSIZE(map_size) | FS_OPT_MAPSIZE); + if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT; + if (status) status |= (FS_OPT_ENABLED); + memcpy(tmp, &status, 4); + /* Phone home and tell the parent that we're OK. If parent isn't there, assume we're not running in forkserver mode and just execute program. */ if (write(FORKSRV_FD + 1, tmp, 4) != 4) return; + if (__afl_dictionary_len > 0 && __afl_dictionary) { + + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) == + (FS_OPT_ENABLED | FS_OPT_AUTODICT)) { + + // great lets pass the dictionary through the forkserver FD + u32 len = __afl_dictionary_len, offset = 0; + s32 ret; + + if (write(FORKSRV_FD + 1, &len, 4) != 4) { + + write(2, "Error: could not send dictionary len\n", + strlen("Error: could not send dictionary len\n")); + _exit(1); + + } + + while (len != 0) { + + ret = write(FORKSRV_FD + 1, __afl_dictionary + offset, len); + + if (ret < 1) { + + write(2, "Error: could not send dictionary\n", + strlen("Error: could not send dictionary\n")); + _exit(1); + + } + + len -= ret; + offset += ret; + + } + + } else { + + // uh this forkserver master does not understand extended option passing + // or does not want the dictionary + already_read_first = 1; + + } + + } + while (1) { - u32 was_killed; int status; /* Wait 
for parent by reading from the pipe. Abort if read fails. */ - if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + if (already_read_first) { + + already_read_first = 0; + + } else { + + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + } /* If we stopped the child in persistent mode, but there was a race condition and afl-fuzz already issued SIGKILL, write off the old @@ -267,8 +512,12 @@ static void __afl_start_forkserver(void) { int __afl_persistent_loop(unsigned int max_cnt) { - static u8 first_pass = 1; - static u32 cycle_cnt; + static u8 first_pass = 1; + static u32 cycle_cnt; + unsigned int map_size = MAP_SIZE; + + if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE) + map_size = __afl_final_loc; if (first_pass) { @@ -279,9 +528,9 @@ int __afl_persistent_loop(unsigned int max_cnt) { if (is_persistent) { - memset(__afl_area_ptr, 0, MAP_SIZE); + memset(__afl_area_ptr, 0, map_size); __afl_area_ptr[0] = 1; - __afl_prev_loc = 0; + memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T)); } @@ -298,7 +547,7 @@ int __afl_persistent_loop(unsigned int max_cnt) { raise(SIGSTOP); __afl_area_ptr[0] = 1; - __afl_prev_loc = 0; + memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T)); return 1; diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc index 2ca70659..84a9b8d9 100644 --- a/llvm_mode/compare-transform-pass.so.cc +++ b/llvm_mode/compare-transform-pass.so.cc @@ -112,11 +112,12 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, const bool processStrcasecmp, const bool processStrncasecmp) { - std::vector<CallInst *> calls; - LLVMContext & C = M.getContext(); - IntegerType * Int8Ty = IntegerType::getInt8Ty(C); - IntegerType * Int32Ty = IntegerType::getInt32Ty(C); - IntegerType * Int64Ty = IntegerType::getInt64Ty(C); + DenseMap<Value *, std::string *> valueMap; + std::vector<CallInst *> calls; + LLVMContext & C = M.getContext(); + IntegerType * Int8Ty = IntegerType::getInt8Ty(C); 
+ IntegerType * Int32Ty = IntegerType::getInt32Ty(C); + IntegerType * Int64Ty = IntegerType::getInt64Ty(C); #if LLVM_VERSION_MAJOR < 9 Constant * @@ -263,6 +264,8 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, bool isStrncmp = processStrncmp; bool isStrcasecmp = processStrcasecmp; bool isStrncasecmp = processStrncasecmp; + bool isIntMemcpy = true; + bool indirect = false; Function *Callee = callInst->getCalledFunction(); if (!Callee) continue; @@ -273,9 +276,10 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, isStrncmp &= !FuncName.compare(StringRef("strncmp")); isStrcasecmp &= !FuncName.compare(StringRef("strcasecmp")); isStrncasecmp &= !FuncName.compare(StringRef("strncasecmp")); + isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64"); if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && - !isStrncasecmp) + !isStrncasecmp && !isIntMemcpy) continue; /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function @@ -309,7 +313,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, FT->getParamType(2)->isIntegerTy(); if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && - !isStrncasecmp) + !isStrncasecmp && !isIntMemcpy) continue; /* is a str{n,}{case,}cmp/memcmp, check if we have @@ -322,6 +326,97 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, bool HasStr1 = getConstantStringInfo(Str1P, Str1); bool HasStr2 = getConstantStringInfo(Str2P, Str2); + if (isIntMemcpy && HasStr2) { + + valueMap[Str1P] = new std::string(Str2.str()); + // fprintf(stderr, "saved %s for %p\n", Str2.str().c_str(), Str1P); + continue; + + } + + // not literal? 
maybe global or local variable + if (!(HasStr1 ^ HasStr2)) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str2P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr2 = true; + Str2 = Array->getAsString(); + valueMap[Str2P] = new std::string(Str2.str()); + // fprintf(stderr, "glo2 %s\n", Str2.str().c_str()); + + } + + } + + } + + if (!HasStr2) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str1P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr1 = true; + Str1 = Array->getAsString(); + valueMap[Str1P] = new std::string(Str1.str()); + // fprintf(stderr, "glo1 %s\n", Str1.str().c_str()); + + } + + } + + } + + } else if (isIntMemcpy) { + + valueMap[Str1P] = new std::string(Str2.str()); + // fprintf(stderr, "saved\n"); + + } + + if ((HasStr1 ^ HasStr2)) indirect = true; + + } + + if (isIntMemcpy) continue; + + if (!(HasStr1 ^ HasStr2)) { + + // do we have a saved local variable initialization? + std::string *val = valueMap[Str1P]; + if (val && !val->empty()) { + + Str1 = StringRef(*val); + HasStr1 = true; + indirect = true; + // fprintf(stderr, "loaded1 %s\n", Str1.str().c_str()); + + } else { + + val = valueMap[Str2P]; + if (val && !val->empty()) { + + Str2 = StringRef(*val); + HasStr2 = true; + indirect = true; + // fprintf(stderr, "loaded2 %s\n", Str2.str().c_str()); + + } + + } + + } + /* handle cases of one string is const, one string is variable */ if (!(HasStr1 ^ HasStr2)) continue; @@ -334,9 +429,8 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, if (!ilen) continue; /* final precaution: if size of compare is larger than constant * string skip it*/ - uint64_t literalLength = - HasStr1 ? 
GetStringLength(Str1P) : GetStringLength(Str2P); - if (literalLength < ilen->getZExtValue()) continue; + uint64_t literalLength = HasStr1 ? Str1.size() : Str2.size(); + if (literalLength + 1 < ilen->getZExtValue()) continue; } @@ -363,9 +457,9 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, std::string TmpConstStr; Value * VarStr; bool HasStr1 = getConstantStringInfo(Str1P, Str1); - getConstantStringInfo(Str2P, Str2); - uint64_t constLen, sizedLen; - bool isMemcmp = + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + uint64_t constLen, sizedLen; + bool isMemcmp = !callInst->getCalledFunction()->getName().compare(StringRef("memcmp")); bool isSizedcmp = isMemcmp || !callInst->getCalledFunction()->getName().compare( @@ -389,6 +483,29 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, } + if (!(HasStr1 ^ HasStr2)) { + + // do we have a saved local or global variable initialization? + std::string *val = valueMap[Str1P]; + if (val && !val->empty()) { + + Str1 = StringRef(*val); + HasStr1 = true; + + } else { + + val = valueMap[Str2P]; + if (val && !val->empty()) { + + Str2 = StringRef(*val); + HasStr2 = true; + + } + + } + + } + if (HasStr1) { TmpConstStr = Str1.str(); diff --git a/llvm_mode/llvm-ngram-coverage.h b/llvm_mode/llvm-ngram-coverage.h new file mode 100644 index 00000000..12b666e9 --- /dev/null +++ b/llvm_mode/llvm-ngram-coverage.h @@ -0,0 +1,18 @@ +#ifndef AFL_NGRAM_CONFIG_H +#define AFL_NGRAM_CONFIG_H + +#include "../config.h" + +#if (MAP_SIZE_POW2 <= 16) +typedef u16 PREV_LOC_T; +#elif (MAP_SIZE_POW2 <= 32) +typedef u32 PREV_LOC_T; +#else +typedef u64 PREV_LOC_T; +#endif + +/* Maximum ngram size */ +#define NGRAM_SIZE_MAX 16U + +#endif + diff --git a/llvm_mode/split-compares-pass.so.cc b/llvm_mode/split-compares-pass.so.cc index d296ba3d..f0615f85 100644 --- a/llvm_mode/split-compares-pass.so.cc +++ b/llvm_mode/split-compares-pass.so.cc @@ -1235,8 +1235,8 @@ bool 
SplitComparesTransform::runOnModule(Module &M) { int bitw = 64; - char *bitw_env = getenv("LAF_SPLIT_COMPARES_BITW"); - if (!bitw_env) bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW"); + char *bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW"); + if (!bitw_env) bitw_env = getenv("LAF_SPLIT_COMPARES_BITW"); if (bitw_env) { bitw = atoi(bitw_env); } enableFPSplit = getenv("AFL_LLVM_LAF_SPLIT_FLOATS") != NULL; |