diff options
Diffstat (limited to 'utils')
38 files changed, 911 insertions, 1981 deletions
diff --git a/utils/README.md b/utils/README.md index 336b6b6c..b157424f 100644 --- a/utils/README.md +++ b/utils/README.md @@ -32,7 +32,8 @@ Here's a quick overview of the stuff you can find in this directory: with additional gdb metadata. - custom_mutators - examples for the afl++ custom mutator interface in - C and Python + C and Python. Note: They were moved to + ../custom_mutators/examples/ - distributed_fuzzing - a sample script for synchronizing fuzzer instances across multiple machines (see parallel_fuzzing.md). diff --git a/utils/afl_frida/afl-frida.c b/utils/afl_frida/afl-frida.c index 711d8f33..e49d6f42 100644 --- a/utils/afl_frida/afl-frida.c +++ b/utils/afl_frida/afl-frida.c @@ -111,7 +111,7 @@ inline static void afl_maybe_log(guint64 current_pc) { } -#if GUM_NATIVE_CPU == GUM_CPU_AMD64 +#ifdef __x86_64__ static const guint8 afl_maybe_log_code[] = { @@ -177,7 +177,7 @@ void instr_basic_block(GumStalkerIterator *iterator, GumStalkerOutput *output, if (instr->address >= range->code_start && instr->address <= range->code_end) { -#if GUM_NATIVE_CPU == GUM_CPU_AMD64 +#ifdef __x86_64__ GumX86Writer *cw = output->writer.x86; if (range->current_log_impl == 0 || !gum_x86_writer_can_branch_directly_between( diff --git a/utils/afl_network_proxy/afl-network-server.c b/utils/afl_network_proxy/afl-network-server.c index 0dfae658..60f174ee 100644 --- a/utils/afl_network_proxy/afl-network-server.c +++ b/utils/afl_network_proxy/afl-network-server.c @@ -45,7 +45,6 @@ #include <sys/wait.h> #include <sys/time.h> -#include <sys/shm.h> #include <sys/stat.h> #include <sys/types.h> #include <sys/resource.h> @@ -53,7 +52,9 @@ #include <netinet/ip6.h> #include <arpa/inet.h> #include <sys/mman.h> -#include <sys/shm.h> +#ifndef USEMMAP + #include <sys/shm.h> +#endif #include <sys/socket.h> #include <netdb.h> diff --git a/utils/afl_proxy/afl-proxy.c b/utils/afl_proxy/afl-proxy.c index aa7a361a..6006e238 100644 --- a/utils/afl_proxy/afl-proxy.c +++ b/utils/afl_proxy/afl-proxy.c @@ -70,6 +70,10 @@ static void __afl_map_shm(void) { char *id_str = getenv(SHM_ENV_VAR); char *ptr; + /* NOTE TODO BUG FIXME: if you want to supply a variable sized map then + uncomment the following: */ + + /* if ((ptr = getenv("AFL_MAP_SIZE")) != NULL) { u32 val = atoi(ptr); @@ -77,6 +81,8 @@ static void __afl_map_shm(void) { } + */ + if (__afl_map_size > MAP_SIZE) { if (__afl_map_size > FS_OPT_MAX_MAPSIZE) { @@ -189,10 +195,7 @@ static u32 __afl_next_testcase(u8 *buf, u32 max_len) { /* report that we are starting the target */ if (write(FORKSRV_FD + 1, &res, 4) != 4) return 0; - if (status < 1) - return 0; - else - return status; + return status; } @@ -210,7 +213,7 @@ int main(int argc, char *argv[]) { /* This is were the testcase data is written into */ u8 buf[1024]; // this is the maximum size for a test case! set it! - u32 len; + s32 len; /* here you specify the map size you need that you are reporting to afl-fuzz. Any value is fine as long as it can be divided by 32. */ @@ -222,10 +225,20 @@ int main(int argc, char *argv[]) { while ((len = __afl_next_testcase(buf, sizeof(buf))) > 0) { - /* here you have to create the magic that feeds the buf/len to the - target and write the coverage to __afl_area_ptr */ + if (len > 4) { // the minimum data size you need for the target + + /* here you have to create the magic that feeds the buf/len to the + target and write the coverage to __afl_area_ptr */ - // ... the magic ... + // ... the magic ... + + // remove this, this is just to make afl-fuzz not complain when run + if (buf[0] == 0xff) + __afl_area_ptr[1] = 1; + else + __afl_area_ptr[2] = 2; + + } /* report the test case is done and wait for the next */ __afl_end_testcase(); diff --git a/utils/aflpp_driver/GNUmakefile b/utils/aflpp_driver/GNUmakefile index c1a087d7..ad99b893 100644 --- a/utils/aflpp_driver/GNUmakefile +++ b/utils/aflpp_driver/GNUmakefile @@ -7,7 +7,7 @@ ifneq "" "$(LLVM_BINDIR)" LLVM_BINDIR := $(LLVM_BINDIR)/ endif -CFLAGS := -O3 -funroll-loops -g +CFLAGS := -O3 -funroll-loops -g -fPIC all: libAFLDriver.a libAFLQemuDriver.a aflpp_qemu_driver_hook.so @@ -26,17 +26,17 @@ debug: ar ru libAFLDriver.a afl-performance.o aflpp_driver.o aflpp_qemu_driver.o: aflpp_qemu_driver.c - $(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c + -$(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c libAFLQemuDriver.a: aflpp_qemu_driver.o - ar ru libAFLQemuDriver.a aflpp_qemu_driver.o - cp -vf libAFLQemuDriver.a ../../ + -ar ru libAFLQemuDriver.a aflpp_qemu_driver.o + -cp -vf libAFLQemuDriver.a ../../ aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o - $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so + -test -e aflpp_qemu_driver_hook.o && $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so || echo "Note: Optional aflpp_qemu_driver_hook.so not built." aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c - $(LLVM_BINDIR)clang -fPIC $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c + -test -e ../../qemu_mode/qemuafl/qemuafl/api.h && $(LLVM_BINDIR)clang $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c || echo "Note: Optional aflpp_qemu_driver_hook.o not built." test: debug #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c diff --git a/utils/aflpp_driver/README.md b/utils/aflpp_driver/README.md new file mode 100644 index 00000000..f03c2fe3 --- /dev/null +++ b/utils/aflpp_driver/README.md @@ -0,0 +1,36 @@ +# afl++ drivers + +## aflpp_driver + +aflpp_driver is used to compile directly libfuzzer `LLVMFuzzerTestOneInput()` +targets. + +Just do `afl-clang-fast++ -o fuzz fuzzer_harness.cc libAFLDriver.a [plus required linking]`. + +You can also sneakily do this little trick: +If this is the clang compile command to build for libfuzzer: + `clang++ -o fuzz -fsanitize=fuzzer fuzzer_harness.cc -lfoo` +then just switch `clang++` with `afl-clang-fast++` and our compiler will +magically insert libAFLDriver.a :) + +To use shared-memory testcases, you need nothing to do. +To use stdin testcases give `-` as the only command line parameter. +To use file input testcases give `@@` as the only command line parameter. + +IMPORTANT: if you use `afl-cmin` or `afl-cmin.bash` then either pass `-` +or `@@` as command line parameters. + +## aflpp_qemu_driver + +aflpp_qemu_driver is used for libfuzzer `LLVMFuzzerTestOneInput()` targets that +are to be fuzzed in qemu_mode. So we compile them with clang/clang++, without +-fsantize=fuzzer or afl-clang-fast, and link in libAFLQemuDriver.a: + +`clang++ -o fuzz fuzzer_harness.cc libAFLQemuDriver.a [plus required linking]`. + + +Then just do (where the name of the binary is `fuzz`): +``` +AFL_QEMU_PERSISTENT_ADDR=0x$(nm fuzz | grep "T LLVMFuzzerTestOneInput" | awk '{print $1}') +AFL_QEMU_PERSISTENT_HOOK=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- ./fuzz` +``` diff --git a/utils/aflpp_driver/aflpp_driver.c b/utils/aflpp_driver/aflpp_driver.c index ad781e64..c094c425 100644 --- a/utils/aflpp_driver/aflpp_driver.c +++ b/utils/aflpp_driver/aflpp_driver.c @@ -174,11 +174,17 @@ size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) { static int ExecuteFilesOnyByOne(int argc, char **argv) { unsigned char *buf = (unsigned char *)malloc(MAX_FILE); + for (int i = 1; i < argc; i++) { - int fd = open(argv[i], O_RDONLY); - if (fd == -1) continue; + int fd = 0; + + if (strcmp(argv[i], "-") != 0) { fd = open(argv[i], O_RDONLY); } + + if (fd == -1) { continue; } + ssize_t length = read(fd, buf, MAX_FILE); + if (length > 0) { printf("Reading %zu bytes from %s\n", length, argv[i]); @@ -187,7 +193,7 @@ static int ExecuteFilesOnyByOne(int argc, char **argv) { } - close(fd); + if (fd > 0) { close(fd); } } @@ -199,15 +205,19 @@ static int ExecuteFilesOnyByOne(int argc, char **argv) { int main(int argc, char **argv) { printf( - "======================= INFO =========================\n" + "============================== INFO ================================\n" "This binary is built for afl++.\n" + "To use with afl-cmin or afl-cmin.bash pass '-' as single command line " + "option\n" "To run the target function on individual input(s) execute this:\n" " %s INPUT_FILE1 [INPUT_FILE2 ... ]\n" "To fuzz with afl-fuzz execute this:\n" " afl-fuzz [afl-flags] -- %s [-N]\n" "afl-fuzz will run N iterations before re-spawning the process (default: " "INT_MAX)\n" - "======================================================\n", + "For stdin input processing, pass '-' as single command line option.\n" + "For file input processing, pass '@@' as single command line option.\n" + "===================================================================\n", argv[0], argv[0]); if (getenv("AFL_GDB")) { @@ -237,22 +247,35 @@ int main(int argc, char **argv) { memcpy(dummy_input, (void *)AFL_PERSISTENT, sizeof(AFL_PERSISTENT)); memcpy(dummy_input + 32, (void *)AFL_DEFER_FORKSVR, sizeof(AFL_DEFER_FORKSVR)); + int N = INT_MAX; - if (argc == 2 && argv[1][0] == '-') + + if (argc == 2 && !strcmp(argv[1], "-")) { + + __afl_sharedmem_fuzzing = 0; + __afl_manual_init(); + return ExecuteFilesOnyByOne(argc, argv); + + } else if (argc == 2 && argv[1][0] == '-') { + N = atoi(argv[1] + 1); - else if (argc == 2 && (N = atoi(argv[1])) > 0) + + } else if (argc == 2 && (N = atoi(argv[1])) > 0) { + printf("WARNING: using the deprecated call style `%s %d`\n", argv[0], N); - else if (argc > 1) { + + } else if (argc > 1) { __afl_sharedmem_fuzzing = 0; - __afl_manual_init(); + + if (argc == 2) { __afl_manual_init(); } + return ExecuteFilesOnyByOne(argc, argv); } assert(N > 0); - // if (!getenv("AFL_DRIVER_DONT_DEFER")) __afl_manual_init(); // Call LLVMFuzzerTestOneInput here so that coverage caused by initialization @@ -271,6 +294,7 @@ int main(int argc, char **argv) { fprintf(stderr, "%02x", __afl_fuzz_ptr[i]); fprintf(stderr, "\n"); #endif + if (*__afl_fuzz_len) { num_runs++; diff --git a/utils/aflpp_driver/aflpp_qemu_driver_hook.c b/utils/aflpp_driver/aflpp_qemu_driver_hook.c index 823cc42d..2979fadc 100644 --- a/utils/aflpp_driver/aflpp_qemu_driver_hook.c +++ b/utils/aflpp_driver/aflpp_qemu_driver_hook.c @@ -1,21 +1,30 @@ +#include "../../qemu_mode/qemuafl/qemuafl/api.h" + #include <stdint.h> #include <string.h> #define g2h(x) ((void *)((unsigned long)(x) + guest_base)) +#define h2g(x) ((uint64_t)(x)-guest_base) -#define REGS_RDI 7 -#define REGS_RSI 6 +void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { -void afl_persistent_hook(uint64_t *regs, uint64_t guest_base, - uint8_t *input_buf, uint32_t input_len) { + // In this example the register RDI is pointing to the memory location + // of the target buffer, and the length of the input is in RSI. + // This can be seen with a debugger, e.g. gdb (and "disass main") - memcpy(g2h(regs[REGS_RDI]), input_buf, input_len); - regs[REGS_RSI] = input_len; + memcpy(g2h(regs->rdi), input_buf, input_buf_len); + regs->rsi = input_buf_len; } +#undef g2h +#undef h2g + int afl_persistent_hook_init(void) { + // 1 for shared memory input (faster), 0 for normal input (you have to use + // read(), input_buf will be NULL) return 1; } diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py new file mode 100644 index 00000000..f64e3fae --- /dev/null +++ b/utils/autodict_ql/autodict-ql.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +# AutoDict-QL - Optimal Token Generation for Fuzzing +# Part of AFL++ Project +# Developed and Maintained by Arash Ale Ebrahim (@Microsvuln) +# Usage : python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH] +# CURRENT_DIR = full of your current Dir +# CODEQL_DATABASE_PATH = Full path to your CodeQL database +# TOKEN_PATH = Folder name of the newly generated tokens +# Example : python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens +# Just pass the tokens folder to the -x flag of your fuzzer + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import shutil +import subprocess + +from binascii import unhexlify + + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def parse_args(): + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + + # parser.add_argument("tokenpath", + # help="Destination directory for tokens") + parser.add_argument("cur", help="Current Path") + parser.add_argument("db", help="CodeQL database Path") + parser.add_argument("tokenpath", help="Destination directory for tokens") + + return parser.parse_args() + + +def static_analysis(file, file2, cur, db): + with open(cur + "/" + file, "w") as f: + print(cur + "/" + file) + stream = os.popen("codeql query run " + cur + "/" + file2 + " -d " + db) + output = stream.read() + f.write(output) + f.close() + + +def copy_tokens(cur, tokenpath): + subprocess.call( + ["mv " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."], shell=True + ) + subprocess.call( + ["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["rm -rf strcmp-strs memcmp-strs strncmp-strs lits strtool-strs"], shell=True + ) + subprocess.call(["rm *.out"], shell=True) + subprocess.call(["find " + tokenpath + " -size 0 -delete"], shell=True) + + +def codeql_analysis(cur, db): + static_analysis("litout.out", "litool.ql", cur, db) + static_analysis("strcmp-strings.out", "strcmp-str.ql", cur, db) + static_analysis("strncmp-strings.out", "strncmp-str.ql", cur, db) + static_analysis("memcmp-strings.out", "memcmp-str.ql", cur, db) + static_analysis("strtool-strings.out", "strtool.ql", cur, db) + start_autodict(0, cur) + + +def start_autodict(tokenpath, cur): + command = ["python3", cur + "/litan.py", cur + "/lits/", cur + "/litout.out"] + worker1 = subprocess.Popen(command) + print(worker1.communicate()) + + command1 = [ + "python3", + cur + "/strcmp-strings.py", + cur + "/strcmp-strs/", + cur + "/strcmp-strings.out", + ] + worker2 = subprocess.Popen(command1) + print(worker2.communicate()) + + command2 = [ + "python3", + cur + "/strncmp-strings.py", + cur + "/strncmp-strs/", + cur + "/strncmp-strings.out", + ] + worker3 = subprocess.Popen(command2) + print(worker3.communicate()) + + command5 = [ + "python3", + cur + "/memcmp-strings.py", + cur + "/memcmp-strs/", + cur + "/memcmp-strings.out", + ] + worker6 = subprocess.Popen(command5) + print(worker6.communicate()) + + command8 = [ + "python3", + cur + "/stan-strings.py", + cur + "/strtool-strs/", + cur + "/strtool-strings.out", + ] + worker9 = subprocess.Popen(command8) + print(worker9.communicate()) + + +def main(): + args = parse_args() + ensure_dir(args.tokenpath) + # copy_tokens(args.cur, args.tokenpath) + codeql_analysis(args.cur, args.db) + copy_tokens(args.cur, args.tokenpath) + # start_autodict(args.tokenpath, args.cur) + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/build-codeql.sh b/utils/autodict_ql/build-codeql.sh new file mode 100644 index 00000000..6ae4b362 --- /dev/null +++ b/utils/autodict_ql/build-codeql.sh @@ -0,0 +1,17 @@ +cd ~ +if [ -d "codeql-home" ]; then + echo "Exist !" + exit 1 +fi +mkdir codeql-home +cd codeql-home +git clone https://github.com/github/codeql.git codeql-repo +git clone https://github.com/github/codeql-go.git +wget https://github.com/github/codeql-cli-binaries/releases/download/v2.4.6/codeql-linux64.zip +unzip codeql-linux64.zip +mv codeql codeql-cli +export "PATH=~/codeql-home/codeql-cli/:$PATH" +echo "export PATH=~/codeql-home/codeql-cli/:$PATH" >> ~/.bashrc +codeql resolve languages +codeql resolve qlpacks +codeql diff --git a/utils/autodict_ql/litan.py b/utils/autodict_ql/litan.py new file mode 100644 index 00000000..7033d363 --- /dev/null +++ b/utils/autodict_ql/litan.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com +import string +import os +import binascii +import codecs +import struct +import errno +import argparse +import re +import base64 +from binascii import unhexlify + + +def parse_args(): + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file to analysis and output folder to save corpdirus for constants in the overall project ------- Example usage : python2 thisfile.py outdir o.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate files." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. ooo-hex.txt, analysis take place on this file, example : python2 thisfile.py outdir out.txt", + ) + return parser.parse_args() + + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno == errno.EEXIST: + # print "[-] Directory exists, specify another directory" + exit(1) + + +def do_analysis1(corpdir, infile): + with open(infile, "rb") as f: + lines = f.readlines()[1:] + f.close() + new_lst = [] + n = 1 + for i, num in enumerate(lines): + if i != 0: + new_lst.append(num) + str1 = str(num) + print("num is " + str1) + str1 = str1.rstrip("\n\n") + # str1 = str1.replace("0x",""); + str1 = str1.replace("|", "") + str1 = str1.rstrip("\r\n") + str1 = str1.rstrip("\n") + str1 = str1.replace(" ", "") + # str1 = str1.translate(None, string.punctuation) + translator = str.maketrans("", "", string.punctuation) + str1 = str1.translate(translator) + str1 = str1[1:] + str1 = str1[:-1] + print("After cleanup : " + str1) + if ( + (str1 != "0") + and (str1 != "ffffffff") + and (str1 != "fffffffe") + or (len(str1) == 4) + or (len(str1) == 8) + ): + print("first : " + str1) + if len(str1) > 8: + str1 = str1[:-1] + elif len(str1) == 5: + str1 = str1 = "0" + try: + # str1 = str1.decode("hex") + with open(corpdir + "/lit-seed{0}".format(n), "w") as file: + str1 = str1.replace("0x", "") + print(str1) + str1 = int(str1, base=16) + str1 = str1.to_bytes(4, byteorder="little") + file.write(str(str1)) + file.close() + with open(corpdir + "/lit-seed{0}".format(n), "r") as q: + a = q.readline() + a = a[1:] + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token :" + + str(a) + ) + q.close() + with open( + corpdir + "/lit-seed{0}".format(n), "w" + ) as w1: + w1.write(str(a)) + print("Done!") + w1.close() + except: + print("Error!") + n = n + 1 + + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_analysis1(args.corpdir, args.infile) + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/litool.ql b/utils/autodict_ql/litool.ql new file mode 100644 index 00000000..76f429c1 --- /dev/null +++ b/utils/autodict_ql/litool.ql @@ -0,0 +1,10 @@ +import cpp + +class HexOrOctLiteral extends Literal{ + HexOrOctLiteral(){ + (this instanceof HexLiteral) or (this instanceof OctalLiteral) + } +} + +from HexOrOctLiteral lit +select lit.getValueText() diff --git a/utils/autodict_ql/memcmp-str.ql b/utils/autodict_ql/memcmp-str.ql new file mode 100644 index 00000000..830c9cac --- /dev/null +++ b/utils/autodict_ql/memcmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : memcmp trace + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("memcmp") +select fucall.getArgument(_).getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py new file mode 100644 index 00000000..270a697c --- /dev/null +++ b/utils/autodict_ql/memcmp-strings.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def parse_args(): + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + # print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") + str11 = str11.lstrip() + str11 = str11.rstrip() + str11 = str(str11) + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): + print("Space / Paranthesis String : %s" % str11) + else: + with open(corpdir + "/memcmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/qlpack.yml b/utils/autodict_ql/qlpack.yml new file mode 100644 index 00000000..28892f24 --- /dev/null +++ b/utils/autodict_ql/qlpack.yml @@ -0,0 +1,3 @@ +name: autodict +version: 0.0.0 +libraryPathDependencies: codeql-cpp diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md new file mode 100644 index 00000000..9170f552 --- /dev/null +++ b/utils/autodict_ql/readme.md @@ -0,0 +1,100 @@ +# Autodict-QL - Optimal Token Generation for Fuzzing + +## What is this? + +`Autodict-QL` is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses CodeQL (a powerful semantic code analysis engine) to fetch information from a code base. + +Tokens are useful when you perform fuzzing on different parsers. The AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campaign. If you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . + + +## Why CodeQL ? +We basically developed this plugin on top of the CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. + + +## CodeQL scripts +Currently, we pushed some scripts as defaults for Token generation. In addition, we provide every CodeQL script as an standalone script because it's easier to modify or test. + +Currently we provided the following CodeQL scripts : + +`strcmp-str.ql` is used to extract strings that are related to the `strcmp` function. + +`strncmp-str.ql` is used to extract the strings from the `strncmp` function. + +`memcmp-str.ql` is used to extract the strings from the `memcmp` function. + +`litool.ql` extracts Magic numbers as Hexadecimal format. + +`strtool.ql` extracts strings with uses of a regex and dataflow concept to capture the string comparison functions. If `strcmp` is rewritten in a project as Mystrcmp or something like strmycmp, then this script can catch the arguments and these are valuable tokens. + +You can write other CodeQL scripts to extract possible effective tokens if you think they can be useful. + + +## Usage + +Before you proceed to installation make sure that you have the following packages by installing them : +```shell +sudo apt install build-essential libtool-bin python3-dev python3 automake git vim wget -y +``` +The usage of Autodict-QL is pretty easy. But let's describe it as: + +1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system. +Do the following : +```shell +# chmod +x codeql-build.sh +# ./codeql-build.sh +# source ~/.bashrc +# codeql +``` +Then you should get: + +```shell +Usage: codeql <command> <argument>... +Create and query CodeQL databases, or work with the QL language. + +GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is +not itself free software. Type codeql --license to see the license terms. + + --license Show the license terms for the CodeQL toolchain. +Common options: + -h, --help Show this help text. + -v, --verbose Incrementally increase the number of progress messages printed. + -q, --quiet Incrementally decrease the number of progress messages printed. +Some advanced options have been hidden; try --help -v for a fuller view. +Commands: + query Compile and execute QL code. + bqrs Get information from .bqrs files. + database Create, analyze and process CodeQL databases. + dataset [Plumbing] Work with raw QL datasets. + test Execute QL unit tests. + resolve [Deep plumbing] Helper commands to resolve disk locations etc. + execute [Deep plumbing] Low-level commands that need special JVM options. + version Show the version of the CodeQL toolchain. + generate Generate formatted QL documentation. + github Commands useful for interacting with the GitHub API through CodeQL. +``` + +2. Compile your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard. + - First you need to create a CodeQL database of the project codebase, suppose we want to compile `libxml` with codeql. Go to libxml and issue the following commands: + - `./configure --disable-shared` + - `codeql create database libxml-db --language=cpp --command=make` + - Now you have the CodeQL database of the project :-) +3. The final step is to update the CodeQL database you created in step 2 (Suppose we are in `aflplusplus/utils/autodict_ql/` directory): + - `codeql database upgrade /home/user/libxml/libxml-db` +4. Everything is set! Now you should issue the following to get the tokens: + - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` + - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py $PWD /home/user/libxml/libxml-db tokens` + - This will create the final `tokens` dir for you and you are done, then pass the tokens path to AFL++'s `-x` flag. +5. Done! + + +## More on dictionaries and tokens +Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extract useful tokens, in addition with LTO instrumentation mode, this dict2file is automatically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. In addition it's independent from LLVM system. +On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file formats and specifications. For example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionaries, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generate dictionaries based on the target. + +I've personally prefered to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manually generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. +In overall, fuzzing with dictionaries and well-generated tokens will give better results. + +There are 2 important points to remember : + +- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chances to discover new bugs. +- Do not forget to set `AFL_MAX_DET_EXTRAS` at least to the number of generated dictionaries. If you forget to set this environment variable, then AFL++ uses just 200 tokens and use the rest of them only probabilistically. So this will guarantee that your tokens will be used by AFL++. diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py new file mode 100644 index 00000000..81cb0b97 --- /dev/null +++ b/utils/autodict_ql/stan-strings.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def parse_args(): + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + # print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") + str11 = str11.lstrip() + str11 = str11.rstrip() + str11 = str(str11) + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): + print("Space / Paranthesis String : %s" % str11) + else: + with open(corpdir + "/seed-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/strcmp-str.ql b/utils/autodict_ql/strcmp-str.ql new file mode 100644 index 00000000..83ffadaf --- /dev/null +++ b/utils/autodict_ql/strcmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : strcmp + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("strcmp") +select fucall.getArgument(_).getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py new file mode 100644 index 00000000..9c2520c9 --- /dev/null +++ b/utils/autodict_ql/strcmp-strings.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def parse_args(): + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + # print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") + str11 = str11.lstrip() + str11 = str11.rstrip() + str11 = str(str11) + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): + print("Space / Paranthesis String : %s" % str11) + else: + with open(corpdir + "/strcmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/strncmp-str.ql b/utils/autodict_ql/strncmp-str.ql new file mode 100644 index 00000000..dbb952e5 --- /dev/null +++ b/utils/autodict_ql/strncmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : strncmp + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("strncmp") +select fucall.getArgument(_).getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py new file mode 100644 index 00000000..6206b4c4 --- /dev/null +++ b/utils/autodict_ql/strncmp-strings.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def parse_args(): + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + # print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") + str11 = str11.lstrip() + str11 = str11.rstrip() + str11 = str(str11) + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): + print("Space / Paranthesis String : %s" % str11) + else: + with open(corpdir + "/strncmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/strtool.ql b/utils/autodict_ql/strtool.ql new file mode 100644 index 00000000..253d1555 --- /dev/null +++ b/utils/autodict_ql/strtool.ql @@ -0,0 +1,24 @@ +import cpp +import semmle.code.cpp.dataflow.DataFlow +class StringLiteralNode extends DataFlow::Node { + StringLiteralNode() { this.asExpr() instanceof StringLiteral } +} +class CmpArgNode extends DataFlow::Node { + CmpArgNode() { + exists(FunctionCall fc | + fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and + fc.getArgument(0) = this.asExpr() + ) + or + exists(FunctionCall fc | + fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and + fc.getArgument(1) = this.asExpr() + ) + } +} + +from StringLiteralNode src, CmpArgNode arg +where + DataFlow::localFlow(src, arg) + +select src.asExpr().(StringLiteral).toString() \ No newline at end of file diff --git a/utils/crash_triage/triage_crashes.sh b/utils/crash_triage/triage_crashes.sh index a752458d..4d75430e 100755 --- a/utils/crash_triage/triage_crashes.sh +++ b/utils/crash_triage/triage_crashes.sh @@ -90,12 +90,15 @@ for crash in $DIR/crashes/id:*; do for a in $@; do - if [ "$a" = "@@" ] ; then - use_args="$use_args $crash" + case "$a" in + *@@*) unset use_stdio - else + use_args="$use_args `printf %s "$a" | sed -e 's<@@<'$crash'<g'`" + ;; + *) use_args="$use_args $a" - fi + ;; + esac done diff --git a/utils/custom_mutators/Makefile b/utils/custom_mutators/Makefile deleted file mode 100644 index 9849f3f4..00000000 --- a/utils/custom_mutators/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -all: libexamplemutator.so - -libexamplemutator.so: - $(CC) $(CFLAGS) -D_FORTIFY_SOURCE=2 -O3 -fPIC -shared -g -I ../../include example.c -o libexamplemutator.so - -clean: - rm -rf libexamplemutator.so diff --git a/utils/custom_mutators/README.md b/utils/custom_mutators/README.md deleted file mode 100644 index 655f7a5e..00000000 --- a/utils/custom_mutators/README.md +++ /dev/null @@ -1,35 +0,0 @@ -# Examples for the custom mutator - -These are example and helper files for the custom mutator feature. -See [docs/custom_mutators.md](../../docs/custom_mutators.md) for more information - -Note that if you compile with python3.7 you must use python3 scripts, and if -you use python2.7 to compile python2 scripts! - -simple_example.c - most simplest example. generates a random sized buffer - filled with 'A' - -example.c - this is a simple example written in C and should be compiled to a - shared library. Use make to compile it and produce libexamplemutator.so - -example.py - this is the template you can use, the functions are there but they - are empty - -post_library_gif.so.c - fix a fuzz input to ensure it is valid for GIF - -post_library_png.so.c - fix a fuzz input to ensure it is valid for PNG - -simple-chunk-replace.py - this is a simple example where chunks are replaced - -common.py - this can be used for common functions and helpers. - the examples do not use this though. But you can :) - -wrapper_afl_min.py - mutation of XML documents, loads XmlMutatorMin.py - -XmlMutatorMin.py - module for XML mutation - -custom_mutator_helpers.h is an header that defines some helper routines -like surgical_havoc_mutate() that allow to perform a randomly chosen -mutation from a subset of the havoc mutations. -If you do so, you have to specify -I /path/to/AFLplusplus/include when -compiling. diff --git a/utils/custom_mutators/XmlMutatorMin.py b/utils/custom_mutators/XmlMutatorMin.py deleted file mode 100644 index 3e6cd0ff..00000000 --- a/utils/custom_mutators/XmlMutatorMin.py +++ /dev/null @@ -1,348 +0,0 @@ -#!/usr/bin/python - -""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """ - -from __future__ import print_function -from copy import deepcopy -from lxml import etree as ET -import random, re, io - - -########################### -# The XmlMutatorMin class # -########################### - - -class XmlMutatorMin: - - """ - Optionals parameters: - seed Seed used by the PRNG (default: "RANDOM") - verbose Verbosity (default: False) - """ - - def __init__(self, seed="RANDOM", verbose=False): - - """ Initialize seed, database and mutators """ - - # Verbosity - self.verbose = verbose - - # Initialize PRNG - self.seed = str(seed) - if self.seed == "RANDOM": - random.seed() - else: - if self.verbose: - print("Static seed '%s'" % self.seed) - random.seed(self.seed) - - # Initialize input and output documents - self.input_tree = None - self.tree = None - - # High-level mutators (no database needed) - hl_mutators_delete = [ - "del_node_and_children", - "del_node_but_children", - "del_attribute", - "del_content", - ] # Delete items - hl_mutators_fuzz = ["fuzz_attribute"] # Randomly change attribute values - - # Exposed mutators - self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete - - def __parse_xml(self, xml): - - """ Parse an XML string. Basic wrapper around lxml.parse() """ - - try: - # Function parse() takes care of comments / DTD / processing instructions / ... - tree = ET.parse(io.BytesIO(xml)) - except ET.ParseError: - raise RuntimeError("XML isn't well-formed!") - except LookupError as e: - raise RuntimeError(e) - - # Return a document wrapper - return tree - - def __exec_among(self, module, functions, min_times, max_times): - - """ Randomly execute $functions between $min and $max times """ - - for i in xrange(random.randint(min_times, max_times)): - # Function names are mangled because they are "private" - getattr(module, "_XmlMutatorMin__" + random.choice(functions))() - - def __serialize_xml(self, tree): - - """ Serialize a XML document. Basic wrapper around lxml.tostring() """ - - return ET.tostring( - tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding - ) - - def __ver(self, version): - - """ Helper for displaying lxml version numbers """ - - return ".".join(map(str, version)) - - def reset(self): - - """ Reset the mutator """ - - self.tree = deepcopy(self.input_tree) - - def init_from_string(self, input_string): - - """ Initialize the mutator from a XML string """ - - # Get a pointer to the top-element - self.input_tree = self.__parse_xml(input_string) - - # Get a working copy - self.tree = deepcopy(self.input_tree) - - def save_to_string(self): - - """ Return the current XML document as UTF-8 string """ - - # Return a text version of the tree - return self.__serialize_xml(self.tree) - - def __pick_element(self, exclude_root_node=False): - - """ Pick a random element from the current document """ - - # Get a list of all elements, but nodes like PI and comments - elems = list(self.tree.getroot().iter(tag=ET.Element)) - - # Is the root node excluded? - if exclude_root_node: - start = 1 - else: - start = 0 - - # Pick a random element - try: - elem_id = random.randint(start, len(elems) - 1) - elem = elems[elem_id] - except ValueError: - # Should only occurs if "exclude_root_node = True" - return (None, None) - - return (elem_id, elem) - - def __fuzz_attribute(self): - - """ Fuzz (part of) an attribute value """ - - # Select a node to modify - (rand_elem_id, rand_elem) = self.__pick_element() - - # Get all the attributes - attribs = rand_elem.keys() - - # Is there attributes? - if len(attribs) < 1: - if self.verbose: - print("No attribute: can't replace!") - return - - # Pick a random attribute - rand_attrib_id = random.randint(0, len(attribs) - 1) - rand_attrib = attribs[rand_attrib_id] - - # We have the attribute to modify - # Get its value - attrib_value = rand_elem.get(rand_attrib) - # print("- Value: " + attrib_value) - - # Should we work on the whole value? - func_call = "(?P<func>[a-zA-Z:\-]+)\((?P<args>.*?)\)" - p = re.compile(func_call) - l = p.findall(attrib_value) - if random.choice((True, False)) and l: - # Randomly pick one the function calls - (func, args) = random.choice(l) - # Split by "," and randomly pick one of the arguments - value = random.choice(args.split(",")) - # Remove superfluous characters - unclean_value = value - value = value.strip(" ").strip("'") - # print("Selected argument: [%s]" % value) - else: - value = attrib_value - - # For each type, define some possible replacement values - choices_number = ( - "0", - "11111", - "-128", - "2", - "-1", - "1/3", - "42/0", - "1094861636 idiv 1.0", - "-1123329771506872 idiv 3.8", - "17=$numericRTF", - str(3 + random.randrange(0, 100)), - ) - - choices_letter = ( - "P" * (25 * random.randrange(1, 100)), - "%s%s%s%s%s%s", - "foobar", - ) - - choices_alnum = ( - "Abc123", - "020F0302020204030204", - "020F0302020204030204" * (random.randrange(5, 20)), - ) - - # Fuzz the value - if random.choice((True, False)) and value == "": - - # Empty - new_value = value - - elif random.choice((True, False)) and value.isdigit(): - - # Numbers - new_value = random.choice(choices_number) - - elif random.choice((True, False)) and value.isalpha(): - - # Letters - new_value = random.choice(choices_letter) - - elif random.choice((True, False)) and value.isalnum(): - - # Alphanumeric - new_value = random.choice(choices_alnum) - - else: - - # Default type - new_value = random.choice(choices_alnum + choices_letter + choices_number) - - # If we worked on a substring, apply changes to the whole string - if value != attrib_value: - # No ' around empty values - if new_value != "" and value != "": - new_value = "'" + new_value + "'" - # Apply changes - new_value = attrib_value.replace(unclean_value, new_value) - - # Log something - if self.verbose: - print( - "Fuzzing attribute #%i '%s' of tag #%i '%s'" - % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag) - ) - - # Modify the attribute - rand_elem.set(rand_attrib, new_value.decode("utf-8")) - - def __del_node_and_children(self): - - """High-level minimizing mutator - Delete a random node and its children (i.e. delete a random tree)""" - - self.__del_node(True) - - def __del_node_but_children(self): - - """High-level minimizing mutator - Delete a random node but its children (i.e. link them to the parent of the deleted node)""" - - self.__del_node(False) - - def __del_node(self, delete_children): - - """ Called by the __del_node_* mutators """ - - # Select a node to modify (but the root one) - (rand_elem_id, rand_elem) = self.__pick_element(exclude_root_node=True) - - # If the document includes only a top-level element - # Then we can't pick a element (given that "exclude_root_node = True") - - # Is the document deep enough? - if rand_elem is None: - if self.verbose: - print("Can't delete a node: document not deep enough!") - return - - # Log something - if self.verbose: - but_or_and = "and" if delete_children else "but" - print( - "Deleting tag #%i '%s' %s its children" - % (rand_elem_id, rand_elem.tag, but_or_and) - ) - - if delete_children is False: - # Link children of the random (soon to be deleted) node to its parent - for child in rand_elem: - rand_elem.getparent().append(child) - - # Remove the node - rand_elem.getparent().remove(rand_elem) - - def __del_content(self): - - """High-level minimizing mutator - Delete the attributes and children of a random node""" - - # Select a node to modify - (rand_elem_id, rand_elem) = self.__pick_element() - - # Log something - if self.verbose: - print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag)) - - # Reset the node - rand_elem.clear() - - def __del_attribute(self): - - """High-level minimizing mutator - Delete a random attribute from a random node""" - - # Select a node to modify - (rand_elem_id, rand_elem) = self.__pick_element() - - # Get all the attributes - attribs = rand_elem.keys() - - # Is there attributes? - if len(attribs) < 1: - if self.verbose: - print("No attribute: can't delete!") - return - - # Pick a random attribute - rand_attrib_id = random.randint(0, len(attribs) - 1) - rand_attrib = attribs[rand_attrib_id] - - # Log something - if self.verbose: - print( - "Deleting attribute #%i '%s' of tag #%i '%s'" - % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag) - ) - - # Delete the attribute - rand_elem.attrib.pop(rand_attrib) - - def mutate(self, min=1, max=5): - - """ Execute some high-level mutators between $min and $max times, then some medium-level ones """ - - # High-level mutation - self.__exec_among(self, self.hl_mutators_all, min, max) diff --git a/utils/custom_mutators/common.py b/utils/custom_mutators/common.py deleted file mode 100644 index 44a5056a..00000000 --- a/utils/custom_mutators/common.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -""" -Module containing functions shared between multiple AFL modules - -@author: Christian Holler (:decoder) - -@license: - -This Source Code Form is subject to the terms of the Mozilla Public -License, v. 2.0. If a copy of the MPL was not distributed with this -file, You can obtain one at http://mozilla.org/MPL/2.0/. - -@contact: choller@mozilla.com -""" - -from __future__ import print_function -import random -import os -import re - - -def randel(l): - if not l: - return None - return l[random.randint(0, len(l) - 1)] - - -def randel_pop(l): - if not l: - return None - return l.pop(random.randint(0, len(l) - 1)) - - -def write_exc_example(data, exc): - exc_name = re.sub(r"[^a-zA-Z0-9]", "_", repr(exc)) - - if not os.path.exists(exc_name): - with open(exc_name, "w") as f: - f.write(data) diff --git a/utils/custom_mutators/custom_mutator_helpers.h b/utils/custom_mutators/custom_mutator_helpers.h deleted file mode 100644 index 62e6efba..00000000 --- a/utils/custom_mutators/custom_mutator_helpers.h +++ /dev/null @@ -1,342 +0,0 @@ -#ifndef CUSTOM_MUTATOR_HELPERS -#define CUSTOM_MUTATOR_HELPERS - -#include "config.h" -#include "types.h" -#include <stdlib.h> - -#define INITIAL_GROWTH_SIZE (64) - -#define RAND_BELOW(limit) (rand() % (limit)) - -/* Use in a struct: creates a name_buf and a name_size variable. */ -#define BUF_VAR(type, name) \ - type * name##_buf; \ - size_t name##_size; -/* this fills in `&structptr->something_buf, &structptr->something_size`. */ -#define BUF_PARAMS(struct, name) \ - (void **)&struct->name##_buf, &struct->name##_size - -typedef struct { - -} afl_t; - -static void surgical_havoc_mutate(u8 *out_buf, s32 begin, s32 end) { - - static s8 interesting_8[] = {INTERESTING_8}; - static s16 interesting_16[] = {INTERESTING_8, INTERESTING_16}; - static s32 interesting_32[] = {INTERESTING_8, INTERESTING_16, INTERESTING_32}; - - switch (RAND_BELOW(12)) { - - case 0: { - - /* Flip a single bit somewhere. Spooky! */ - - s32 bit_idx = ((RAND_BELOW(end - begin) + begin) << 3) + RAND_BELOW(8); - - out_buf[bit_idx >> 3] ^= 128 >> (bit_idx & 7); - - break; - - } - - case 1: { - - /* Set byte to interesting value. */ - - u8 val = interesting_8[RAND_BELOW(sizeof(interesting_8))]; - out_buf[(RAND_BELOW(end - begin) + begin)] = val; - - break; - - } - - case 2: { - - /* Set word to interesting value, randomly choosing endian. */ - - if (end - begin < 2) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 1) break; - - switch (RAND_BELOW(2)) { - - case 0: - *(u16 *)(out_buf + byte_idx) = - interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)]; - break; - case 1: - *(u16 *)(out_buf + byte_idx) = - SWAP16(interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)]); - break; - - } - - break; - - } - - case 3: { - - /* Set dword to interesting value, randomly choosing endian. */ - - if (end - begin < 4) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 3) break; - - switch (RAND_BELOW(2)) { - - case 0: - *(u32 *)(out_buf + byte_idx) = - interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]; - break; - case 1: - *(u32 *)(out_buf + byte_idx) = - SWAP32(interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]); - break; - - } - - break; - - } - - case 4: { - - /* Set qword to interesting value, randomly choosing endian. */ - - if (end - begin < 8) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 7) break; - - switch (RAND_BELOW(2)) { - - case 0: - *(u64 *)(out_buf + byte_idx) = - (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]; - break; - case 1: - *(u64 *)(out_buf + byte_idx) = SWAP64( - (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]); - break; - - } - - break; - - } - - case 5: { - - /* Randomly subtract from byte. */ - - out_buf[(RAND_BELOW(end - begin) + begin)] -= 1 + RAND_BELOW(ARITH_MAX); - - break; - - } - - case 6: { - - /* Randomly add to byte. */ - - out_buf[(RAND_BELOW(end - begin) + begin)] += 1 + RAND_BELOW(ARITH_MAX); - - break; - - } - - case 7: { - - /* Randomly subtract from word, random endian. */ - - if (end - begin < 2) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 1) break; - - if (RAND_BELOW(2)) { - - *(u16 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u16 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u16 *)(out_buf + byte_idx) = - SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) - num); - - } - - break; - - } - - case 8: { - - /* Randomly add to word, random endian. */ - - if (end - begin < 2) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 1) break; - - if (RAND_BELOW(2)) { - - *(u16 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u16 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u16 *)(out_buf + byte_idx) = - SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) + num); - - } - - break; - - } - - case 9: { - - /* Randomly subtract from dword, random endian. */ - - if (end - begin < 4) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 3) break; - - if (RAND_BELOW(2)) { - - *(u32 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u32 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u32 *)(out_buf + byte_idx) = - SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) - num); - - } - - break; - - } - - case 10: { - - /* Randomly add to dword, random endian. */ - - if (end - begin < 4) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 3) break; - - if (RAND_BELOW(2)) { - - *(u32 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u32 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u32 *)(out_buf + byte_idx) = - SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) + num); - - } - - break; - - } - - case 11: { - - /* Just set a random byte to a random value. Because, - why not. We use XOR with 1-255 to eliminate the - possibility of a no-op. */ - - out_buf[(RAND_BELOW(end - begin) + begin)] ^= 1 + RAND_BELOW(255); - - break; - - } - - } - -} - -/* This function calculates the next power of 2 greater or equal its argument. - @return The rounded up power of 2 (if no overflow) or 0 on overflow. -*/ -static inline size_t next_pow2(size_t in) { - - if (in == 0 || in > (size_t)-1) - return 0; /* avoid undefined behaviour under-/overflow */ - size_t out = in - 1; - out |= out >> 1; - out |= out >> 2; - out |= out >> 4; - out |= out >> 8; - out |= out >> 16; - return out + 1; - -} - -/* This function makes sure *size is > size_needed after call. - It will realloc *buf otherwise. - *size will grow exponentially as per: - https://blog.mozilla.org/nnethercote/2014/11/04/please-grow-your-buffers-exponentially/ - Will return NULL and free *buf if size_needed is <1 or realloc failed. - @return For convenience, this function returns *buf. - */ -static inline void *maybe_grow(void **buf, size_t *size, size_t size_needed) { - - /* No need to realloc */ - if (likely(size_needed && *size >= size_needed)) return *buf; - - /* No initial size was set */ - if (size_needed < INITIAL_GROWTH_SIZE) size_needed = INITIAL_GROWTH_SIZE; - - /* grow exponentially */ - size_t next_size = next_pow2(size_needed); - - /* handle overflow */ - if (!next_size) { next_size = size_needed; } - - /* alloc */ - *buf = realloc(*buf, next_size); - *size = *buf ? next_size : 0; - - return *buf; - -} - -/* Swaps buf1 ptr and buf2 ptr, as well as their sizes */ -static inline void afl_swap_bufs(void **buf1, size_t *size1, void **buf2, - size_t *size2) { - - void * scratch_buf = *buf1; - size_t scratch_size = *size1; - *buf1 = *buf2; - *size1 = *size2; - *buf2 = scratch_buf; - *size2 = scratch_size; - -} - -#undef INITIAL_GROWTH_SIZE - -#endif - diff --git a/utils/custom_mutators/example.c b/utils/custom_mutators/example.c deleted file mode 100644 index 23add128..00000000 --- a/utils/custom_mutators/example.c +++ /dev/null @@ -1,376 +0,0 @@ -/* - New Custom Mutator for AFL++ - Written by Khaled Yakdan <yakdan@code-intelligence.de> - Andrea Fioraldi <andreafioraldi@gmail.com> - Shengtuo Hu <h1994st@gmail.com> - Dominik Maier <mail@dmnk.co> -*/ - -// You need to use -I /path/to/AFLplusplus/include -#include "custom_mutator_helpers.h" - -#include <stdint.h> -#include <stdlib.h> -#include <string.h> -#include <stdio.h> - -#define DATA_SIZE (100) - -static const char *commands[] = { - - "GET", - "PUT", - "DEL", - -}; - -typedef struct my_mutator { - - afl_t *afl; - - // any additional data here! - size_t trim_size_current; - int trimmming_steps; - int cur_step; - - // Reused buffers: - BUF_VAR(u8, fuzz); - BUF_VAR(u8, data); - BUF_VAR(u8, havoc); - BUF_VAR(u8, trim); - BUF_VAR(u8, post_process); - -} my_mutator_t; - -/** - * Initialize this custom mutator - * - * @param[in] afl a pointer to the internal state object. Can be ignored for - * now. - * @param[in] seed A seed for this mutator - the same seed should always mutate - * in the same way. - * @return Pointer to the data object this custom mutator instance should use. - * There may be multiple instances of this mutator in one afl-fuzz run! - * Return NULL on error. - */ -my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { - - srand(seed); // needed also by surgical_havoc_mutate() - - my_mutator_t *data = calloc(1, sizeof(my_mutator_t)); - if (!data) { - - perror("afl_custom_init alloc"); - return NULL; - - } - - data->afl = afl; - - return data; - -} - -/** - * Perform custom mutations on a given input - * - * (Optional for now. Required in the future) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param[in] buf Pointer to input data to be mutated - * @param[in] buf_size Size of input data - * @param[out] out_buf the buffer we will work on. we can reuse *buf. NULL on - * error. - * @param[in] add_buf Buffer containing the additional test case - * @param[in] add_buf_size Size of the additional test case - * @param[in] max_size Maximum size of the mutated output. The mutation must not - * produce data larger than max_size. - * @return Size of the mutated output. - */ -size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, - u8 **out_buf, uint8_t *add_buf, - size_t add_buf_size, // add_buf can be NULL - size_t max_size) { - - // Make sure that the packet size does not exceed the maximum size expected by - // the fuzzer - size_t mutated_size = DATA_SIZE <= max_size ? DATA_SIZE : max_size; - - // maybe_grow is optimized to be quick for reused buffers. - u8 *mutated_out = maybe_grow(BUF_PARAMS(data, fuzz), mutated_size); - if (!mutated_out) { - - *out_buf = NULL; - perror("custom mutator allocation (maybe_grow)"); - return 0; /* afl-fuzz will very likely error out after this. */ - - } - - // Randomly select a command string to add as a header to the packet - memcpy(mutated_out, commands[rand() % 3], 3); - - // Mutate the payload of the packet - int i; - for (i = 0; i < 8; ++i) { - - // Randomly perform one of the (no len modification) havoc mutations - surgical_havoc_mutate(mutated_out, 3, mutated_size); - - } - - *out_buf = mutated_out; - return mutated_size; - -} - -/** - * A post-processing function to use right before AFL writes the test case to - * disk in order to execute the target. - * - * (Optional) If this functionality is not needed, simply don't define this - * function. - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param[in] buf Buffer containing the test case to be executed - * @param[in] buf_size Size of the test case - * @param[out] out_buf Pointer to the buffer containing the test case after - * processing. External library should allocate memory for out_buf. - * The buf pointer may be reused (up to the given buf_size); - * @return Size of the output buffer after processing or the needed amount. - * A return of 0 indicates an error. - */ -size_t afl_custom_post_process(my_mutator_t *data, uint8_t *buf, - size_t buf_size, uint8_t **out_buf) { - - uint8_t *post_process_buf = - maybe_grow(BUF_PARAMS(data, post_process), buf_size + 5); - if (!post_process_buf) { - - perror("custom mutator realloc failed."); - *out_buf = NULL; - return 0; - - } - - memcpy(post_process_buf + 5, buf, buf_size); - post_process_buf[0] = 'A'; - post_process_buf[1] = 'F'; - post_process_buf[2] = 'L'; - post_process_buf[3] = '+'; - post_process_buf[4] = '+'; - - *out_buf = post_process_buf; - - return buf_size + 5; - -} - -/** - * This method is called at the start of each trimming operation and receives - * the initial buffer. It should return the amount of iteration steps possible - * on this input (e.g. if your input has n elements and you want to remove - * them one by one, return n, if you do a binary search, return log(n), - * and so on...). - * - * If your trimming algorithm doesn't allow you to determine the amount of - * (remaining) steps easily (esp. while running), then you can alternatively - * return 1 here and always return 0 in post_trim until you are finished and - * no steps remain. In that case, returning 1 in post_trim will end the - * trimming routine. The whole current index/max iterations stuff is only used - * to show progress. - * - * (Optional) - * - * @param data pointer returned in afl_custom_init for this fuzz case - * @param buf Buffer containing the test case - * @param buf_size Size of the test case - * @return The amount of possible iteration steps to trim the input. - * negative on error. - */ -int32_t afl_custom_init_trim(my_mutator_t *data, uint8_t *buf, - size_t buf_size) { - - // We simply trim once - data->trimmming_steps = 1; - - data->cur_step = 0; - - if (!maybe_grow(BUF_PARAMS(data, trim), buf_size)) { - - perror("init_trim grow"); - return -1; - - } - - memcpy(data->trim_buf, buf, buf_size); - - data->trim_size_current = buf_size; - - return data->trimmming_steps; - -} - -/** - * This method is called for each trimming operation. It doesn't have any - * arguments because we already have the initial buffer from init_trim and we - * can memorize the current state in *data. This can also save - * reparsing steps for each iteration. It should return the trimmed input - * buffer, where the returned data must not exceed the initial input data in - * length. Returning anything that is larger than the original data (passed - * to init_trim) will result in a fatal abort of AFLFuzz. - * - * (Optional) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param[out] out_buf Pointer to the buffer containing the trimmed test case. - * External library should allocate memory for out_buf. - * AFL++ will not release the memory after saving the test case. - * Keep a ref in *data. - * *out_buf = NULL is treated as error. - * @return Pointer to the size of the trimmed test case - */ -size_t afl_custom_trim(my_mutator_t *data, uint8_t **out_buf) { - - *out_buf = data->trim_buf; - - // Remove the last byte of the trimming input - return data->trim_size_current - 1; - -} - -/** - * This method is called after each trim operation to inform you if your - * trimming step was successful or not (in terms of coverage). If you receive - * a failure here, you should reset your input to the last known good state. - * - * (Optional) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param success Indicates if the last trim operation was successful. - * @return The next trim iteration index (from 0 to the maximum amount of - * steps returned in init_trim). negative ret on failure. - */ -int32_t afl_custom_post_trim(my_mutator_t *data, int success) { - - if (success) { - - ++data->cur_step; - return data->cur_step; - - } - - return data->trimmming_steps; - -} - -/** - * Perform a single custom mutation on a given input. - * This mutation is stacked with the other muatations in havoc. - * - * (Optional) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param[in] buf Pointer to the input data to be mutated and the mutated - * output - * @param[in] buf_size Size of input data - * @param[out] out_buf The output buffer. buf can be reused, if the content - * fits. *out_buf = NULL is treated as error. - * @param[in] max_size Maximum size of the mutated output. The mutation must - * not produce data larger than max_size. - * @return Size of the mutated output. - */ -size_t afl_custom_havoc_mutation(my_mutator_t *data, u8 *buf, size_t buf_size, - u8 **out_buf, size_t max_size) { - - if (buf_size == 0) { - - *out_buf = maybe_grow(BUF_PARAMS(data, havoc), 1); - if (!*out_buf) { - - perror("custom havoc: maybe_grow"); - return 0; - - } - - **out_buf = rand() % 256; - buf_size = 1; - - } else { - - // We reuse buf here. It's legal and faster. - *out_buf = buf; - - } - - size_t victim = rand() % buf_size; - (*out_buf)[victim] += rand() % 10; - - return buf_size; - -} - -/** - * Return the probability (in percentage) that afl_custom_havoc_mutation - * is called in havoc. By default it is 6 %. - * - * (Optional) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @return The probability (0-100). - */ -uint8_t afl_custom_havoc_mutation_probability(my_mutator_t *data) { - - return 5; // 5 % - -} - -/** - * Determine whether the fuzzer should fuzz the queue entry or not. - * - * (Optional) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param filename File name of the test case in the queue entry - * @return Return True(1) if the fuzzer will fuzz the queue entry, and - * False(0) otherwise. - */ -uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) { - - return 1; - -} - -/** - * Allow for additional analysis (e.g. calling a different tool that does a - * different kind of coverage and saves this for the custom mutator). - * - * (Optional) - * - * @param data pointer returned in afl_custom_init for this fuzz case - * @param filename_new_queue File name of the new queue entry - * @param filename_orig_queue File name of the original queue entry - */ -void afl_custom_queue_new_entry(my_mutator_t * data, - const uint8_t *filename_new_queue, - const uint8_t *filename_orig_queue) { - - /* Additional analysis on the original or new test case */ - -} - -/** - * Deinitialize everything - * - * @param data The data ptr from afl_custom_init - */ -void afl_custom_deinit(my_mutator_t *data) { - - free(data->post_process_buf); - free(data->havoc_buf); - free(data->data_buf); - free(data->fuzz_buf); - free(data->trim_buf); - free(data); - -} - diff --git a/utils/custom_mutators/example.py b/utils/custom_mutators/example.py deleted file mode 100644 index 3a6d22e4..00000000 --- a/utils/custom_mutators/example.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -""" -Example Python Module for AFLFuzz - -@author: Christian Holler (:decoder) - -@license: - -This Source Code Form is subject to the terms of the Mozilla Public -License, v. 2.0. If a copy of the MPL was not distributed with this -file, You can obtain one at http://mozilla.org/MPL/2.0/. - -@contact: choller@mozilla.com -""" - -import random - - -COMMANDS = [ - b"GET", - b"PUT", - b"DEL", - b"AAAAAAAAAAAAAAAAA", -] - - -def init(seed): - """ - Called once when AFLFuzz starts up. Used to seed our RNG. - - @type seed: int - @param seed: A 32-bit random value - """ - random.seed(seed) - - -def deinit(): - pass - - -def fuzz(buf, add_buf, max_size): - """ - Called per fuzzing iteration. - - @type buf: bytearray - @param buf: The buffer that should be mutated. - - @type add_buf: bytearray - @param add_buf: A second buffer that can be used as mutation source. - - @type max_size: int - @param max_size: Maximum size of the mutated output. The mutation must not - produce data larger than max_size. - - @rtype: bytearray - @return: A new bytearray containing the mutated data - """ - ret = bytearray(100) - - ret[:3] = random.choice(COMMANDS) - - return ret - - -# Uncomment and implement the following methods if you want to use a custom -# trimming algorithm. See also the documentation for a better API description. - -# def init_trim(buf): -# ''' -# Called per trimming iteration. -# -# @type buf: bytearray -# @param buf: The buffer that should be trimmed. -# -# @rtype: int -# @return: The maximum number of trimming steps. -# ''' -# global ... -# -# # Initialize global variables -# -# # Figure out how many trimming steps are possible. -# # If this is not possible for your trimming, you can -# # return 1 instead and always return 0 in post_trim -# # until you are done (then you return 1). -# -# return steps -# -# def trim(): -# ''' -# Called per trimming iteration. -# -# @rtype: bytearray -# @return: A new bytearray containing the trimmed data. -# ''' -# global ... -# -# # Implement the actual trimming here -# -# return bytearray(...) -# -# def post_trim(success): -# ''' -# Called after each trimming operation. -# -# @type success: bool -# @param success: Indicates if the last trim operation was successful. -# -# @rtype: int -# @return: The next trim index (0 to max number of steps) where max -# number of steps indicates the trimming is done. -# ''' -# global ... -# -# if not success: -# # Restore last known successful input, determine next index -# else: -# # Just determine the next index, based on what was successfully -# # removed in the last step -# -# return next_index -# -# def post_process(buf): -# ''' -# Called just before the execution to write the test case in the format -# expected by the target -# -# @type buf: bytearray -# @param buf: The buffer containing the test case to be executed -# -# @rtype: bytearray -# @return: The buffer containing the test case after -# ''' -# return buf -# -# def havoc_mutation(buf, max_size): -# ''' -# Perform a single custom mutation on a given input. -# -# @type buf: bytearray -# @param buf: The buffer that should be mutated. -# -# @type max_size: int -# @param max_size: Maximum size of the mutated output. The mutation must not -# produce data larger than max_size. -# -# @rtype: bytearray -# @return: A new bytearray containing the mutated data -# ''' -# return mutated_buf -# -# def havoc_mutation_probability(): -# ''' -# Called for each `havoc_mutation`. Return the probability (in percentage) -# that `havoc_mutation` is called in havoc. Be default it is 6%. -# -# @rtype: int -# @return: The probability (0-100) -# ''' -# return prob -# -# def queue_get(filename): -# ''' -# Called at the beginning of each fuzz iteration to determine whether the -# test case should be fuzzed -# -# @type filename: str -# @param filename: File name of the test case in the current queue entry -# -# @rtype: bool -# @return: Return True if the custom mutator decides to fuzz the test case, -# and False otherwise -# ''' -# return True -# -# def queue_new_entry(filename_new_queue, filename_orig_queue): -# ''' -# Called after adding a new test case to the queue -# -# @type filename_new_queue: str -# @param filename_new_queue: File name of the new queue entry -# -# @type filename_orig_queue: str -# @param filename_orig_queue: File name of the original queue entry -# ''' -# pass diff --git a/utils/custom_mutators/post_library_gif.so.c b/utils/custom_mutators/post_library_gif.so.c deleted file mode 100644 index ac10f409..00000000 --- a/utils/custom_mutators/post_library_gif.so.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - american fuzzy lop++ - postprocessor library example - -------------------------------------------------- - - Originally written by Michal Zalewski - Edited by Dominik Maier, 2020 - - Copyright 2015 Google Inc. All rights reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at: - - http://www.apache.org/licenses/LICENSE-2.0 - - Postprocessor libraries can be passed to afl-fuzz to perform final cleanup - of any mutated test cases - for example, to fix up checksums in PNG files. - - Please heed the following warnings: - - 1) In almost all cases, it is more productive to comment out checksum logic - in the targeted binary (as shown in ../libpng_no_checksum/). One possible - exception is the process of fuzzing binary-only software in QEMU mode. - - 2) The use of postprocessors for anything other than checksums is - questionable and may cause more harm than good. AFL is normally pretty good - about dealing with length fields, magic values, etc. - - 3) Postprocessors that do anything non-trivial must be extremely robust to - gracefully handle malformed data and other error conditions - otherwise, - they will crash and take afl-fuzz down with them. Be wary of reading past - *len and of integer overflows when calculating file offsets. - - In other words, THIS IS PROBABLY NOT WHAT YOU WANT - unless you really, - honestly know what you're doing =) - - With that out of the way: the postprocessor library is passed to afl-fuzz - via AFL_POST_LIBRARY. The library must be compiled with: - - gcc -shared -Wall -O3 post_library.so.c -o post_library.so - - AFL will call the afl_custom_post_process() function for every mutated output - buffer. From there, you have three choices: - - 1) If you don't want to modify the test case, simply set `*out_buf = in_buf` - and return the original `len`. - - 2) If you want to skip this test case altogether and have AFL generate a - new one, return 0 or set `*out_buf = NULL`. - Use this sparingly - it's faster than running the target program - with patently useless inputs, but still wastes CPU time. - - 3) If you want to modify the test case, allocate an appropriately-sized - buffer, move the data into that buffer, make the necessary changes, and - then return the new pointer as out_buf. Return an appropriate len - afterwards. - - Note that the buffer will *not* be freed for you. To avoid memory leaks, - you need to free it or reuse it on subsequent calls (as shown below). - - *** Feel free to reuse the original 'in_buf' BUFFER and return it. *** - - Aight. The example below shows a simple postprocessor that tries to make - sure that all input files start with "GIF89a". - - PS. If you don't like C, you can try out the unix-based wrapper from - Ben Nagy instead: https://github.com/bnagy/aflfix - - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -/* Header that must be present at the beginning of every test case: */ - -#define HEADER "GIF89a" - -typedef struct post_state { - - unsigned char *buf; - size_t size; - -} post_state_t; - -void *afl_custom_init(void *afl) { - - post_state_t *state = malloc(sizeof(post_state_t)); - if (!state) { - - perror("malloc"); - return NULL; - - } - - state->buf = calloc(sizeof(unsigned char), 4096); - if (!state->buf) { - - free(state); - perror("calloc"); - return NULL; - - } - - return state; - -} - -/* The actual postprocessor routine called by afl-fuzz: */ - -size_t afl_custom_post_process(post_state_t *data, unsigned char *in_buf, - unsigned int len, unsigned char **out_buf) { - - /* Skip execution altogether for buffers shorter than 6 bytes (just to - show how it's done). We can trust len to be sane. */ - - if (len < strlen(HEADER)) return 0; - - /* Do nothing for buffers that already start with the expected header. */ - - if (!memcmp(in_buf, HEADER, strlen(HEADER))) { - - *out_buf = in_buf; - return len; - - } - - /* Allocate memory for new buffer, reusing previous allocation if - possible. */ - - *out_buf = realloc(data->buf, len); - - /* If we're out of memory, the most graceful thing to do is to return the - original buffer and give up on modifying it. Let AFL handle OOM on its - own later on. */ - - if (!*out_buf) { - - *out_buf = in_buf; - return len; - - } - - /* Copy the original data to the new location. */ - - memcpy(*out_buf, in_buf, len); - - /* Insert the new header. */ - - memcpy(*out_buf, HEADER, strlen(HEADER)); - - /* Return the new len. It hasn't changed, so it's just len. */ - - return len; - -} - -/* Gets called afterwards */ -void afl_custom_deinit(post_state_t *data) { - - free(data->buf); - free(data); - -} - diff --git a/utils/custom_mutators/post_library_png.so.c b/utils/custom_mutators/post_library_png.so.c deleted file mode 100644 index 941f7e55..00000000 --- a/utils/custom_mutators/post_library_png.so.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - american fuzzy lop++ - postprocessor for PNG - ------------------------------------------ - - Originally written by Michal Zalewski - - Copyright 2015 Google Inc. All rights reserved. - Adapted to the new API, 2020 by Dominik Maier - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at: - - http://www.apache.org/licenses/LICENSE-2.0 - - See post_library.so.c for a general discussion of how to implement - postprocessors. This specific postprocessor attempts to fix up PNG - checksums, providing a slightly more complicated example than found - in post_library.so.c. - - Compile with: - - gcc -shared -Wall -O3 post_library_png.so.c -o post_library_png.so -lz - - */ - -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <string.h> -#include <zlib.h> - -#include <arpa/inet.h> - -/* A macro to round an integer up to 4 kB. */ - -#define UP4K(_i) ((((_i) >> 12) + 1) << 12) - -typedef struct post_state { - - unsigned char *buf; - size_t size; - -} post_state_t; - -void *afl_custom_init(void *afl) { - - post_state_t *state = malloc(sizeof(post_state_t)); - if (!state) { - - perror("malloc"); - return NULL; - - } - - state->buf = calloc(sizeof(unsigned char), 4096); - if (!state->buf) { - - free(state); - perror("calloc"); - return NULL; - - } - - return state; - -} - -size_t afl_custom_post_process(post_state_t *data, const unsigned char *in_buf, - unsigned int len, - const unsigned char **out_buf) { - - unsigned char *new_buf = (unsigned char *)in_buf; - unsigned int pos = 8; - - /* Don't do anything if there's not enough room for the PNG header - (8 bytes). */ - - if (len < 8) { - - *out_buf = in_buf; - return len; - - } - - /* Minimum size of a zero-length PNG chunk is 12 bytes; if we - don't have that, we can bail out. */ - - while (pos + 12 <= len) { - - unsigned int chunk_len, real_cksum, file_cksum; - - /* Chunk length is the first big-endian dword in the chunk. */ - - chunk_len = ntohl(*(uint32_t *)(in_buf + pos)); - - /* Bail out if chunk size is too big or goes past EOF. */ - - if (chunk_len > 1024 * 1024 || pos + 12 + chunk_len > len) break; - - /* Chunk checksum is calculated for chunk ID (dword) and the actual - payload. */ - - real_cksum = htonl(crc32(0, in_buf + pos + 4, chunk_len + 4)); - - /* The in-file checksum is the last dword past the chunk data. */ - - file_cksum = *(uint32_t *)(in_buf + pos + 8 + chunk_len); - - /* If the checksums do not match, we need to fix the file. */ - - if (real_cksum != file_cksum) { - - /* First modification? Make a copy of the input buffer. Round size - up to 4 kB to minimize the number of reallocs needed. */ - - if (new_buf == in_buf) { - - if (len <= data->size) { - - new_buf = data->buf; - - } else { - - new_buf = realloc(data->buf, UP4K(len)); - if (!new_buf) { - - *out_buf = in_buf; - return len; - - } - - data->buf = new_buf; - data->size = UP4K(len); - memcpy(new_buf, in_buf, len); - - } - - } - - *(uint32_t *)(new_buf + pos + 8 + chunk_len) = real_cksum; - - } - - /* Skip the entire chunk and move to the next one. */ - - pos += 12 + chunk_len; - - } - - *out_buf = new_buf; - return len; - -} - -/* Gets called afterwards */ -void afl_custom_deinit(post_state_t *data) { - - free(data->buf); - free(data); - -} - diff --git a/utils/custom_mutators/simple-chunk-replace.py b/utils/custom_mutators/simple-chunk-replace.py deleted file mode 100644 index c57218dd..00000000 --- a/utils/custom_mutators/simple-chunk-replace.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -""" -Simple Chunk Cross-Over Replacement Module for AFLFuzz - -@author: Christian Holler (:decoder) - -@license: - -This Source Code Form is subject to the terms of the Mozilla Public -License, v. 2.0. If a copy of the MPL was not distributed with this -file, You can obtain one at http://mozilla.org/MPL/2.0/. - -@contact: choller@mozilla.com -""" - -import random - - -def init(seed): - """ - Called once when AFLFuzz starts up. Used to seed our RNG. - - @type seed: int - @param seed: A 32-bit random value - """ - # Seed our RNG - random.seed(seed) - - -def fuzz(buf, add_buf, max_size): - """ - Called per fuzzing iteration. - - @type buf: bytearray - @param buf: The buffer that should be mutated. - - @type add_buf: bytearray - @param add_buf: A second buffer that can be used as mutation source. - - @type max_size: int - @param max_size: Maximum size of the mutated output. The mutation must not - produce data larger than max_size. - - @rtype: bytearray - @return: A new bytearray containing the mutated data - """ - # Make a copy of our input buffer for returning - ret = bytearray(buf) - - # Take a random fragment length between 2 and 32 (or less if add_buf is shorter) - fragment_len = random.randint(1, min(len(add_buf), 32)) - - # Determine a random source index where to take the data chunk from - rand_src_idx = random.randint(0, len(add_buf) - fragment_len) - - # Determine a random destination index where to put the data chunk - rand_dst_idx = random.randint(0, len(buf)) - - # Make the chunk replacement - ret[rand_dst_idx : rand_dst_idx + fragment_len] = add_buf[ - rand_src_idx : rand_src_idx + fragment_len - ] - - # Return data - return ret diff --git a/utils/custom_mutators/simple_example.c b/utils/custom_mutators/simple_example.c deleted file mode 100644 index d888ec1f..00000000 --- a/utils/custom_mutators/simple_example.c +++ /dev/null @@ -1,74 +0,0 @@ -// This simple example just creates random buffer <= 100 filled with 'A' -// needs -I /path/to/AFLplusplus/include -#include "custom_mutator_helpers.h" - -#include <stdint.h> -#include <stdlib.h> -#include <string.h> -#include <stdio.h> - -#ifndef _FIXED_CHAR - #define _FIXED_CHAR 0x41 -#endif - -typedef struct my_mutator { - - afl_t *afl; - - // Reused buffers: - BUF_VAR(u8, fuzz); - -} my_mutator_t; - -my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { - - srand(seed); - my_mutator_t *data = calloc(1, sizeof(my_mutator_t)); - if (!data) { - - perror("afl_custom_init alloc"); - return NULL; - - } - - data->afl = afl; - - return data; - -} - -size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, - u8 **out_buf, uint8_t *add_buf, - size_t add_buf_size, // add_buf can be NULL - size_t max_size) { - - int size = (rand() % 100) + 1; - if (size > max_size) size = max_size; - u8 *mutated_out = maybe_grow(BUF_PARAMS(data, fuzz), size); - if (!mutated_out) { - - *out_buf = NULL; - perror("custom mutator allocation (maybe_grow)"); - return 0; /* afl-fuzz will very likely error out after this. */ - - } - - memset(mutated_out, _FIXED_CHAR, size); - - *out_buf = mutated_out; - return size; - -} - -/** - * Deinitialize everything - * - * @param data The data ptr from afl_custom_init - */ -void afl_custom_deinit(my_mutator_t *data) { - - free(data->fuzz_buf); - free(data); - -} - diff --git a/utils/custom_mutators/wrapper_afl_min.py b/utils/custom_mutators/wrapper_afl_min.py deleted file mode 100644 index 5cd60031..00000000 --- a/utils/custom_mutators/wrapper_afl_min.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python - -from XmlMutatorMin import XmlMutatorMin - -# Default settings (production mode) - -__mutator__ = None -__seed__ = "RANDOM" -__log__ = False -__log_file__ = "wrapper.log" - - -# AFL functions -def log(text): - """ - Logger - """ - - global __seed__ - global __log__ - global __log_file__ - - if __log__: - with open(__log_file__, "a") as logf: - logf.write("[%s] %s\n" % (__seed__, text)) - - -def init(seed): - """ - Called once when AFL starts up. Seed is used to identify the AFL instance in log files - """ - - global __mutator__ - global __seed__ - - # Get the seed - __seed__ = seed - - # Create a global mutation class - try: - __mutator__ = XmlMutatorMin(__seed__, verbose=__log__) - log("init(): Mutator created") - except RuntimeError as e: - log("init(): Can't create mutator: %s" % e.message) - - -def fuzz(buf, add_buf, max_size): - """ - Called for each fuzzing iteration. - """ - - global __mutator__ - - # Do we have a working mutator object? - if __mutator__ is None: - log("fuzz(): Can't fuzz, no mutator available") - return buf - - # Try to use the AFL buffer - via_buffer = True - - # Interpret the AFL buffer (an array of bytes) as a string - if via_buffer: - try: - buf_str = str(buf) - log("fuzz(): AFL buffer converted to a string") - except Exception: - via_buffer = False - log("fuzz(): Can't convert AFL buffer to a string") - - # Load XML from the AFL string - if via_buffer: - try: - __mutator__.init_from_string(buf_str) - log( - "fuzz(): Mutator successfully initialized with AFL buffer (%d bytes)" - % len(buf_str) - ) - except Exception: - via_buffer = False - log("fuzz(): Can't initialize mutator with AFL buffer") - - # If init from AFL buffer wasn't succesful - if not via_buffer: - log("fuzz(): Returning unmodified AFL buffer") - return buf - - # Sucessful initialization -> mutate - try: - __mutator__.mutate(max=5) - log("fuzz(): Input mutated") - except Exception: - log("fuzz(): Can't mutate input => returning buf") - return buf - - # Convert mutated data to a array of bytes - try: - data = bytearray(__mutator__.save_to_string()) - log("fuzz(): Mutated data converted as bytes") - except Exception: - log("fuzz(): Can't convert mutated data to bytes => returning buf") - return buf - - # Everything went fine, returning mutated content - log("fuzz(): Returning %d bytes" % len(data)) - return data - - -# Main (for debug) -if __name__ == "__main__": - - __log__ = True - __log_file__ = "/dev/stdout" - __seed__ = "RANDOM" - - init(__seed__) - - in_1 = bytearray( - "<foo ddd='eeee'>ffff<a b='c' d='456' eee='ffffff'>zzzzzzzzzzzz</a><b yyy='YYY' zzz='ZZZ'></b></foo>" - ) - in_2 = bytearray("<abc abc123='456' abcCBA='ppppppppppppppppppppppppppppp'/>") - out = fuzz(in_1, in_2) - print(out) diff --git a/utils/libdislocator/libdislocator.so.c b/utils/libdislocator/libdislocator.so.c index 1b247c86..dde78f7b 100644 --- a/utils/libdislocator/libdislocator.so.c +++ b/utils/libdislocator/libdislocator.so.c @@ -144,8 +144,8 @@ typedef struct { /* Configurable stuff (use AFL_LD_* to set): */ -static u32 max_mem = MAX_ALLOC; /* Max heap usage to permit */ -static u8 alloc_verbose, /* Additional debug messages */ +static size_t max_mem = MAX_ALLOC; /* Max heap usage to permit */ +static u8 alloc_verbose, /* Additional debug messages */ hard_fail, /* abort() when max_mem exceeded? */ no_calloc_over, /* abort() on calloc() overflows? */ align_allocations; /* Force alignment to sizeof(void*) */ @@ -154,7 +154,7 @@ static u8 alloc_verbose, /* Additional debug messages */ #define __thread #warning no thread support available #endif -static __thread size_t total_mem; /* Currently allocated mem */ +static _Atomic size_t total_mem; /* Currently allocated mem */ static __thread u32 call_depth; /* To avoid recursion via fprintf() */ static u32 alloc_canary; @@ -172,9 +172,9 @@ static void *__dislocator_alloc(size_t len) { if (total_mem + len > max_mem || total_mem + len < total_mem) { - if (hard_fail) FATAL("total allocs exceed %u MB", max_mem / 1024 / 1024); + if (hard_fail) FATAL("total allocs exceed %zu MB", max_mem / 1024 / 1024); - DEBUGF("total allocs exceed %u MB, returning NULL", max_mem / 1024 / 1024); + DEBUGF("total allocs exceed %zu MB, returning NULL", max_mem / 1024 / 1024); return NULL; @@ -500,19 +500,20 @@ size_t malloc_usable_size(const void *ptr) { __attribute__((constructor)) void __dislocator_init(void) { - u8 *tmp = (u8 *)getenv("AFL_LD_LIMIT_MB"); + char *tmp = getenv("AFL_LD_LIMIT_MB"); if (tmp) { - u8 *tok; - s32 mmem = (s32)strtol((char *)tmp, (char **)&tok, 10); - if (*tok != '\0' || errno == ERANGE) FATAL("Bad value for AFL_LD_LIMIT_MB"); + char * tok; + unsigned long long mmem = strtoull(tmp, &tok, 10); + if (*tok != '\0' || errno == ERANGE || mmem > SIZE_MAX / 1024 / 1024) + FATAL("Bad value for AFL_LD_LIMIT_MB"); max_mem = mmem * 1024 * 1024; } alloc_canary = ALLOC_CANARY; - tmp = (u8 *)getenv("AFL_RANDOM_ALLOC_CANARY"); + tmp = getenv("AFL_RANDOM_ALLOC_CANARY"); if (tmp) arc4random_buf(&alloc_canary, sizeof(alloc_canary)); diff --git a/utils/qbdi_mode/template.cpp b/utils/qbdi_mode/template.cpp index b2066cc8..182a014b 100755 --- a/utils/qbdi_mode/template.cpp +++ b/utils/qbdi_mode/template.cpp @@ -25,7 +25,7 @@ #if (defined(__x86_64__) || defined(__i386__)) && defined(AFL_QEMU_NOT_ZERO) #define INC_AFL_AREA(loc) \ asm volatile( \ - "incb (%0, %1, 1)\n" \ + "addb $1, (%0, %1, 1)\n" \ "adcb $0, (%0, %1, 1)\n" \ : /* no out */ \ : "r"(afl_area_ptr), "r"(loc) \ diff --git a/utils/qemu_persistent_hook/read_into_rdi.c b/utils/qemu_persistent_hook/read_into_rdi.c index f4a8ae59..14b2ed85 100644 --- a/utils/qemu_persistent_hook/read_into_rdi.c +++ b/utils/qemu_persistent_hook/read_into_rdi.c @@ -3,12 +3,12 @@ #include <stdio.h> #include <string.h> -void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, - uint8_t *input_buf, uint32_t input_buf_len) { -\ #define g2h(x) ((void *)((unsigned long)(x) + guest_base)) #define h2g(x) ((uint64_t)(x)-guest_base) +void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { + // In this example the register RDI is pointing to the memory location // of the target buffer, and the length of the input is in RSI. // This can be seen with a debugger, e.g. gdb (and "disass main") @@ -19,11 +19,11 @@ void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, memcpy(g2h(regs->rdi), input_buf, input_buf_len); regs->rsi = input_buf_len; +} + #undef g2h #undef h2g -} - int afl_persistent_hook_init(void) { // 1 for shared memory input (faster), 0 for normal input (you have to use |