From 845c584b9cee7092772305912508b825155142fa Mon Sep 17 00:00:00 2001
From: begasus
Date: Sun, 4 Apr 2021 17:41:43 +0000
Subject: Fix Haiku references, no and missing defines for USEMMAP

---
 utils/afl_network_proxy/afl-network-server.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'utils')

diff --git a/utils/afl_network_proxy/afl-network-server.c b/utils/afl_network_proxy/afl-network-server.c
index 0dfae658..60f174ee 100644
--- a/utils/afl_network_proxy/afl-network-server.c
+++ b/utils/afl_network_proxy/afl-network-server.c
@@ -45,7 +45,6 @@
 #include
 #include
-#include
 #include
 #include
 #include
@@ -53,7 +52,9 @@
 #include
 #include
 #include
-#include <sys/shm.h>
+#ifndef USEMMAP
+  #include <sys/shm.h>
+#endif
 #include
 #include
-- cgit v1.2.3

From 6069cac313f4f8f4e696e815d4fe2f8bcaccccf4 Mon Sep 17 00:00:00 2001
From: Andrea Fioraldi
Date: Wed, 14 Apr 2021 18:24:55 +0200
Subject: qemu driver new api

---
 utils/aflpp_driver/aflpp_qemu_driver_hook.c | 21 +++++++++++++++------
 utils/qemu_persistent_hook/read_into_rdi.c  |  2 +-
 2 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'utils')

diff --git a/utils/aflpp_driver/aflpp_qemu_driver_hook.c b/utils/aflpp_driver/aflpp_qemu_driver_hook.c
index 823cc42d..d3dd98b0 100644
--- a/utils/aflpp_driver/aflpp_qemu_driver_hook.c
+++ b/utils/aflpp_driver/aflpp_qemu_driver_hook.c
@@ -1,21 +1,30 @@
+#include "../../qemu_mode/qemuafl/qemuafl/api.h"
+
 #include <stdint.h>
 #include <string.h>

+void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base,
+                         uint8_t *input_buf, uint32_t input_buf_len) {
+
 #define g2h(x) ((void *)((unsigned long)(x) + guest_base))
+#define h2g(x) ((uint64_t)(x)-guest_base)

-#define REGS_RDI 7
-#define REGS_RSI 6
+  // In this example the register RDI is pointing to the memory location
+  // of the target buffer, and the length of the input is in RSI.
+  // This can be seen with a debugger, e.g. gdb (and "disass main")

-void afl_persistent_hook(uint64_t *regs, uint64_t guest_base,
-                         uint8_t *input_buf, uint32_t input_len) {
+  memcpy(g2h(regs->rdi), input_buf, input_buf_len);
+  regs->rsi = input_buf_len;

-  memcpy(g2h(regs[REGS_RDI]), input_buf, input_len);
-  regs[REGS_RSI] = input_len;
+#undef g2h
+#undef h2g

 }

 int afl_persistent_hook_init(void) {

+  // 1 for shared memory input (faster), 0 for normal input (you have to use
+  // read(), input_buf will be NULL)
   return 1;

 }
diff --git a/utils/qemu_persistent_hook/read_into_rdi.c b/utils/qemu_persistent_hook/read_into_rdi.c
index f4a8ae59..c1c6642f 100644
--- a/utils/qemu_persistent_hook/read_into_rdi.c
+++ b/utils/qemu_persistent_hook/read_into_rdi.c
@@ -5,7 +5,7 @@
 void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base,
                          uint8_t *input_buf, uint32_t input_buf_len) {

-\
+
 #define g2h(x) ((void *)((unsigned long)(x) + guest_base))
 #define h2g(x) ((uint64_t)(x)-guest_base)
-- cgit v1.2.3

From f0d300b32a8a5b3adccc8209c151382244135082 Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Wed, 14 Apr 2021 18:36:22 +0200
Subject: add readme

---
 utils/aflpp_driver/README.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 utils/aflpp_driver/README.md

(limited to 'utils')

diff --git a/utils/aflpp_driver/README.md b/utils/aflpp_driver/README.md
new file mode 100644
index 00000000..2c339d12
--- /dev/null
+++ b/utils/aflpp_driver/README.md
@@ -0,0 +1,25 @@
+# afl++ drivers
+
+## aflpp_driver
+
+aflpp_driver is used to compile directly libfuzzer `LLVMFuzzerTestOneInput()`
+targets.
+ +Just do `afl-clang-fast++ -o fuzz fuzzer_harness.cc libAFLDriver.a [plus required linking]`. + +You can also sneakily do this little trick: +If this is the clang compile command to build for libfuzzer: + `clang++ -o fuzz -fsanitize=fuzzer fuzzer_harness.cc -lfoo` +then just switch `clang++` with `afl-clang-fast++` and our compiler will +magically insert libAFLDriver.a :) + + +## aflpp_qemu_driver + +aflpp_qemu_driver is used for libfuzzer `LLVMFuzzerTestOneInput()` targets that +are to be fuzzed in qemu_mode. So we compile them with clang/clang++, without +-fsantize=fuzzer or afl-clang-fast, and link in libAFLQemuDriver.a: + +`clang++ -o fuzz fuzzer_harness.cc libAFLQemuDriver.a [plus required linking]`. + +Then just do `AFL_PRELOAD=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- ./fuzz` -- cgit v1.2.3 From fd8dc1455278bca16e852eb08ddac9a3e466b5c7 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 14 Apr 2021 18:49:02 +0200 Subject: update readme --- utils/aflpp_driver/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/aflpp_driver/README.md b/utils/aflpp_driver/README.md index 2c339d12..01bd10c0 100644 --- a/utils/aflpp_driver/README.md +++ b/utils/aflpp_driver/README.md @@ -22,4 +22,9 @@ are to be fuzzed in qemu_mode. So we compile them with clang/clang++, without `clang++ -o fuzz fuzzer_harness.cc libAFLQemuDriver.a [plus required linking]`. -Then just do `AFL_PRELOAD=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- ./fuzz` + +Then just do (where the name of the binary is `fuzz`): +``` +AFL_QEMU_PERSISTENT_ADDR=0x$(nm fuzz | grep "T LLVMFuzzerTestOneInput" | awk '{print $1}') +AFL_QEMU_PERSISTENT_HOOK=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- ./fuzz` +``` -- cgit v1.2.3 From 4a0e0270adafbc583d491dfad74d9378a4c06bf7 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 14 Apr 2021 22:23:16 +0200 Subject: allow aflpp_qemu_driver_hook.o to fail --- utils/aflpp_driver/GNUmakefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'utils') diff --git a/utils/aflpp_driver/GNUmakefile b/utils/aflpp_driver/GNUmakefile index c1a087d7..8ac054a6 100644 --- a/utils/aflpp_driver/GNUmakefile +++ b/utils/aflpp_driver/GNUmakefile @@ -26,17 +26,17 @@ debug: ar ru libAFLDriver.a afl-performance.o aflpp_driver.o aflpp_qemu_driver.o: aflpp_qemu_driver.c - $(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c + -$(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c libAFLQemuDriver.a: aflpp_qemu_driver.o - ar ru libAFLQemuDriver.a aflpp_qemu_driver.o - cp -vf libAFLQemuDriver.a ../../ + -ar ru libAFLQemuDriver.a aflpp_qemu_driver.o + -cp -vf libAFLQemuDriver.a ../../ aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o - $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so + -$(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c - $(LLVM_BINDIR)clang -fPIC $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c + -$(LLVM_BINDIR)clang -fPIC $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c test: debug #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c -- cgit v1.2.3 From c8e96e52536d47ee41967657202574d8e61562ee Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Thu, 15 Apr 2021 23:56:58 +0200 Subject: autoformat with black --- utils/autodict_ql/autodict-ql.py | 
154 +++++++++++++++++++---------------- utils/autodict_ql/litan.py | 126 ++++++++++++++++------------ utils/autodict_ql/memcmp-strings.py | 64 ++++++++++----- utils/autodict_ql/stan-strings.py | 64 ++++++++++----- utils/autodict_ql/strcmp-strings.py | 64 ++++++++++----- utils/autodict_ql/strncmp-strings.py | 64 ++++++++++----- 6 files changed, 326 insertions(+), 210 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py index 0fe7eabf..f64e3fae 100644 --- a/utils/autodict_ql/autodict-ql.py +++ b/utils/autodict_ql/autodict-ql.py @@ -11,7 +11,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -21,6 +21,7 @@ import subprocess from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -28,109 +29,118 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - - #parser.add_argument("tokenpath", - #help="Destination directory for tokens") - parser.add_argument("cur", - help = "Current Path") - parser.add_argument("db", - help = "CodeQL database Path") - parser.add_argument("tokenpath", - help="Destination directory for tokens") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + + # parser.add_argument("tokenpath", + # help="Destination directory for tokens") + parser.add_argument("cur", help="Current Path") + parser.add_argument("db", help="CodeQL database Path") + parser.add_argument("tokenpath", help="Destination directory for tokens") return parser.parse_args() -def static_analysis(file,file2,cur,db) : - with open(cur+"/"+file, "w") as f: - print(cur+"/"+file) - stream = os.popen("codeql query run " + cur +"/"+ file2 + " -d " + db ) + +def static_analysis(file, file2, cur, db): + with open(cur + "/" + file, "w") as f: + print(cur + "/" + file) + stream = os.popen("codeql query run " + cur + "/" + file2 + " -d " + db) output = stream.read() f.write(output) f.close() -def copy_tokens(cur, tokenpath) : - subprocess.call(["mv " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["rm -rf strcmp-strs memcmp-strs strncmp-strs lits strtool-strs"],shell=True) - subprocess.call(["rm *.out"],shell=True) - subprocess.call(["find "+tokenpath+" -size 0 -delete"],shell=True) - - - -def codeql_analysis(cur, db) : - static_analysis("litout.out","litool.ql", cur, db) - static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) - static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) - static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) - 
static_analysis("strtool-strings.out","strtool.ql", cur, db) - start_autodict(0,cur) +def copy_tokens(cur, tokenpath): + subprocess.call( + ["mv " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."], shell=True + ) + subprocess.call( + ["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["rm -rf strcmp-strs memcmp-strs strncmp-strs lits strtool-strs"], shell=True + ) + subprocess.call(["rm *.out"], shell=True) + subprocess.call(["find " + tokenpath + " -size 0 -delete"], shell=True) + + +def codeql_analysis(cur, db): + static_analysis("litout.out", "litool.ql", cur, db) + static_analysis("strcmp-strings.out", "strcmp-str.ql", cur, db) + static_analysis("strncmp-strings.out", "strncmp-str.ql", cur, db) + static_analysis("memcmp-strings.out", "memcmp-str.ql", cur, db) + static_analysis("strtool-strings.out", "strtool.ql", cur, db) + start_autodict(0, cur) def start_autodict(tokenpath, cur): - command = [ - 'python3', - cur + '/litan.py', - cur+'/lits/', - cur+'/litout.out' - ] + command = ["python3", cur + "/litan.py", cur + "/lits/", cur + "/litout.out"] worker1 = subprocess.Popen(command) print(worker1.communicate()) - + command1 = [ - 'python3', - cur + '/strcmp-strings.py', - cur + '/strcmp-strs/', - cur + '/strcmp-strings.out' - ] + "python3", + cur + "/strcmp-strings.py", + cur + "/strcmp-strs/", + cur + "/strcmp-strings.out", + ] worker2 = subprocess.Popen(command1) print(worker2.communicate()) command2 = [ - 'python3', - cur + '/strncmp-strings.py', - cur + '/strncmp-strs/', - cur + '/strncmp-strings.out' - ] + "python3", + cur + "/strncmp-strings.py", + cur + "/strncmp-strs/", + cur + "/strncmp-strings.out", + ] worker3 = subprocess.Popen(command2) print(worker3.communicate()) - - command5 = [ - 'python3', - cur + '/memcmp-strings.py', - cur + '/memcmp-strs/', - cur + '/memcmp-strings.out' - ] + "python3", + cur + "/memcmp-strings.py", + cur + "/memcmp-strs/", + cur + "/memcmp-strings.out", + ] worker6 = subprocess.Popen(command5) print(worker6.communicate()) - - command8 = [ - 'python3', - cur + '/stan-strings.py', - cur + '/strtool-strs/', - cur + '/strtool-strings.out' - ] + "python3", + cur + "/stan-strings.py", + cur + "/strtool-strs/", + cur + "/strtool-strings.out", + ] worker9 = subprocess.Popen(command8) print(worker9.communicate()) - def main(): - args = parse_args() + args = parse_args() ensure_dir(args.tokenpath) - #copy_tokens(args.cur, args.tokenpath) + # copy_tokens(args.cur, args.tokenpath) codeql_analysis(args.cur, args.db) copy_tokens(args.cur, args.tokenpath) - #start_autodict(args.tokenpath, args.cur) -if __name__ == '__main__': - main() \ No newline at end of file + # start_autodict(args.tokenpath, args.cur) + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/litan.py b/utils/autodict_ql/litan.py index 18c04c34..7033d363 100644 --- a/utils/autodict_ql/litan.py +++ b/utils/autodict_ql/litan.py @@ -4,7 +4,7 @@ # Author : Microsvuln - Arash.vre@gmail.com import string import os -import binascii +import binascii import codecs import struct import errno @@ -12,75 +12,101 @@ import argparse import re import base64 from binascii import 
unhexlify + + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file to analysis and output folder to save corpdirus for constants in the overall project ------- Example usage : python2 thisfile.py outdir o.txt")) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate files.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-hex.txt, analysis take place on this file, example : python2 thisfile.py outdir out.txt") - return parser.parse_args() + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file to analysis and output folder to save corpdirus for constants in the overall project ------- Example usage : python2 thisfile.py outdir o.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate files." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. ooo-hex.txt, analysis take place on this file, example : python2 thisfile.py outdir out.txt", + ) + return parser.parse_args() + + def ensure_dir(dir): try: os.makedirs(dir) except OSError as e: if e.errno == errno.EEXIST: - #print "[-] Directory exists, specify another directory" + # print "[-] Directory exists, specify another directory" exit(1) + + def do_analysis1(corpdir, infile): - with open(infile, "rb") as f: - lines = f.readlines()[1:] - f.close() + with open(infile, "rb") as f: + lines = f.readlines()[1:] + f.close() new_lst = [] n = 1 for i, num in enumerate(lines): if i != 0: - new_lst.append(num) + new_lst.append(num) str1 = str(num) - print ("num is " + str1) - str1 = str1.rstrip('\n\n') - #str1 = str1.replace("0x",""); - str1 = str1.replace("|","") - str1 = str1.rstrip('\r\n') - str1 = str1.rstrip('\n') - str1 = str1.replace(" ","") - #str1 = str1.translate(None, string.punctuation) - translator=str.maketrans('','',string.punctuation) - str1=str1.translate(translator) + print("num is " + str1) + str1 = str1.rstrip("\n\n") + # str1 = str1.replace("0x",""); + str1 = str1.replace("|", "") + str1 = str1.rstrip("\r\n") + str1 = str1.rstrip("\n") + str1 = str1.replace(" ", "") + # str1 = str1.translate(None, string.punctuation) + translator = str.maketrans("", "", string.punctuation) + str1 = str1.translate(translator) str1 = str1[1:] str1 = str1[:-1] print("After cleanup : " + str1) - if (str1 != '0') and (str1 != 'ffffffff') and (str1 != 'fffffffe') or (len(str1) == 4) or (len(str1) == 8): - print ("first : "+str1) - if len(str1) > 8 : + if ( + (str1 != "0") + and (str1 != "ffffffff") + and (str1 != "fffffffe") + or (len(str1) == 4) + or (len(str1) == 8) + ): + print("first : " + str1) + if len(str1) > 8: str1 = str1[:-1] - elif (len(str1) == 5) : + elif len(str1) == 5: str1 = str1 = "0" try: - #str1 = str1.decode("hex") - with open(corpdir+'/lit-seed{0}'.format(n), 'w') as file: - str1 = str1.replace("0x",""); - print (str1) - str1 = int(str1,base=16) - str1 = str1.to_bytes(4, byteorder='little') - file.write(str(str1)) - file.close() - with open (corpdir+'/lit-seed{0}'.format(n), 'r') as q : - a = q.readline() - a = a[1:] - print ("AFL++ Autodict-QL by Microsvuln : Writing Token :" + str(a)) - q.close() - with open (corpdir+'/lit-seed{0}'.format(n), 'w') as w1 : - w1.write(str(a)) - print ("Done!") - w1.close() - except: - print("Error!") - n = n+1 + # str1 = str1.decode("hex") + with open(corpdir + "/lit-seed{0}".format(n), "w") as file: + str1 = str1.replace("0x", "") + print(str1) + str1 = int(str1, base=16) + 
str1 = str1.to_bytes(4, byteorder="little") + file.write(str(str1)) + file.close() + with open(corpdir + "/lit-seed{0}".format(n), "r") as q: + a = q.readline() + a = a[1:] + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token :" + + str(a) + ) + q.close() + with open( + corpdir + "/lit-seed{0}".format(n), "w" + ) as w1: + w1.write(str(a)) + print("Done!") + w1.close() + except: + print("Error!") + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_analysis1(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py index d1047caa..270a697c 100644 --- a/utils/autodict_ql/memcmp-strings.py +++ b/utils/autodict_ql/memcmp-strings.py @@ -5,7 +5,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/memcmp-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/memcmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py index 65d08c97..81cb0b97 100644 --- a/utils/autodict_ql/stan-strings.py +++ b/utils/autodict_ql/stan-strings.py @@ -5,7 +5,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/seed-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/seed-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py index 88128dbb..9c2520c9 100644 --- a/utils/autodict_ql/strcmp-strings.py +++ b/utils/autodict_ql/strcmp-strings.py @@ -5,7 +5,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/strcmp-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/strcmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py index 0ad0e697..6206b4c4 100644 --- a/utils/autodict_ql/strncmp-strings.py +++ b/utils/autodict_ql/strncmp-strings.py @@ -5,7 +5,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/strncmp-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/strncmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() -- cgit v1.2.3 From 846a46e06052c13e3036fbee05866d165adb19cc Mon Sep 17 00:00:00 2001 From: hexcoder Date: Fri, 16 Apr 2021 12:12:52 +0200 Subject: review --- utils/autodict_ql/readme.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 8c24d65c..31a20352 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -2,13 +2,13 @@ ## What is this? -`Autodict-QL` is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (Unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses the CodeQL (A powerful semantic code analysis engine) to fetch information from a code base. +`Autodict-QL` is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses CodeQL (a powerful semantic code analysis engine) to fetch information from a code base. -Tokens are useful when you perform fuzzing on different parsers. AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campagin. if you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . +Tokens are useful when you perform fuzzing on different parsers. The AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campaign. If you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . ## Why CodeQL ? -We basically developed this plugin on top of CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. 
This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. +We basically developed this plugin on top of the CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. ## CodeQL scripts @@ -16,7 +16,7 @@ Currently, we pushed some scripts as defaults for Token generation. In addition, Currently we provided the following CodeQL scripts : -`strcmp-str.ql` is used to extract strings that are related to `strcmp` function. +`strcmp-str.ql` is used to extract strings that are related to the `strcmp` function. `strncmp-str.ql` is used to extract the strings from the `strncmp` function. @@ -24,18 +24,18 @@ Currently we provided the following CodeQL scripts : `litool.ql` extracts Magic numbers as Hexadecimal format. -`strtool.ql` extracts strings with uses of a regex and dataflow concept to capture the string comparison functions. if strcmp is rewritten in a project as Mystrcmp or something like strmycmp, then this script can catch the arguments and these are valuable tokens. +`strtool.ql` extracts strings with uses of a regex and dataflow concept to capture the string comparison functions. If `strcmp` is rewritten in a project as Mystrcmp or something like strmycmp, then this script can catch the arguments and these are valuable tokens. You can write other CodeQL scripts to extract possible effective tokens if you think they can be useful. ## Usage -Before proceed to installation make sure that you have the following packages by installing them : +Before you proceed to installation make sure that you have the following packages by installing them : ```shell sudo apt install build-essential libtool-bin python3-dev python3 automake git vim wget -y ``` -The usage of Autodict-QL is pretty easy. But let's describe it as : +The usage of Autodict-QL is pretty easy. But let's describe it as: 1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system. Do the following : @@ -45,7 +45,7 @@ Do the following : # source ~/.bashrc # codeql ``` -Then you should get : +Then you should get: ```shell Usage: codeql ... @@ -73,29 +73,29 @@ Commands: github Commands useful for interacting with the GitHub API through CodeQL. ``` -2. Compile your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: +2. Compile your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard. + - First you need to create a CodeQL database of the project codebase, suppose we want to compile `libxml` with codeql. Go to libxml and issue the following commands: - `./configure --disable-shared` - `codeql create database libxml-db --language=cpp --command=make` - Now you have the CodeQL database of the project :-) -3. 
The final step is to update the CodeQL database you created in the step 2 (Suppose we are in `aflplusplus/utils/autodict_ql/` directory) : +3. The final step is to update the CodeQL database you created in step 2 (Suppose we are in `aflplusplus/utils/autodict_ql/` directory): - `codeql database upgrade /home/user/libxml/libxml-db` -4. Everything is set! Now you should issue the following to get the tokens : +4. Everything is set! Now you should issue the following to get the tokens: - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py $PWD /home/user/libxml/libxml-db tokens` - - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. + - This will create the final `tokens` dir for you and you are done, then pass the tokens path to AFL++'s `-x` flag. 5. Done! ## More on dictionaries and tokens -Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. -On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. +Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extract useful tokens, in addition with LTO instrumentation mode, this dict2file is automatically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. In addition it's independent from LLVM system. +On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. For example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionaries, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generate dictionaries based on the target. -I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. +I've personally prefered to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manually generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. In overall, fuzzing with dictionaries and well-generated tokens will give better results. There are 2 important points to remember : -- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chance to discover new bugs. 
-- Do not remember to set the `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries, if you forget to set this environment variable, then AFL++ use just 200 tokens and use the rest of them probablistically. So this will guarantees that your tokens will be used by AFL++. +- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chances to discover new bugs. +- Do not forget to set `AFL_MAX_DET_EXTRAS` at least to the number of generated dictionaries. If you forget to set this environment variable, then AFL++ uses just 200 tokens and use the rest of them only probabilistically. So this will guarantee that your tokens will be used by AFL++. -- cgit v1.2.3
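
For readers who have not written a libFuzzer-style target before, here is a rough sketch of the harness that the `utils/aflpp_driver` READMEs in the patches above assume. Only the `LLVMFuzzerTestOneInput()` entry point is prescribed by `libAFLDriver.a` and `libAFLQemuDriver.a`; the file name, the `"FUZZ"` header check, and the branch body are made-up placeholders for illustration, not something taken from the patches.

```c
/* fuzzer_harness.c - minimal sketch of a libFuzzer-style target.
 * Only the LLVMFuzzerTestOneInput() signature below is required by the
 * aflpp drivers; the "FUZZ" header check is a hypothetical example. */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

  if (size < 4) return 0;  /* too short for the 4-byte header check */

  if (memcmp(data, "FUZZ", 4) == 0) {

    /* ... feed data/size into the code under test here ... */

  }

  return 0;  /* return 0 for non-crashing inputs */

}
```

Built with `afl-clang-fast++` and linked against `libAFLDriver.a` this runs natively; built with plain clang and linked against `libAFLQemuDriver.a` it becomes the `fuzz` binary that the qemu_mode instructions above hand to `afl-fuzz -Q`. In the latter case the persistent hook from `aflpp_qemu_driver_hook.c` overwrites the buffer that `rdi` points to and the length in `rsi` on each iteration, which matches exactly this function's argument pair.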