From 70403f7e1b586bf23eebb131d5db2397d708abf0 Mon Sep 17 00:00:00 2001
From: realmadsci <71108352+realmadsci@users.noreply.github.com>
Date: Mon, 15 Mar 2021 14:09:54 -0400
Subject: triage_crashes.sh: Allow @@ to be part of an arg

---
 utils/crash_triage/triage_crashes.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'utils')

diff --git a/utils/crash_triage/triage_crashes.sh b/utils/crash_triage/triage_crashes.sh
index bf763cba..42cf8158 100755
--- a/utils/crash_triage/triage_crashes.sh
+++ b/utils/crash_triage/triage_crashes.sh
@@ -90,8 +90,9 @@ for crash in $DIR/crashes/id:*; do
   for a in $@; do
-    if [ "$a" = "@@" ] ; then
-      use_args="$use_args $crash"
+    if echo "$a" | grep -qF '@@'; then
+      escaped_fname=`echo $crash | sed 's:/:\\\\/:g'`
+      use_args="$use_args `echo $a | sed "s/@@/$escaped_fname/g"`"
       unset use_stdio
     else
       use_args="$use_args $a"
-- cgit 1.4.1

From b289e7ad073d4affae76de3da5d1faeba0f8a07e Mon Sep 17 00:00:00 2001
From: realmadsci <71108352+realmadsci@users.noreply.github.com>
Date: Mon, 15 Mar 2021 14:09:03 -0400
Subject: triage_crashes.sh: Fix error reporting

---
 utils/crash_triage/triage_crashes.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'utils')

diff --git a/utils/crash_triage/triage_crashes.sh b/utils/crash_triage/triage_crashes.sh
index 42cf8158..c9ca1f79 100755
--- a/utils/crash_triage/triage_crashes.sh
+++ b/utils/crash_triage/triage_crashes.sh
@@ -60,12 +60,12 @@ if
 fi

 if [ ! -f "$BIN" -o ! -x "$BIN" ]; then
-  echo "[-] Error: binary '$2' not found or is not executable." 1>&2
+  echo "[-] Error: binary '$BIN' not found or is not executable." 1>&2
   exit 1
 fi

 if [ ! -d "$DIR/queue" ]; then
-  echo "[-] Error: directory '$1' not found or not created by afl-fuzz." 1>&2
+  echo "[-] Error: directory '$DIR' not found or not created by afl-fuzz."
1>&2 exit 1 fi -- cgit 1.4.1 From fa349b4f4ceaa3e8309e7b01ddee6b6f895175f6 Mon Sep 17 00:00:00 2001 From: hexcoder Date: Fri, 26 Mar 2021 17:49:20 +0100 Subject: simpler argument processing --- utils/crash_triage/triage_crashes.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'utils') diff --git a/utils/crash_triage/triage_crashes.sh b/utils/crash_triage/triage_crashes.sh index c9ca1f79..4d75430e 100755 --- a/utils/crash_triage/triage_crashes.sh +++ b/utils/crash_triage/triage_crashes.sh @@ -90,13 +90,15 @@ for crash in $DIR/crashes/id:*; do for a in $@; do - if echo "$a" | grep -qF '@@'; then - escaped_fname=`echo $crash | sed 's:/:\\\\/:g'` - use_args="$use_args `echo $a | sed "s/@@/$escaped_fname/g"`" + case "$a" in + *@@*) unset use_stdio - else + use_args="$use_args `printf %s "$a" | sed -e 's<@@<'$crash' Date: Wed, 31 Mar 2021 15:44:27 +0800 Subject: fix #if A == B always evalutes to true --- Android.bp | 1 - utils/afl_frida/afl-frida.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'utils') diff --git a/Android.bp b/Android.bp index 64794e19..bf37757d 100644 --- a/Android.bp +++ b/Android.bp @@ -32,7 +32,6 @@ cc_defaults { target: { android_arm64: { cflags: [ - "-D__aarch64__", "-D__ANDROID__", ], }, diff --git a/utils/afl_frida/afl-frida.c b/utils/afl_frida/afl-frida.c index 711d8f33..e49d6f42 100644 --- a/utils/afl_frida/afl-frida.c +++ b/utils/afl_frida/afl-frida.c @@ -111,7 +111,7 @@ inline static void afl_maybe_log(guint64 current_pc) { } -#if GUM_NATIVE_CPU == GUM_CPU_AMD64 +#ifdef __x86_64__ static const guint8 afl_maybe_log_code[] = { @@ -177,7 +177,7 @@ void instr_basic_block(GumStalkerIterator *iterator, GumStalkerOutput *output, if (instr->address >= range->code_start && instr->address <= range->code_end) { -#if GUM_NATIVE_CPU == GUM_CPU_AMD64 +#ifdef __x86_64__ GumX86Writer *cw = output->writer.x86; if (range->current_log_impl == 0 || !gum_x86_writer_can_branch_directly_between( -- cgit 1.4.1 From 565f61a6abc30dfb4df0269384466589690fbae5 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Fri, 2 Apr 2021 20:09:16 +0400 Subject: Initialalize the autodict-ql Initialalize the autodict-ql add codeql scripts --- utils/autodict_ql/litool.ql | 10 ++++++++++ utils/autodict_ql/memcmp-str.ql | 8 ++++++++ utils/autodict_ql/strcmp-str.ql | 8 ++++++++ utils/autodict_ql/strncmp-str.ql | 8 ++++++++ utils/autodict_ql/strtool.ql | 24 ++++++++++++++++++++++++ 5 files changed, 58 insertions(+) create mode 100644 utils/autodict_ql/litool.ql create mode 100644 utils/autodict_ql/memcmp-str.ql create mode 100644 utils/autodict_ql/strcmp-str.ql create mode 100644 utils/autodict_ql/strncmp-str.ql create mode 100644 utils/autodict_ql/strtool.ql (limited to 'utils') diff --git a/utils/autodict_ql/litool.ql b/utils/autodict_ql/litool.ql new file mode 100644 index 00000000..b7f4bf33 --- /dev/null +++ b/utils/autodict_ql/litool.ql @@ -0,0 +1,10 @@ +import cpp + +class HexOrOctLiteral extends Literal{ + HexOrOctLiteral(){ + (this instanceof HexLiteral) or (this instanceof OctalLiteral) + } +} + +from HexOrOctLiteral lit +select lit.getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/memcmp-str.ql b/utils/autodict_ql/memcmp-str.ql new file mode 100644 index 00000000..830c9cac --- /dev/null +++ b/utils/autodict_ql/memcmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : memcmp trace + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("memcmp") +select 
fucall.getArgument(_).getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/strcmp-str.ql b/utils/autodict_ql/strcmp-str.ql new file mode 100644 index 00000000..83ffadaf --- /dev/null +++ b/utils/autodict_ql/strcmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : strcmp + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("strcmp") +select fucall.getArgument(_).getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/strncmp-str.ql b/utils/autodict_ql/strncmp-str.ql new file mode 100644 index 00000000..dbb952e5 --- /dev/null +++ b/utils/autodict_ql/strncmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : strncmp + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("strncmp") +select fucall.getArgument(_).getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/strtool.ql b/utils/autodict_ql/strtool.ql new file mode 100644 index 00000000..f78aabbb --- /dev/null +++ b/utils/autodict_ql/strtool.ql @@ -0,0 +1,24 @@ +import cpp +import semmle.code.cpp.dataflow.DataFlow +class StringLiteralNode extends DataFlow::Node { + StringLiteralNode() { this.asExpr() instanceof StringLiteral } +} +class MemcmpArgNode extends DataFlow::Node { + MemcmpArgNode() { + exists(FunctionCall fc | + fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and + fc.getArgument(0) = this.asExpr() + ) + or + exists(FunctionCall fc | + fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and + fc.getArgument(1) = this.asExpr() + ) + } +} + +from StringLiteralNode src, MemcmpArgNode arg +where + DataFlow::localFlow(src, arg) + +select src.asExpr().(StringLiteral).toString() \ No newline at end of file -- cgit 1.4.1 From a26ed3b7580e31b6f6f174169528fc0bebe20ad6 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:02:45 +0400 Subject: update the codes, readme - add readme - add required qlpack.yml --- utils/autodict_ql/autodict_ql.py | 188 ++++++++++++++++++++++++++++++++++++++ utils/autodict_ql/build-codeql.sh | 17 ++++ utils/autodict_ql/litan.py | 86 +++++++++++++++++ utils/autodict_ql/qlpack.yml | 3 + utils/autodict_ql/readme.md | 81 ++++++++++++++++ utils/autodict_ql/strtool.ql | 6 +- 6 files changed, 378 insertions(+), 3 deletions(-) create mode 100644 utils/autodict_ql/autodict_ql.py create mode 100644 utils/autodict_ql/build-codeql.sh create mode 100644 utils/autodict_ql/litan.py create mode 100644 utils/autodict_ql/qlpack.yml create mode 100644 utils/autodict_ql/readme.md (limited to 'utils') diff --git a/utils/autodict_ql/autodict_ql.py b/utils/autodict_ql/autodict_ql.py new file mode 100644 index 00000000..69d11f48 --- /dev/null +++ b/utils/autodict_ql/autodict_ql.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import shutil +import subprocess + +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + + #parser.add_argument("tokenpath", + #help="Destination directory for tokens") + parser.add_argument("cur", + help = "Current 
Path") + parser.add_argument("db", + help = "CodeQL database Path") + parser.add_argument("tokenpath", + help="Destination directory for tokens") + + return parser.parse_args() + +def static_analysis(file,file2,cur,db) : + with open(cur+"/"+file, "w") as f: + print(cur+"/"+file) + stream = os.popen("codeql query run " + cur +"/"+ file2 + " -d " + db ) + output = stream.read() + f.write(output) + f.close() + +def copy_tokens(cur, tokenpath) : + subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strstr-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "local-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "global-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "arrays-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + #strtool-strs + + +def codeql_analysis(cur, db) : + static_analysis("litout.out","litool.ql", cur, db) + static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) + static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) + static_analysis("strstr-strings.out","strstr-str.ql", cur, db) + static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) + static_analysis("global-values-strings.out","globals-values.ql", cur, db) + static_analysis("local-strings.out","locals-strs.ql", cur, db) + static_analysis("strtool-strings.out","strtool.ql", cur, db) + static_analysis("arrays.out","array-literals.ql", cur, db) + start_aflql(0,cur) + #command1 = [ + # 'codeql','query', 'run', + # cur + '/litool.ql', + # '-d', + # db, '>','fff.txt' + # ] + #with open("litool2.log", "w") as f: + # stream = os.popen("codeql query run litool.ql -d " + db ) + # output = stream.read() + # f.write(output) + # f.close() + #worker1 = subprocess.Popen(command1) + #print(worker1.communicate()) + + +def start_aflql(tokenpath, cur): + command = [ + 'python3', + cur + '/litan.py', + cur+'/lits/', + cur+'/litout.out' + ] + worker1 = subprocess.Popen(command) + print(worker1.communicate()) + + command1 = [ + 'python3', + cur + '/strcmp-strings.py', + cur + '/strcmp-strs/', + cur + '/strcmp-strings.out' + ] + worker2 = subprocess.Popen(command1) + print(worker2.communicate()) + + command2 = [ + 'python3', + cur + '/strncmp-strings.py', + cur + '/strncmp-strs/', + cur + '/strncmp-strings.out' + ] + worker3 = subprocess.Popen(command2) + print(worker3.communicate()) + + command3 = [ + 'python3', + cur + '/array-lits.py', + cur + '/arrays-lits/', + cur + '/arrays.out' + ] + worker4 = subprocess.Popen(command3) + print(worker4.communicate()) + + command4 = [ + 'python3', + cur + '/array-strings.py', + cur + '/arrays-strs/', + cur + '/arrays.out' + ] + worker5 = subprocess.Popen(command4) + print(worker5.communicate()) + + + command5 = [ + 
'python3', + cur + '/memcmp-strings.py', + cur + '/memcmp-strs/', + cur + '/memcmp-strings.out' + ] + worker6 = subprocess.Popen(command5) + print(worker6.communicate()) + + command6 = [ + 'python3', + cur + '/globals-strings.py', + cur + '/global-strs/', + cur + '/global-values-strings.out' + ] + worker7 = subprocess.Popen(command6) + print(worker7.communicate()) + + command7 = [ + 'python3', + cur + '/strstr-strings.py', + cur + '/strstr-strs/', + cur + '/strstr-strings.out' + ] + worker8 = subprocess.Popen(command7) + print(worker8.communicate()) + + + #strtool-strings.out + + command8 = [ + 'python3', + cur + '/stan-strings.py', + cur + '/strtool-strs/', + cur + '/strtool-strings.out' + ] + worker9 = subprocess.Popen(command8) + print(worker9.communicate()) + + command9 = [ + 'python3', + cur + '/local-strings.py', + cur + '/local-strs/', + cur + '/local-strings.out' + ] + worker10 = subprocess.Popen(command9) + print(worker10.communicate()) + +def main(): + args = parse_args() + ensure_dir(args.tokenpath) + #copy_tokens(args.cur, args.tokenpath) + codeql_analysis(args.cur, args.db) + copy_tokens(args.cur, args.tokenpath) + #start_aflql(args.tokenpath, args.cur) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/build-codeql.sh b/utils/autodict_ql/build-codeql.sh new file mode 100644 index 00000000..ccff932e --- /dev/null +++ b/utils/autodict_ql/build-codeql.sh @@ -0,0 +1,17 @@ +cd ~ +if [ -d "codeql-home" ]; then + echo "Exist !" + exit 1 +fi +sudo apt install build-essential libtool-bin python3-dev automake git vim wget -y +mkdir codeql-home +cd codeql-home +git clone https://github.com/github/codeql.git codeql-repo +git clone https://github.com/github/codeql-go.git +wget https://github.com/github/codeql-cli-binaries/releases/download/v2.4.6/codeql-linux64.zip +unzip codeql-linux64.zip +mv codeql codeql-cli +export "PATH=~/codeql-home/codeql-cli/:$PATH" +codeql resolve languages +codeql resolve qlpacks +echo "export PATH=~/codeql-home/codeql-cli/:$PATH" >> ~/.bashrc \ No newline at end of file diff --git a/utils/autodict_ql/litan.py b/utils/autodict_ql/litan.py new file mode 100644 index 00000000..18c04c34 --- /dev/null +++ b/utils/autodict_ql/litan.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com +import string +import os +import binascii +import codecs +import struct +import errno +import argparse +import re +import base64 +from binascii import unhexlify +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file to analysis and output folder to save corpdirus for constants in the overall project ------- Example usage : python2 thisfile.py outdir o.txt")) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate files.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-hex.txt, analysis take place on this file, example : python2 thisfile.py outdir out.txt") + return parser.parse_args() +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno == errno.EEXIST: + #print "[-] Directory exists, specify another directory" + exit(1) +def do_analysis1(corpdir, infile): + with open(infile, "rb") as f: + lines = f.readlines()[1:] + f.close() + new_lst = [] + n = 1 + for i, num in enumerate(lines): + if i != 0: + new_lst.append(num) + str1 = str(num) + print ("num is " + str1) + str1 = str1.rstrip('\n\n') + #str1 = str1.replace("0x",""); + str1 = str1.replace("|","") + str1 = str1.rstrip('\r\n') + str1 = str1.rstrip('\n') + str1 = str1.replace(" ","") + #str1 = str1.translate(None, string.punctuation) + translator=str.maketrans('','',string.punctuation) + str1=str1.translate(translator) + str1 = str1[1:] + str1 = str1[:-1] + print("After cleanup : " + str1) + if (str1 != '0') and (str1 != 'ffffffff') and (str1 != 'fffffffe') or (len(str1) == 4) or (len(str1) == 8): + print ("first : "+str1) + if len(str1) > 8 : + str1 = str1[:-1] + elif (len(str1) == 5) : + str1 = str1 = "0" + try: + #str1 = str1.decode("hex") + with open(corpdir+'/lit-seed{0}'.format(n), 'w') as file: + str1 = str1.replace("0x",""); + print (str1) + str1 = int(str1,base=16) + str1 = str1.to_bytes(4, byteorder='little') + file.write(str(str1)) + file.close() + with open (corpdir+'/lit-seed{0}'.format(n), 'r') as q : + a = q.readline() + a = a[1:] + print ("AFL++ Autodict-QL by Microsvuln : Writing Token :" + str(a)) + q.close() + with open (corpdir+'/lit-seed{0}'.format(n), 'w') as w1 : + w1.write(str(a)) + print ("Done!") + w1.close() + except: + print("Error!") + n = n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_analysis1(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/qlpack.yml b/utils/autodict_ql/qlpack.yml new file mode 100644 index 00000000..c037a344 --- /dev/null +++ b/utils/autodict_ql/qlpack.yml @@ -0,0 +1,3 @@ +name: automate +version: 0.0.0 +libraryPathDependencies: codeql-cpp diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md new file mode 100644 index 00000000..77a15f8e --- /dev/null +++ b/utils/autodict_ql/readme.md @@ -0,0 +1,81 @@ +# Autodict-QL - Optimal Token Generation for Fuzzing + +## What is this? + +Autodict-QL is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (Unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses the CodeQL (A powerful semantic code analysis engine) to fetch information from a code base. + +Tokens are useful when you perform fuzzing on different parsers. AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campagin. if you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . + + +## Why CodeQL ? +We basically developed this plugin on top of CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. + + +## CodeQL scripts +Currently, we pushed some scripts as defaults for Token generation. 
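A minimal sketch of running one of them directly against an existing CodeQL database (assuming a database directory named `libxml-db`, the same example name used in the usage steps below, and that the `.ql` files sit in the current directory):

```shell
# Run a single token-extraction query and capture its result table;
# the helper script in this directory drives essentially this command
# for every query it knows about.
codeql query run strcmp-str.ql -d ../libxml-db > strcmp-strings.out
```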
In addition, we provide every CodeQL script as an standalone script because it's easier to modify or test. + +Currently we provided the following CodeQL scripts : + +`strcmp-str.ql` is used to extract strings that are related to `strcmp` function. + +`strncmp-str.ql` is used to extract the strings from the `strncmp` function. + +`memcmp-str.ql` is used to extract the strings from the `memcmp` function. + +`litool.ql` extracts Magic numbers as Hexadecimal format. + +`strtool.ql` extracts strings with uses of a regex and dataflow concept to capture the string comparison functions. if strcmp is rewritten in a project as Mystrcmp or something like strmycmp, then this script can catch the arguments and these are valuable tokens. + +You can write other CodeQL scripts to extract possible effective tokens if you think they can be useful. + + +## Usage +The usage of Autodict-QL is pretty easy. But let's describe it as : + +1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system, so : + +` # chmod +x codeql-build.sh` + +` # codeql ` + +Then you should get : + +` Usage: codeql ... +Create and query CodeQL databases, or work with the QL language. + +GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is +not itself free software. Type codeql --license to see the license terms. + + --license Show the license terms for the CodeQL toolchain. +Common options: + -h, --help Show this help text. + -v, --verbose Incrementally increase the number of progress messages printed. + -q, --quiet Incrementally decrease the number of progress messages printed. +Some advanced options have been hidden; try --help -v for a fuller view. +Commands: + query Compile and execute QL code. + bqrs Get information from .bqrs files. + database Create, analyze and process CodeQL databases. + dataset [Plumbing] Work with raw QL datasets. + test Execute QL unit tests. + resolve [Deep plumbing] Helper commands to resolve disk locations etc. + execute [Deep plumbing] Low-level commands that need special JVM options. + version Show the version of the CodeQL toolchain. + generate Generate formatted QL documentation. + github Commands useful for interacting with the GitHub API through CodeQL. +` + +2. Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . + - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: + - `./configure --disable-shared` + - `codeql create database libxml-db --language=cpp --command=make + - Now you have the CodeQL database of the project :-) +3. To run the Autodict-QL, the final step is to just create a folder named `automate` in the project you want to fuzz. + - `mkdir automate` (inside the libxml directory) +4. The final step is to update the CodeQL database you created in the step 2 inside the automate dir you created at step 3 : + - `codeql database upgrade ../libxml-db` +5. Everything is set! 
:-), now you should issue the following to get the tokens : + - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` + - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` + - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. +6. Done! \ No newline at end of file diff --git a/utils/autodict_ql/strtool.ql b/utils/autodict_ql/strtool.ql index f78aabbb..253d1555 100644 --- a/utils/autodict_ql/strtool.ql +++ b/utils/autodict_ql/strtool.ql @@ -3,8 +3,8 @@ import semmle.code.cpp.dataflow.DataFlow class StringLiteralNode extends DataFlow::Node { StringLiteralNode() { this.asExpr() instanceof StringLiteral } } -class MemcmpArgNode extends DataFlow::Node { - MemcmpArgNode() { +class CmpArgNode extends DataFlow::Node { + CmpArgNode() { exists(FunctionCall fc | fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and fc.getArgument(0) = this.asExpr() @@ -17,7 +17,7 @@ class MemcmpArgNode extends DataFlow::Node { } } -from StringLiteralNode src, MemcmpArgNode arg +from StringLiteralNode src, CmpArgNode arg where DataFlow::localFlow(src, arg) -- cgit 1.4.1 From 6088a0d4c2aeada7d952ce05bc1e683b858b1ade Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:04:17 +0400 Subject: update readme update readme --- utils/autodict_ql/readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 77a15f8e..a610afb7 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -40,7 +40,7 @@ The usage of Autodict-QL is pretty easy. But let's describe it as : Then you should get : -` Usage: codeql ... +' Usage: codeql ... Create and query CodeQL databases, or work with the QL language. GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is @@ -63,7 +63,7 @@ Commands: version Show the version of the CodeQL toolchain. generate Generate formatted QL documentation. github Commands useful for interacting with the GitHub API through CodeQL. -` +' 2. Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: -- cgit 1.4.1 From cabde32140d6b781fea3c81e535b717bd01b1ec7 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:06:18 +0400 Subject: Update readme Update readme --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index a610afb7..82aa0a23 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -68,7 +68,7 @@ Commands: 2. Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. 
go to libxml and issue the following commands: - `./configure --disable-shared` - - `codeql create database libxml-db --language=cpp --command=make + - `codeql create database libxml-db --language=cpp --command=make` - Now you have the CodeQL database of the project :-) 3. To run the Autodict-QL, the final step is to just create a folder named `automate` in the project you want to fuzz. - `mkdir automate` (inside the libxml directory) -- cgit 1.4.1 From 01658fb2e8d923c2d3df225249d9e8e0931511e8 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:07:24 +0400 Subject: Update readme Update readme --- utils/autodict_ql/readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 82aa0a23..e8d3c761 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -70,8 +70,8 @@ Commands: - `./configure --disable-shared` - `codeql create database libxml-db --language=cpp --command=make` - Now you have the CodeQL database of the project :-) -3. To run the Autodict-QL, the final step is to just create a folder named `automate` in the project you want to fuzz. - - `mkdir automate` (inside the libxml directory) +3. To run the Autodict-QL, the final step is to just create a folder named `automate` in the project you want to fuzz. (inside the libxml directory) + - `mkdir automate` 4. The final step is to update the CodeQL database you created in the step 2 inside the automate dir you created at step 3 : - `codeql database upgrade ../libxml-db` 5. Everything is set! :-), now you should issue the following to get the tokens : -- cgit 1.4.1 From 7a383342de6687a09b46151c1f3cf0d44810995a Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:07:52 +0400 Subject: rename python file rename python file --- utils/autodict_ql/autodict-ql.py | 188 +++++++++++++++++++++++++++++++++++++++ utils/autodict_ql/autodict_ql.py | 188 --------------------------------------- 2 files changed, 188 insertions(+), 188 deletions(-) create mode 100644 utils/autodict_ql/autodict-ql.py delete mode 100644 utils/autodict_ql/autodict_ql.py (limited to 'utils') diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py new file mode 100644 index 00000000..69d11f48 --- /dev/null +++ b/utils/autodict_ql/autodict-ql.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import shutil +import subprocess + +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + + #parser.add_argument("tokenpath", + #help="Destination directory for tokens") + parser.add_argument("cur", + help = "Current Path") + parser.add_argument("db", + help = "CodeQL database Path") + parser.add_argument("tokenpath", + help="Destination directory for tokens") + + return parser.parse_args() + +def static_analysis(file,file2,cur,db) : + with open(cur+"/"+file, "w") as f: + print(cur+"/"+file) + stream = os.popen("codeql query run " + 
cur +"/"+ file2 + " -d " + db ) + output = stream.read() + f.write(output) + f.close() + +def copy_tokens(cur, tokenpath) : + subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strstr-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "local-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "global-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "arrays-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + #strtool-strs + + +def codeql_analysis(cur, db) : + static_analysis("litout.out","litool.ql", cur, db) + static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) + static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) + static_analysis("strstr-strings.out","strstr-str.ql", cur, db) + static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) + static_analysis("global-values-strings.out","globals-values.ql", cur, db) + static_analysis("local-strings.out","locals-strs.ql", cur, db) + static_analysis("strtool-strings.out","strtool.ql", cur, db) + static_analysis("arrays.out","array-literals.ql", cur, db) + start_aflql(0,cur) + #command1 = [ + # 'codeql','query', 'run', + # cur + '/litool.ql', + # '-d', + # db, '>','fff.txt' + # ] + #with open("litool2.log", "w") as f: + # stream = os.popen("codeql query run litool.ql -d " + db ) + # output = stream.read() + # f.write(output) + # f.close() + #worker1 = subprocess.Popen(command1) + #print(worker1.communicate()) + + +def start_aflql(tokenpath, cur): + command = [ + 'python3', + cur + '/litan.py', + cur+'/lits/', + cur+'/litout.out' + ] + worker1 = subprocess.Popen(command) + print(worker1.communicate()) + + command1 = [ + 'python3', + cur + '/strcmp-strings.py', + cur + '/strcmp-strs/', + cur + '/strcmp-strings.out' + ] + worker2 = subprocess.Popen(command1) + print(worker2.communicate()) + + command2 = [ + 'python3', + cur + '/strncmp-strings.py', + cur + '/strncmp-strs/', + cur + '/strncmp-strings.out' + ] + worker3 = subprocess.Popen(command2) + print(worker3.communicate()) + + command3 = [ + 'python3', + cur + '/array-lits.py', + cur + '/arrays-lits/', + cur + '/arrays.out' + ] + worker4 = subprocess.Popen(command3) + print(worker4.communicate()) + + command4 = [ + 'python3', + cur + '/array-strings.py', + cur + '/arrays-strs/', + cur + '/arrays.out' + ] + worker5 = subprocess.Popen(command4) + print(worker5.communicate()) + + + command5 = [ + 'python3', + cur + '/memcmp-strings.py', + cur + '/memcmp-strs/', + cur + '/memcmp-strings.out' + ] + worker6 = subprocess.Popen(command5) + print(worker6.communicate()) + + command6 = [ + 'python3', + cur + '/globals-strings.py', + cur + '/global-strs/', + cur + '/global-values-strings.out' + ] + worker7 = 
subprocess.Popen(command6) + print(worker7.communicate()) + + command7 = [ + 'python3', + cur + '/strstr-strings.py', + cur + '/strstr-strs/', + cur + '/strstr-strings.out' + ] + worker8 = subprocess.Popen(command7) + print(worker8.communicate()) + + + #strtool-strings.out + + command8 = [ + 'python3', + cur + '/stan-strings.py', + cur + '/strtool-strs/', + cur + '/strtool-strings.out' + ] + worker9 = subprocess.Popen(command8) + print(worker9.communicate()) + + command9 = [ + 'python3', + cur + '/local-strings.py', + cur + '/local-strs/', + cur + '/local-strings.out' + ] + worker10 = subprocess.Popen(command9) + print(worker10.communicate()) + +def main(): + args = parse_args() + ensure_dir(args.tokenpath) + #copy_tokens(args.cur, args.tokenpath) + codeql_analysis(args.cur, args.db) + copy_tokens(args.cur, args.tokenpath) + #start_aflql(args.tokenpath, args.cur) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/autodict_ql.py b/utils/autodict_ql/autodict_ql.py deleted file mode 100644 index 69d11f48..00000000 --- a/utils/autodict_ql/autodict_ql.py +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/env python3 -import os -import string -import binascii -import codecs -import errno -import struct -import argparse -import shutil -import subprocess - -from binascii import unhexlify - -def ensure_dir(dir): - try: - os.makedirs(dir) - except OSError as e: - if e.errno != errno.EEXIST: - raise - -def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - - #parser.add_argument("tokenpath", - #help="Destination directory for tokens") - parser.add_argument("cur", - help = "Current Path") - parser.add_argument("db", - help = "CodeQL database Path") - parser.add_argument("tokenpath", - help="Destination directory for tokens") - - return parser.parse_args() - -def static_analysis(file,file2,cur,db) : - with open(cur+"/"+file, "w") as f: - print(cur+"/"+file) - stream = os.popen("codeql query run " + cur +"/"+ file2 + " -d " + db ) - output = stream.read() - f.write(output) - f.close() - -def copy_tokens(cur, tokenpath) : - subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strstr-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "local-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "global-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "arrays-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - #strtool-strs - - -def codeql_analysis(cur, db) : - 
static_analysis("litout.out","litool.ql", cur, db) - static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) - static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) - static_analysis("strstr-strings.out","strstr-str.ql", cur, db) - static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) - static_analysis("global-values-strings.out","globals-values.ql", cur, db) - static_analysis("local-strings.out","locals-strs.ql", cur, db) - static_analysis("strtool-strings.out","strtool.ql", cur, db) - static_analysis("arrays.out","array-literals.ql", cur, db) - start_aflql(0,cur) - #command1 = [ - # 'codeql','query', 'run', - # cur + '/litool.ql', - # '-d', - # db, '>','fff.txt' - # ] - #with open("litool2.log", "w") as f: - # stream = os.popen("codeql query run litool.ql -d " + db ) - # output = stream.read() - # f.write(output) - # f.close() - #worker1 = subprocess.Popen(command1) - #print(worker1.communicate()) - - -def start_aflql(tokenpath, cur): - command = [ - 'python3', - cur + '/litan.py', - cur+'/lits/', - cur+'/litout.out' - ] - worker1 = subprocess.Popen(command) - print(worker1.communicate()) - - command1 = [ - 'python3', - cur + '/strcmp-strings.py', - cur + '/strcmp-strs/', - cur + '/strcmp-strings.out' - ] - worker2 = subprocess.Popen(command1) - print(worker2.communicate()) - - command2 = [ - 'python3', - cur + '/strncmp-strings.py', - cur + '/strncmp-strs/', - cur + '/strncmp-strings.out' - ] - worker3 = subprocess.Popen(command2) - print(worker3.communicate()) - - command3 = [ - 'python3', - cur + '/array-lits.py', - cur + '/arrays-lits/', - cur + '/arrays.out' - ] - worker4 = subprocess.Popen(command3) - print(worker4.communicate()) - - command4 = [ - 'python3', - cur + '/array-strings.py', - cur + '/arrays-strs/', - cur + '/arrays.out' - ] - worker5 = subprocess.Popen(command4) - print(worker5.communicate()) - - - command5 = [ - 'python3', - cur + '/memcmp-strings.py', - cur + '/memcmp-strs/', - cur + '/memcmp-strings.out' - ] - worker6 = subprocess.Popen(command5) - print(worker6.communicate()) - - command6 = [ - 'python3', - cur + '/globals-strings.py', - cur + '/global-strs/', - cur + '/global-values-strings.out' - ] - worker7 = subprocess.Popen(command6) - print(worker7.communicate()) - - command7 = [ - 'python3', - cur + '/strstr-strings.py', - cur + '/strstr-strs/', - cur + '/strstr-strings.out' - ] - worker8 = subprocess.Popen(command7) - print(worker8.communicate()) - - - #strtool-strings.out - - command8 = [ - 'python3', - cur + '/stan-strings.py', - cur + '/strtool-strs/', - cur + '/strtool-strings.out' - ] - worker9 = subprocess.Popen(command8) - print(worker9.communicate()) - - command9 = [ - 'python3', - cur + '/local-strings.py', - cur + '/local-strs/', - cur + '/local-strings.out' - ] - worker10 = subprocess.Popen(command9) - print(worker10.communicate()) - -def main(): - args = parse_args() - ensure_dir(args.tokenpath) - #copy_tokens(args.cur, args.tokenpath) - codeql_analysis(args.cur, args.db) - copy_tokens(args.cur, args.tokenpath) - #start_aflql(args.tokenpath, args.cur) -if __name__ == '__main__': - main() \ No newline at end of file -- cgit 1.4.1 From 67989e9f2acb5e39e9ef422c27f0fe9db3f7da95 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:09:38 +0400 Subject: update update --- utils/autodict_ql/readme.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 
e8d3c761..b368002c 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -40,7 +40,8 @@ The usage of Autodict-QL is pretty easy. But let's describe it as : Then you should get : -' Usage: codeql ... +" +Usage: codeql ... Create and query CodeQL databases, or work with the QL language. GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is @@ -63,7 +64,7 @@ Commands: version Show the version of the CodeQL toolchain. generate Generate formatted QL documentation. github Commands useful for interacting with the GitHub API through CodeQL. -' +" 2. Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: @@ -74,7 +75,7 @@ Commands: - `mkdir automate` 4. The final step is to update the CodeQL database you created in the step 2 inside the automate dir you created at step 3 : - `codeql database upgrade ../libxml-db` -5. Everything is set! :-), now you should issue the following to get the tokens : +5. Everything is set! Now you should issue the following to get the tokens : - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. -- cgit 1.4.1 From c4f418c3b27bad3cc61eb61e7b9cf6fb7bbe6868 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:12:24 +0400 Subject: Add shell command Add shell command --- utils/autodict_ql/readme.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index b368002c..c9c0d2d0 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -40,7 +40,7 @@ The usage of Autodict-QL is pretty easy. But let's describe it as : Then you should get : -" +```shell Usage: codeql ... Create and query CodeQL databases, or work with the QL language. @@ -64,7 +64,7 @@ Commands: version Show the version of the CodeQL toolchain. generate Generate formatted QL documentation. github Commands useful for interacting with the GitHub API through CodeQL. -" +``` 2. Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: @@ -76,7 +76,7 @@ Commands: 4. The final step is to update the CodeQL database you created in the step 2 inside the automate dir you created at step 3 : - `codeql database upgrade ../libxml-db` 5. Everything is set! Now you should issue the following to get the tokens : - - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` - - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. 
+ - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` + - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` + - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. 6. Done! \ No newline at end of file -- cgit 1.4.1 From bc99b5ba03815e2cfd2a6314a2fa9da78baa6fb6 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:13:18 +0400 Subject: update readme update readme --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index c9c0d2d0..c8e5556f 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -2,7 +2,7 @@ ## What is this? -Autodict-QL is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (Unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses the CodeQL (A powerful semantic code analysis engine) to fetch information from a code base. +`Autodict-QL` is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (Unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses the CodeQL (A powerful semantic code analysis engine) to fetch information from a code base. Tokens are useful when you perform fuzzing on different parsers. AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campagin. if you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . -- cgit 1.4.1 From b418c31479f5d5e1d10c75eafec9ead9351453cb Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:39:09 +0400 Subject: Some updates on readme Some updates on readme --- utils/autodict_ql/readme.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index c8e5556f..45f685c6 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -79,4 +79,19 @@ Commands: - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. -6. Done! \ No newline at end of file +6. Done! + + +## More on dictionaries and tokens +Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. +On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. 
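As a concrete sketch of how the generated tokens are consumed (the binary name, seed directory, and `tokens/` path below are placeholders, assuming a target already built with AFL++ instrumentation):

```shell
# Use every generated token file deterministically, then fuzz with the
# token directory passed to afl-fuzz via -x.
export AFL_MAX_DET_EXTRAS=$(ls tokens/ | wc -l)
afl-fuzz -i in -o out -x tokens/ -- ./target @@
```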
+ +I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` is working based on the target. +In overall, fuzzing with dictionaries and well-generated tokens will give better results. + +There are 2 important points to remember : + +- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chance to discover new bugs. +- Do not remember to set the `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries, if you forget to set this environment variable, then AFL++ use just 200 tokens and use the rest of them probablistically. So this will guarantees that your tokens will be used by AFL++. + + \ No newline at end of file -- cgit 1.4.1 From b7d12c8532da0a094aa2504e90f84a1530d569a9 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:45:46 +0400 Subject: Update readme Update readme --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 45f685c6..39857f69 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -83,7 +83,7 @@ Commands: ## More on dictionaries and tokens -Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. +Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` is working based on the target. -- cgit 1.4.1 From 8f9d1fd7b05f916d8c43d5872be54d9074bdf8db Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 03:16:13 +0400 Subject: Updates update --- utils/autodict_ql/readme.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 39857f69..3e4655c8 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -34,10 +34,11 @@ The usage of Autodict-QL is pretty easy. But let's describe it as : 1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. 
This script will install CodeQL completety and will set the required environment variables for your system, so : -` # chmod +x codeql-build.sh` - -` # codeql ` - +```shell +# chmod +x codeql-build.sh` +# sudo ./codeql-build.sh +# codeql ` +``` Then you should get : ```shell @@ -93,5 +94,3 @@ There are 2 important points to remember : - If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chance to discover new bugs. - Do not remember to set the `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries, if you forget to set this environment variable, then AFL++ use just 200 tokens and use the rest of them probablistically. So this will guarantees that your tokens will be used by AFL++. - - \ No newline at end of file -- cgit 1.4.1 From 4291c3db5dca5082aed123f3a353f8af4a0f4785 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 14:04:06 +0400 Subject: finalize 1 commit final things --- utils/autodict_ql/autodict-ql.py | 91 ++++++++-------------------------------- utils/autodict_ql/readme.md | 6 ++- 2 files changed, 22 insertions(+), 75 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py index 69d11f48..ddc95435 100644 --- a/utils/autodict_ql/autodict-ql.py +++ b/utils/autodict_ql/autodict-ql.py @@ -1,4 +1,14 @@ #!/usr/bin/env python3 +# AutoDict-QL - Optimal Token Generation for Fuzzing +# Part of AFL++ Project +# Developed and Maintained by Arash Ale Ebrahim (@Microsvuln) +# Usage : python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH] +# CURRENT_DIR = full of your current Dir +# CODEQL_DATABASE_PATH = Full path to your CodeQL database +# TOKEN_PATH = Folder name of the newly generated tokens +# Example : python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens +# Just pass the tokens folder to the -x flag of your fuzzer + import os import string import binascii @@ -42,47 +52,25 @@ def static_analysis(file,file2,cur,db) : f.close() def copy_tokens(cur, tokenpath) : - subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strstr-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "local-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "global-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "arrays-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - #strtool-strs + def codeql_analysis(cur, db) : static_analysis("litout.out","litool.ql", cur, db) static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) - static_analysis("strstr-strings.out","strstr-str.ql", 
cur, db) static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) - static_analysis("global-values-strings.out","globals-values.ql", cur, db) - static_analysis("local-strings.out","locals-strs.ql", cur, db) static_analysis("strtool-strings.out","strtool.ql", cur, db) - static_analysis("arrays.out","array-literals.ql", cur, db) - start_aflql(0,cur) - #command1 = [ - # 'codeql','query', 'run', - # cur + '/litool.ql', - # '-d', - # db, '>','fff.txt' - # ] - #with open("litool2.log", "w") as f: - # stream = os.popen("codeql query run litool.ql -d " + db ) - # output = stream.read() - # f.write(output) - # f.close() - #worker1 = subprocess.Popen(command1) - #print(worker1.communicate()) - - -def start_aflql(tokenpath, cur): + start_autodict(0,cur) + + + +def start_autodict(tokenpath, cur): command = [ 'python3', cur + '/litan.py', @@ -110,23 +98,6 @@ def start_aflql(tokenpath, cur): worker3 = subprocess.Popen(command2) print(worker3.communicate()) - command3 = [ - 'python3', - cur + '/array-lits.py', - cur + '/arrays-lits/', - cur + '/arrays.out' - ] - worker4 = subprocess.Popen(command3) - print(worker4.communicate()) - - command4 = [ - 'python3', - cur + '/array-strings.py', - cur + '/arrays-strs/', - cur + '/arrays.out' - ] - worker5 = subprocess.Popen(command4) - print(worker5.communicate()) command5 = [ @@ -138,27 +109,8 @@ def start_aflql(tokenpath, cur): worker6 = subprocess.Popen(command5) print(worker6.communicate()) - command6 = [ - 'python3', - cur + '/globals-strings.py', - cur + '/global-strs/', - cur + '/global-values-strings.out' - ] - worker7 = subprocess.Popen(command6) - print(worker7.communicate()) - - command7 = [ - 'python3', - cur + '/strstr-strings.py', - cur + '/strstr-strs/', - cur + '/strstr-strings.out' - ] - worker8 = subprocess.Popen(command7) - print(worker8.communicate()) - #strtool-strings.out - command8 = [ 'python3', cur + '/stan-strings.py', @@ -168,14 +120,7 @@ def start_aflql(tokenpath, cur): worker9 = subprocess.Popen(command8) print(worker9.communicate()) - command9 = [ - 'python3', - cur + '/local-strings.py', - cur + '/local-strs/', - cur + '/local-strings.out' - ] - worker10 = subprocess.Popen(command9) - print(worker10.communicate()) + def main(): args = parse_args() @@ -183,6 +128,6 @@ def main(): #copy_tokens(args.cur, args.tokenpath) codeql_analysis(args.cur, args.db) copy_tokens(args.cur, args.tokenpath) - #start_aflql(args.tokenpath, args.cur) + #start_autodict(args.tokenpath, args.cur) if __name__ == '__main__': main() \ No newline at end of file diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 3e4655c8..f8d23098 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -67,7 +67,7 @@ Commands: github Commands useful for interacting with the GitHub API through CodeQL. ``` -2. Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . +2. Compile your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. 
go to libxml and issue the following commands: - `./configure --disable-shared` - `codeql create database libxml-db --language=cpp --command=make` @@ -87,10 +87,12 @@ Commands: Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. -I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` is working based on the target. +I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` and `dict2file` is working based on the target. In overall, fuzzing with dictionaries and well-generated tokens will give better results. There are 2 important points to remember : - If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chance to discover new bugs. - Do not remember to set the `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries, if you forget to set this environment variable, then AFL++ use just 200 tokens and use the rest of them probablistically. So this will guarantees that your tokens will be used by AFL++. + +Thanks are going to Marc Heuse, the AFL++ main developer, Antonio Morales and Stefan Nagy \ No newline at end of file -- cgit 1.4.1 From 7f6d256014ae6728bc938e33b7038105a5714c9b Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 14:06:12 +0400 Subject: space space --- utils/autodict_ql/readme.md | 1 + 1 file changed, 1 insertion(+) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index f8d23098..0449233b 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -30,6 +30,7 @@ You can write other CodeQL scripts to extract possible effective tokens if you t ## Usage + The usage of Autodict-QL is pretty easy. But let's describe it as : 1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. 
This script will install CodeQL completety and will set the required environment variables for your system, so : -- cgit 1.4.1 From 7a3dfbce71d45742a6b571e41f07ae6b104e3a6b Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 14:07:13 +0400 Subject: remove things remove things --- utils/autodict_ql/readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 0449233b..9e6a7292 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -36,9 +36,9 @@ The usage of Autodict-QL is pretty easy. But let's describe it as : 1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system, so : ```shell -# chmod +x codeql-build.sh` +# chmod +x codeql-build.sh # sudo ./codeql-build.sh -# codeql ` +# codeql ``` Then you should get : -- cgit 1.4.1 From 8d894eec90ef738702d42274cda0d6a4e5494627 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 14:11:33 +0400 Subject: Add python scripts Add python scripts --- utils/autodict_ql/memcmp-strings.py | 64 ++++++++++++++++++++++++++++++++++++ utils/autodict_ql/stan-strings.py | 60 +++++++++++++++++++++++++++++++++ utils/autodict_ql/strcmp-strings.py | 60 +++++++++++++++++++++++++++++++++ utils/autodict_ql/strncmp-strings.py | 64 ++++++++++++++++++++++++++++++++++++ 4 files changed, 248 insertions(+) create mode 100644 utils/autodict_ql/memcmp-strings.py create mode 100644 utils/autodict_ql/stan-strings.py create mode 100644 utils/autodict_ql/strcmp-strings.py create mode 100644 utils/autodict_ql/strncmp-strings.py (limited to 'utils') diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py new file mode 100644 index 00000000..e948fba4 --- /dev/null +++ b/utils/autodict_ql/memcmp-strings.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + print("all strings : %s" % str11) + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/memcmp-str{0}'.format(n), 'w') as file: + file.write(str11) + print("Hahaha : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py new file mode 100644 index 00000000..c35d8a65 --- /dev/null +++ b/utils/autodict_ql/stan-strings.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + print("all strings : %s" % str11) + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11)) or ("<" in str11) or (">" in str11) : + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/seed-str{0}'.format(n), 'w') as file: + file.write(str11) + print("Hahaha : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py new file mode 100644 index 00000000..412b70ae --- /dev/null +++ b/utils/autodict_ql/strcmp-strings.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + print("all strings : %s" % str11) + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/strcmp-str{0}'.format(n), 'w') as file: + file.write(str11) + print("Hahaha : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py new file mode 100644 index 00000000..2c07718e --- /dev/null +++ b/utils/autodict_ql/strncmp-strings.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + print("all strings : %s" % str11) + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/strncmp-str{0}'.format(n), 'w') as file: + file.write(str11) + print("Hahaha : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file -- cgit 1.4.1 From 6c88b6b362ddc06effd8d99c32375ab34028665c Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 14:13:55 +0400 Subject: Update python scripts Update python scripts --- utils/autodict_ql/memcmp-strings.py | 2 +- utils/autodict_ql/stan-strings.py | 6 +++++- utils/autodict_ql/strcmp-strings.py | 6 +++++- utils/autodict_ql/strncmp-strings.py | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py index e948fba4..fb892aff 100644 --- a/utils/autodict_ql/memcmp-strings.py +++ b/utils/autodict_ql/memcmp-strings.py @@ -53,7 +53,7 @@ def do_string_analysis(corpdir, infile1): else : with open(corpdir+'/memcmp-str{0}'.format(n), 'w') as file: file.write(str11) - print("Hahaha : %s" % str11) + print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) n=n+1 def main(): diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py index c35d8a65..e9f6f0d0 100644 --- a/utils/autodict_ql/stan-strings.py +++ b/utils/autodict_ql/stan-strings.py @@ -1,4 +1,8 @@ #!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + import os import string import binascii @@ -49,7 +53,7 @@ def do_string_analysis(corpdir, infile1): else : with open(corpdir+'/seed-str{0}'.format(n), 'w') as file: file.write(str11) - print("Hahaha : %s" % str11) + print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) n=n+1 def main(): diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py index 412b70ae..a1b7e27c 100644 --- a/utils/autodict_ql/strcmp-strings.py +++ b/utils/autodict_ql/strcmp-strings.py @@ -1,4 +1,8 @@ #!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + import os import string import binascii @@ -49,7 +53,7 @@ def do_string_analysis(corpdir, infile1): else : with open(corpdir+'/strcmp-str{0}'.format(n), 'w') as file: file.write(str11) - print("Hahaha : %s" % str11) + print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) n=n+1 def main(): diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py index 2c07718e..2652f66e 100644 --- a/utils/autodict_ql/strncmp-strings.py +++ b/utils/autodict_ql/strncmp-strings.py @@ -53,7 +53,7 @@ def do_string_analysis(corpdir, infile1): 
else : with open(corpdir+'/strncmp-str{0}'.format(n), 'w') as file: file.write(str11) - print("Hahaha : %s" % str11) + print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) n=n+1 def main(): -- cgit 1.4.1 From 70e975704465672f49273da9f4a8f7e56f745e20 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 15:47:22 +0400 Subject: new commit - change strings new commit - change strings --- utils/autodict_ql/autodict-ql.py | 1 + utils/autodict_ql/build-codeql.sh | 4 ++-- utils/autodict_ql/memcmp-strings.py | 3 +-- utils/autodict_ql/readme.md | 11 ++++++++--- utils/autodict_ql/stan-strings.py | 3 +-- utils/autodict_ql/strcmp-strings.py | 3 +-- utils/autodict_ql/strncmp-strings.py | 3 +-- 7 files changed, 15 insertions(+), 13 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py index ddc95435..7bba57fc 100644 --- a/utils/autodict_ql/autodict-ql.py +++ b/utils/autodict_ql/autodict-ql.py @@ -57,6 +57,7 @@ def copy_tokens(cur, tokenpath) : subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["find "+tokenpath+" -size 0 -delete"],shell=True) diff --git a/utils/autodict_ql/build-codeql.sh b/utils/autodict_ql/build-codeql.sh index ccff932e..450207f6 100644 --- a/utils/autodict_ql/build-codeql.sh +++ b/utils/autodict_ql/build-codeql.sh @@ -3,7 +3,6 @@ if [ -d "codeql-home" ]; then echo "Exist !" exit 1 fi -sudo apt install build-essential libtool-bin python3-dev automake git vim wget -y mkdir codeql-home cd codeql-home git clone https://github.com/github/codeql.git codeql-repo @@ -12,6 +11,7 @@ wget https://github.com/github/codeql-cli-binaries/releases/download/v2.4.6/code unzip codeql-linux64.zip mv codeql codeql-cli export "PATH=~/codeql-home/codeql-cli/:$PATH" +echo "export PATH=~/codeql-home/codeql-cli/:$PATH" >> ~/.bashrc codeql resolve languages codeql resolve qlpacks -echo "export PATH=~/codeql-home/codeql-cli/:$PATH" >> ~/.bashrc \ No newline at end of file +codeql \ No newline at end of file diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py index fb892aff..2814da5b 100644 --- a/utils/autodict_ql/memcmp-strings.py +++ b/utils/autodict_ql/memcmp-strings.py @@ -46,9 +46,8 @@ def do_string_analysis(corpdir, infile1): str11 = str11.replace("\n","") str11 = str11.lstrip() str11 = str11.rstrip() - print("all strings : %s" % str11) str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : print("Space / Paranthesis String : %s" % str11) else : with open(corpdir+'/memcmp-str{0}'.format(n), 'w') as file: diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 9e6a7292..ccc9b0e3 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -31,13 +31,18 @@ You can write other CodeQL scripts to extract possible effective tokens if you t ## Usage +Before proceed to installation make sure that you have the following packages by installing them : +```shell +sudo apt install build-essential libtool-bin python3-dev python3 automake git vim wget -y +``` The usage of Autodict-QL is pretty easy. But let's describe it as : -1. 
First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system, so : - +1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system. +Do the following : ```shell # chmod +x codeql-build.sh -# sudo ./codeql-build.sh +# ./codeql-build.sh +# source ~/.bashrc # codeql ``` Then you should get : diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py index e9f6f0d0..5a863f80 100644 --- a/utils/autodict_ql/stan-strings.py +++ b/utils/autodict_ql/stan-strings.py @@ -46,9 +46,8 @@ def do_string_analysis(corpdir, infile1): str11 = str11.replace("\n","") str11 = str11.lstrip() str11 = str11.rstrip() - print("all strings : %s" % str11) str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11)) or ("<" in str11) or (">" in str11) : + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : print("Space / Paranthesis String : %s" % str11) else : with open(corpdir+'/seed-str{0}'.format(n), 'w') as file: diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py index a1b7e27c..1852b947 100644 --- a/utils/autodict_ql/strcmp-strings.py +++ b/utils/autodict_ql/strcmp-strings.py @@ -46,9 +46,8 @@ def do_string_analysis(corpdir, infile1): str11 = str11.replace("\n","") str11 = str11.lstrip() str11 = str11.rstrip() - print("all strings : %s" % str11) str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : print("Space / Paranthesis String : %s" % str11) else : with open(corpdir+'/strcmp-str{0}'.format(n), 'w') as file: diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py index 2652f66e..f00fa3da 100644 --- a/utils/autodict_ql/strncmp-strings.py +++ b/utils/autodict_ql/strncmp-strings.py @@ -46,9 +46,8 @@ def do_string_analysis(corpdir, infile1): str11 = str11.replace("\n","") str11 = str11.lstrip() str11 = str11.rstrip() - print("all strings : %s" % str11) str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : print("Space / Paranthesis String : %s" % str11) else : with open(corpdir+'/strncmp-str{0}'.format(n), 'w') as file: -- cgit 1.4.1 From d5fc03b71819ed75bd7134584e8f00a7f1010149 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 15:49:17 +0400 Subject: update qlpack name update qlpack name --- utils/autodict_ql/qlpack.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/qlpack.yml b/utils/autodict_ql/qlpack.yml index c037a344..28892f24 100644 --- a/utils/autodict_ql/qlpack.yml +++ b/utils/autodict_ql/qlpack.yml @@ -1,3 +1,3 @@ -name: automate +name: autodict version: 0.0.0 libraryPathDependencies: codeql-cpp -- cgit 1.4.1 From 050f331c54a7af2fdb2eb1ca33e9dacd9257dbb0 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:04:14 +0400 Subject: remove unessential things remove unessential things from scripts --- 
utils/autodict_ql/autodict-ql.py | 10 +++++----- utils/autodict_ql/memcmp-strings.py | 2 +- utils/autodict_ql/stan-strings.py | 2 +- utils/autodict_ql/strcmp-strings.py | 2 +- utils/autodict_ql/strncmp-strings.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py index 7bba57fc..b51fbb90 100644 --- a/utils/autodict_ql/autodict-ql.py +++ b/utils/autodict_ql/autodict-ql.py @@ -52,11 +52,11 @@ def static_analysis(file,file2,cur,db) : f.close() def copy_tokens(cur, tokenpath) : - subprocess.call(["cp " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["find "+tokenpath+" -size 0 -delete"],shell=True) diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py index 2814da5b..d1047caa 100644 --- a/utils/autodict_ql/memcmp-strings.py +++ b/utils/autodict_ql/memcmp-strings.py @@ -40,7 +40,7 @@ def do_string_analysis(corpdir, infile1): for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - print("num : %s" % num1) + #print("num : %s" % num1) str11 = str(num1) str11 = str11.replace("|","") str11 = str11.replace("\n","") diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py index 5a863f80..65d08c97 100644 --- a/utils/autodict_ql/stan-strings.py +++ b/utils/autodict_ql/stan-strings.py @@ -40,7 +40,7 @@ def do_string_analysis(corpdir, infile1): for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - print("num : %s" % num1) + #print("num : %s" % num1) str11 = str(num1) str11 = str11.replace("|","") str11 = str11.replace("\n","") diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py index 1852b947..88128dbb 100644 --- a/utils/autodict_ql/strcmp-strings.py +++ b/utils/autodict_ql/strcmp-strings.py @@ -40,7 +40,7 @@ def do_string_analysis(corpdir, infile1): for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - print("num : %s" % num1) + #print("num : %s" % num1) str11 = str(num1) str11 = str11.replace("|","") str11 = str11.replace("\n","") diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py index f00fa3da..0ad0e697 100644 --- a/utils/autodict_ql/strncmp-strings.py +++ b/utils/autodict_ql/strncmp-strings.py @@ -40,7 +40,7 @@ def do_string_analysis(corpdir, infile1): for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - print("num : %s" % num1) + #print("num : %s" % num1) str11 = str(num1) str11 = str11.replace("|","") str11 = str11.replace("\n","") -- 
cgit 1.4.1 From 05c13588d7f6a0c8e34623eeed0b2920737ba377 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:22:51 +0400 Subject: remove dirs remove dirs --- utils/autodict_ql/autodict-ql.py | 2 ++ utils/autodict_ql/readme.md | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py index b51fbb90..0fe7eabf 100644 --- a/utils/autodict_ql/autodict-ql.py +++ b/utils/autodict_ql/autodict-ql.py @@ -57,6 +57,8 @@ def copy_tokens(cur, tokenpath) : subprocess.call(["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["rm -rf strcmp-strs memcmp-strs strncmp-strs lits strtool-strs"],shell=True) + subprocess.call(["rm *.out"],shell=True) subprocess.call(["find "+tokenpath+" -size 0 -delete"],shell=True) diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index ccc9b0e3..16a2a20b 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -93,7 +93,7 @@ Commands: Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. -I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` and `dict2file` is working based on the target. +I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. In overall, fuzzing with dictionaries and well-generated tokens will give better results. There are 2 important points to remember : @@ -101,4 +101,3 @@ There are 2 important points to remember : - If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chance to discover new bugs. - Do not remember to set the `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries, if you forget to set this environment variable, then AFL++ use just 200 tokens and use the rest of them probablistically. So this will guarantees that your tokens will be used by AFL++. 
-Thanks are going to Marc Heuse, the AFL++ main developer, Antonio Morales and Stefan Nagy \ No newline at end of file -- cgit 1.4.1 From c4ad4681cf0aa3ff66f98053345ed7856692f25d Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:28:42 +0400 Subject: Update readme Update readme --- utils/autodict_ql/readme.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 16a2a20b..d8a3b014 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -78,15 +78,13 @@ Commands: - `./configure --disable-shared` - `codeql create database libxml-db --language=cpp --command=make` - Now you have the CodeQL database of the project :-) -3. To run the Autodict-QL, the final step is to just create a folder named `automate` in the project you want to fuzz. (inside the libxml directory) - - `mkdir automate` -4. The final step is to update the CodeQL database you created in the step 2 inside the automate dir you created at step 3 : - - `codeql database upgrade ../libxml-db` -5. Everything is set! Now you should issue the following to get the tokens : +3. The final step is to update the CodeQL database you created in the step 2 : + - `codeql database upgrade /home/user/libxml/libxml-db` +4. Everything is set! Now you should issue the following to get the tokens : - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` + - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py `pwd` /home/user/libxml/libxml-db tokens` - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. -6. Done! +5. Done! ## More on dictionaries and tokens -- cgit 1.4.1 From 2b4e93faba3877aeb49ac873b77a930ebd6f0801 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:30:58 +0400 Subject: Add note Add note --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index d8a3b014..a8d252e4 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -78,7 +78,7 @@ Commands: - `./configure --disable-shared` - `codeql create database libxml-db --language=cpp --command=make` - Now you have the CodeQL database of the project :-) -3. The final step is to update the CodeQL database you created in the step 2 : +3. The final step is to update the CodeQL database you created in the step 2 (Suppose we are in `aflplusplus/utils/autodict_ql/` directory) : - `codeql database upgrade /home/user/libxml/libxml-db` 4. Everything is set! 
Now you should issue the following to get the tokens : - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` -- cgit 1.4.1 From a7141b6a6ea045a20c0be7031bab0767064915ea Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:31:51 +0400 Subject: Add ` Add ` --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index a8d252e4..6beba871 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -82,7 +82,7 @@ Commands: - `codeql database upgrade /home/user/libxml/libxml-db` 4. Everything is set! Now you should issue the following to get the tokens : - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py `pwd` /home/user/libxml/libxml-db tokens` + - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py ``pwd`` /home/user/libxml/libxml-db tokens` - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. 5. Done! -- cgit 1.4.1 From d35a90101f1ae51fa022332828209139a7e070ad Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:33:56 +0400 Subject: change cur change current dir --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 6beba871..8c24d65c 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -82,7 +82,7 @@ Commands: - `codeql database upgrade /home/user/libxml/libxml-db` 4. Everything is set! Now you should issue the following to get the tokens : - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py ``pwd`` /home/user/libxml/libxml-db tokens` + - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py $PWD /home/user/libxml/libxml-db tokens` - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. 5. Done! 
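
As a concrete sketch of how the generated `tokens` directory is then consumed by the fuzzer (the target binary `xmllint`, the seed corpus `in/`, and all paths below are illustrative assumptions, not taken from the patches):

```shell
# run the generator as described in the steps above (example paths)
python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py $PWD /home/user/libxml/libxml-db tokens

# the readme recommends sizing AFL_MAX_DET_EXTRAS to the number of generated token files
export AFL_MAX_DET_EXTRAS=$(ls tokens | wc -l)

# hand the token directory to AFL++ via -x (afl-fuzz accepts a dictionary file or a directory of token files)
afl-fuzz -i in -o out -x ./tokens -- ./xmllint @@
```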
-- cgit 1.4.1 From 845c584b9cee7092772305912508b825155142fa Mon Sep 17 00:00:00 2001 From: begasus Date: Sun, 4 Apr 2021 17:41:43 +0000 Subject: Fix Haiku references, no and missing defines for USEMMAP --- instrumentation/afl-compiler-rt.o.c | 4 +++- utils/afl_network_proxy/afl-network-server.c | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'utils') diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index f241447a..fa53263c 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -34,8 +34,10 @@ #include #include -#include #ifndef __HAIKU__ + #include +#endif +#ifndef USEMMAP #include #endif #include diff --git a/utils/afl_network_proxy/afl-network-server.c b/utils/afl_network_proxy/afl-network-server.c index 0dfae658..60f174ee 100644 --- a/utils/afl_network_proxy/afl-network-server.c +++ b/utils/afl_network_proxy/afl-network-server.c @@ -45,7 +45,6 @@ #include #include -#include #include #include #include @@ -53,7 +52,9 @@ #include #include #include -#include +#ifndef USEMMAP + #include +#endif #include #include -- cgit 1.4.1 From 6069cac313f4f8f4e696e815d4fe2f8bcaccccf4 Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Wed, 14 Apr 2021 18:24:55 +0200 Subject: qemu driver new api --- utils/aflpp_driver/aflpp_qemu_driver_hook.c | 21 +++++++++++++++------ utils/qemu_persistent_hook/read_into_rdi.c | 2 +- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'utils') diff --git a/utils/aflpp_driver/aflpp_qemu_driver_hook.c b/utils/aflpp_driver/aflpp_qemu_driver_hook.c index 823cc42d..d3dd98b0 100644 --- a/utils/aflpp_driver/aflpp_qemu_driver_hook.c +++ b/utils/aflpp_driver/aflpp_qemu_driver_hook.c @@ -1,21 +1,30 @@ +#include "../../qemu_mode/qemuafl/qemuafl/api.h" + #include #include +void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { + #define g2h(x) ((void *)((unsigned long)(x) + guest_base)) +#define h2g(x) ((uint64_t)(x)-guest_base) -#define REGS_RDI 7 -#define REGS_RSI 6 + // In this example the register RDI is pointing to the memory location + // of the target buffer, and the length of the input is in RSI. + // This can be seen with a debugger, e.g. 
gdb (and "disass main") -void afl_persistent_hook(uint64_t *regs, uint64_t guest_base, - uint8_t *input_buf, uint32_t input_len) { + memcpy(g2h(regs->rdi), input_buf, input_buf_len); + regs->rsi = input_buf_len; - memcpy(g2h(regs[REGS_RDI]), input_buf, input_len); - regs[REGS_RSI] = input_len; +#undef g2h +#undef h2g } int afl_persistent_hook_init(void) { + // 1 for shared memory input (faster), 0 for normal input (you have to use + // read(), input_buf will be NULL) return 1; } diff --git a/utils/qemu_persistent_hook/read_into_rdi.c b/utils/qemu_persistent_hook/read_into_rdi.c index f4a8ae59..c1c6642f 100644 --- a/utils/qemu_persistent_hook/read_into_rdi.c +++ b/utils/qemu_persistent_hook/read_into_rdi.c @@ -5,7 +5,7 @@ void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, uint8_t *input_buf, uint32_t input_buf_len) { -\ + #define g2h(x) ((void *)((unsigned long)(x) + guest_base)) #define h2g(x) ((uint64_t)(x)-guest_base) -- cgit 1.4.1 From f0d300b32a8a5b3adccc8209c151382244135082 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 14 Apr 2021 18:36:22 +0200 Subject: add readme --- utils/aflpp_driver/README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 utils/aflpp_driver/README.md (limited to 'utils') diff --git a/utils/aflpp_driver/README.md b/utils/aflpp_driver/README.md new file mode 100644 index 00000000..2c339d12 --- /dev/null +++ b/utils/aflpp_driver/README.md @@ -0,0 +1,25 @@ +# afl++ drivers + +## aflpp_driver + +aflpp_driver is used to compile directly libfuzzer `LLVMFuzzerTestOneInput()` +targets. + +Just do `afl-clang-fast++ -o fuzz fuzzer_harness.cc libAFLDriver.a [plus required linking]`. + +You can also sneakily do this little trick: +If this is the clang compile command to build for libfuzzer: + `clang++ -o fuzz -fsanitize=fuzzer fuzzer_harness.cc -lfoo` +then just switch `clang++` with `afl-clang-fast++` and our compiler will +magically insert libAFLDriver.a :) + + +## aflpp_qemu_driver + +aflpp_qemu_driver is used for libfuzzer `LLVMFuzzerTestOneInput()` targets that +are to be fuzzed in qemu_mode. So we compile them with clang/clang++, without +-fsantize=fuzzer or afl-clang-fast, and link in libAFLQemuDriver.a: + +`clang++ -o fuzz fuzzer_harness.cc libAFLQemuDriver.a [plus required linking]`. + +Then just do `AFL_PRELOAD=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- ./fuzz` -- cgit 1.4.1 From fd8dc1455278bca16e852eb08ddac9a3e466b5c7 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 14 Apr 2021 18:49:02 +0200 Subject: update readme --- utils/aflpp_driver/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/aflpp_driver/README.md b/utils/aflpp_driver/README.md index 2c339d12..01bd10c0 100644 --- a/utils/aflpp_driver/README.md +++ b/utils/aflpp_driver/README.md @@ -22,4 +22,9 @@ are to be fuzzed in qemu_mode. So we compile them with clang/clang++, without `clang++ -o fuzz fuzzer_harness.cc libAFLQemuDriver.a [plus required linking]`. -Then just do `AFL_PRELOAD=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- ./fuzz` + +Then just do (where the name of the binary is `fuzz`): +``` +AFL_QEMU_PERSISTENT_ADDR=0x$(nm fuzz | grep "T LLVMFuzzerTestOneInput" | awk '{print $1}') +AFL_QEMU_PERSISTENT_HOOK=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... 
-- ./fuzz` +``` -- cgit 1.4.1 From 4a0e0270adafbc583d491dfad74d9378a4c06bf7 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 14 Apr 2021 22:23:16 +0200 Subject: allow aflpp_qemu_driver_hook.o to fail --- utils/aflpp_driver/GNUmakefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'utils') diff --git a/utils/aflpp_driver/GNUmakefile b/utils/aflpp_driver/GNUmakefile index c1a087d7..8ac054a6 100644 --- a/utils/aflpp_driver/GNUmakefile +++ b/utils/aflpp_driver/GNUmakefile @@ -26,17 +26,17 @@ debug: ar ru libAFLDriver.a afl-performance.o aflpp_driver.o aflpp_qemu_driver.o: aflpp_qemu_driver.c - $(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c + -$(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c libAFLQemuDriver.a: aflpp_qemu_driver.o - ar ru libAFLQemuDriver.a aflpp_qemu_driver.o - cp -vf libAFLQemuDriver.a ../../ + -ar ru libAFLQemuDriver.a aflpp_qemu_driver.o + -cp -vf libAFLQemuDriver.a ../../ aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o - $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so + -$(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c - $(LLVM_BINDIR)clang -fPIC $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c + -$(LLVM_BINDIR)clang -fPIC $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c test: debug #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c -- cgit 1.4.1 From c8e96e52536d47ee41967657202574d8e61562ee Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Thu, 15 Apr 2021 23:56:58 +0200 Subject: autoformat with black --- frida_mode/test/testinstr.py | 49 ++++--- unicorn_mode/helper_scripts/ida_context_loader.py | 84 +++++++----- utils/autodict_ql/autodict-ql.py | 154 ++++++++++++---------- utils/autodict_ql/litan.py | 126 +++++++++++------- utils/autodict_ql/memcmp-strings.py | 64 +++++---- utils/autodict_ql/stan-strings.py | 64 +++++---- utils/autodict_ql/strcmp-strings.py | 64 +++++---- utils/autodict_ql/strncmp-strings.py | 64 +++++---- 8 files changed, 409 insertions(+), 260 deletions(-) (limited to 'utils') diff --git a/frida_mode/test/testinstr.py b/frida_mode/test/testinstr.py index 8f5fe886..f648808b 100755 --- a/frida_mode/test/testinstr.py +++ b/frida_mode/test/testinstr.py @@ -1,32 +1,49 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import argparse from elftools.elf.elffile import ELFFile + def process_file(file, section, base): - with open(file, 'rb') as f: + with open(file, "rb") as f: for sect in ELFFile(f).iter_sections(): - if (sect.name == section): - start = base + sect.header['sh_offset'] - end = start + sect.header['sh_size'] - print ("0x%016x-0x%016x" % (start, end)) + if sect.name == section: + start = base + sect.header["sh_offset"] + end = start + sect.header["sh_size"] + print("0x%016x-0x%016x" % (start, end)) return - print ("Section '%s' not found in '%s'" % (section, file)) + print("Section '%s' not found in '%s'" % (section, file)) + def hex_value(x): return int(x, 16) + def main(): - parser = argparse.ArgumentParser(description='Process some integers.') - parser.add_argument('-f', '--file', dest='file', type=str, - help='elf file name', required=True) - parser.add_argument('-s', '--section', dest='section', type=str, - help='elf section name', required=True) - parser.add_argument('-b', '--base', dest='base', type=hex_value, - help='elf base address', 
required=True) + parser = argparse.ArgumentParser(description="Process some integers.") + parser.add_argument( + "-f", "--file", dest="file", type=str, help="elf file name", required=True + ) + parser.add_argument( + "-s", + "--section", + dest="section", + type=str, + help="elf section name", + required=True, + ) + parser.add_argument( + "-b", + "--base", + dest="base", + type=hex_value, + help="elf base address", + required=True, + ) args = parser.parse_args() - process_file (args.file, args.section, args.base) + process_file(args.file, args.section, args.base) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/unicorn_mode/helper_scripts/ida_context_loader.py b/unicorn_mode/helper_scripts/ida_context_loader.py index 31d47a90..d7984c77 100644 --- a/unicorn_mode/helper_scripts/ida_context_loader.py +++ b/unicorn_mode/helper_scripts/ida_context_loader.py @@ -34,13 +34,11 @@ import ida_segment class ContextLoaderError(Exception): - """Base "catch all" exception for this script - """ + """Base "catch all" exception for this script""" class ArchNotSupportedError(ContextLoaderError): - """Exception raised if the input file CPU architecture isn't supported fully - """ + """Exception raised if the input file CPU architecture isn't supported fully""" def parse_mapping_index(filepath: str): @@ -51,13 +49,16 @@ def parse_mapping_index(filepath: str): """ if filepath is None: - raise ContextLoaderError('_index.json file was not selected') + raise ContextLoaderError("_index.json file was not selected") try: - with open(filepath, 'rb') as _file: + with open(filepath, "rb") as _file: return json.load(_file) except Exception as ex: - raise ContextLoaderError('Failed to parse json file {}'.format(filepath)) from ex + raise ContextLoaderError( + "Failed to parse json file {}".format(filepath) + ) from ex + def get_input_name(): """Get the name of the input file @@ -68,19 +69,21 @@ def get_input_name(): input_filepath = ida_nalt.get_input_file_path() return Path(input_filepath).name + def write_segment_bytes(start: int, filepath: str): - """"Read data from context file and write it to the IDA segment + """ "Read data from context file and write it to the IDA segment :param start: Start address :param filepath: Path to context file """ - with open(filepath, 'rb') as _file: + with open(filepath, "rb") as _file: data = _file.read() decompressed_data = zlib.decompress(data) ida_bytes.put_bytes(start, decompressed_data) + def create_segment(context_dir: str, segment: dict, is_be: bool): """Create segment in IDA and map in the data from the file @@ -90,23 +93,30 @@ def create_segment(context_dir: str, segment: dict, is_be: bool): """ input_name = get_input_name() - if Path(segment['name']).name != input_name: + if Path(segment["name"]).name != input_name: ida_seg = idaapi.segment_t() - ida_seg.start_ea = segment['start'] - ida_seg.end_ea = segment['end'] + ida_seg.start_ea = segment["start"] + ida_seg.end_ea = segment["end"] ida_seg.bitness = 1 if is_be else 0 - if segment['permissions']['r']: + if segment["permissions"]["r"]: ida_seg.perm |= ida_segment.SEGPERM_READ - if segment['permissions']['w']: + if segment["permissions"]["w"]: ida_seg.perm |= ida_segment.SEGPERM_WRITE - if segment['permissions']['x']: + if segment["permissions"]["x"]: ida_seg.perm |= ida_segment.SEGPERM_EXEC - idaapi.add_segm_ex(ida_seg, Path(segment['name']).name, 'CODE', idaapi.ADDSEG_OR_DIE) + idaapi.add_segm_ex( + ida_seg, Path(segment["name"]).name, "CODE", idaapi.ADDSEG_OR_DIE + ) else: - 
idaapi.add_segm_ex(ida_seg, Path(segment['name']).name, 'DATA', idaapi.ADDSEG_OR_DIE) + idaapi.add_segm_ex( + ida_seg, Path(segment["name"]).name, "DATA", idaapi.ADDSEG_OR_DIE + ) + + if segment["content_file"]: + write_segment_bytes( + segment["start"], PurePath(context_dir, segment["content_file"]) + ) - if segment['content_file']: - write_segment_bytes(segment['start'], PurePath(context_dir, segment['content_file'])) def create_segments(index: dict, context_dir: str): """Iterate segments in index JSON, create the segment in IDA, and map in the data from the file @@ -117,9 +127,10 @@ def create_segments(index: dict, context_dir: str): info = idaapi.get_inf_structure() is_be = info.is_be() - for segment in index['segments']: + for segment in index["segments"]: create_segment(context_dir, segment, is_be) + def rebase_program(index: dict): """Rebase the program to the offset specified in the context _index.json @@ -128,20 +139,21 @@ def rebase_program(index: dict): input_name = get_input_name() new_base = None - for segment in index['segments']: - if not segment['name']: + for segment in index["segments"]: + if not segment["name"]: continue - segment_name = Path(segment['name']).name + segment_name = Path(segment["name"]).name if input_name == segment_name: - new_base = segment['start'] + new_base = segment["start"] break if not new_base: - raise ContextLoaderError('Input file is not in _index.json') + raise ContextLoaderError("Input file is not in _index.json") current_base = idaapi.get_imagebase() - ida_segment.rebase_program(new_base-current_base, 8) + ida_segment.rebase_program(new_base - current_base, 8) + def get_pc_by_arch(index: dict) -> int: """Queries the input file CPU architecture and attempts to lookup the address of the program @@ -153,13 +165,14 @@ def get_pc_by_arch(index: dict) -> int: progctr = None info = idaapi.get_inf_structure() - if info.procname == 'metapc': + if info.procname == "metapc": if info.is_64bit(): - progctr = index['regs']['rax'] + progctr = index["regs"]["rax"] elif info.is_32bit(): - progctr = index['regs']['eax'] + progctr = index["regs"]["eax"] return progctr + def write_reg_info(index: dict): """Write register info as line comment at instruction pointed to by the program counter and change focus to that location @@ -167,17 +180,19 @@ def write_reg_info(index: dict): :param index: _index.json JSON data """ - cmt = '' - for reg, val in index['regs'].items(): + cmt = "" + for reg, val in index["regs"].items(): cmt += f"{reg.ljust(6)} : {hex(val)}\n" progctr = get_pc_by_arch(index) if progctr is None: raise ArchNotSupportedError( - 'Architecture not fully supported, skipping register status comment') + "Architecture not fully supported, skipping register status comment" + ) ida_bytes.set_cmt(progctr, cmt, 0) ida_kernwin.jumpto(progctr) + def main(filepath): """Main - parse _index.json input and map context files into the database @@ -193,5 +208,6 @@ def main(filepath): except ContextLoaderError as ex: print(ex) -if __name__ == '__main__': - main(ida_kernwin.ask_file(1, '*.json', 'Import file name')) + +if __name__ == "__main__": + main(ida_kernwin.ask_file(1, "*.json", "Import file name")) diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py index 0fe7eabf..f64e3fae 100644 --- a/utils/autodict_ql/autodict-ql.py +++ b/utils/autodict_ql/autodict-ql.py @@ -11,7 +11,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -21,6 +21,7 @@ import subprocess from binascii import 
unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -28,109 +29,118 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - - #parser.add_argument("tokenpath", - #help="Destination directory for tokens") - parser.add_argument("cur", - help = "Current Path") - parser.add_argument("db", - help = "CodeQL database Path") - parser.add_argument("tokenpath", - help="Destination directory for tokens") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + + # parser.add_argument("tokenpath", + # help="Destination directory for tokens") + parser.add_argument("cur", help="Current Path") + parser.add_argument("db", help="CodeQL database Path") + parser.add_argument("tokenpath", help="Destination directory for tokens") return parser.parse_args() -def static_analysis(file,file2,cur,db) : - with open(cur+"/"+file, "w") as f: - print(cur+"/"+file) - stream = os.popen("codeql query run " + cur +"/"+ file2 + " -d " + db ) + +def static_analysis(file, file2, cur, db): + with open(cur + "/" + file, "w") as f: + print(cur + "/" + file) + stream = os.popen("codeql query run " + cur + "/" + file2 + " -d " + db) output = stream.read() f.write(output) f.close() -def copy_tokens(cur, tokenpath) : - subprocess.call(["mv " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["rm -rf strcmp-strs memcmp-strs strncmp-strs lits strtool-strs"],shell=True) - subprocess.call(["rm *.out"],shell=True) - subprocess.call(["find "+tokenpath+" -size 0 -delete"],shell=True) - - - -def codeql_analysis(cur, db) : - static_analysis("litout.out","litool.ql", cur, db) - static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) - static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) - static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) - static_analysis("strtool-strings.out","strtool.ql", cur, db) - start_autodict(0,cur) +def copy_tokens(cur, tokenpath): + subprocess.call( + ["mv " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."], shell=True + ) + subprocess.call( + ["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["rm -rf strcmp-strs memcmp-strs strncmp-strs lits strtool-strs"], 
shell=True + ) + subprocess.call(["rm *.out"], shell=True) + subprocess.call(["find " + tokenpath + " -size 0 -delete"], shell=True) + + +def codeql_analysis(cur, db): + static_analysis("litout.out", "litool.ql", cur, db) + static_analysis("strcmp-strings.out", "strcmp-str.ql", cur, db) + static_analysis("strncmp-strings.out", "strncmp-str.ql", cur, db) + static_analysis("memcmp-strings.out", "memcmp-str.ql", cur, db) + static_analysis("strtool-strings.out", "strtool.ql", cur, db) + start_autodict(0, cur) def start_autodict(tokenpath, cur): - command = [ - 'python3', - cur + '/litan.py', - cur+'/lits/', - cur+'/litout.out' - ] + command = ["python3", cur + "/litan.py", cur + "/lits/", cur + "/litout.out"] worker1 = subprocess.Popen(command) print(worker1.communicate()) - + command1 = [ - 'python3', - cur + '/strcmp-strings.py', - cur + '/strcmp-strs/', - cur + '/strcmp-strings.out' - ] + "python3", + cur + "/strcmp-strings.py", + cur + "/strcmp-strs/", + cur + "/strcmp-strings.out", + ] worker2 = subprocess.Popen(command1) print(worker2.communicate()) command2 = [ - 'python3', - cur + '/strncmp-strings.py', - cur + '/strncmp-strs/', - cur + '/strncmp-strings.out' - ] + "python3", + cur + "/strncmp-strings.py", + cur + "/strncmp-strs/", + cur + "/strncmp-strings.out", + ] worker3 = subprocess.Popen(command2) print(worker3.communicate()) - - command5 = [ - 'python3', - cur + '/memcmp-strings.py', - cur + '/memcmp-strs/', - cur + '/memcmp-strings.out' - ] + "python3", + cur + "/memcmp-strings.py", + cur + "/memcmp-strs/", + cur + "/memcmp-strings.out", + ] worker6 = subprocess.Popen(command5) print(worker6.communicate()) - - command8 = [ - 'python3', - cur + '/stan-strings.py', - cur + '/strtool-strs/', - cur + '/strtool-strings.out' - ] + "python3", + cur + "/stan-strings.py", + cur + "/strtool-strs/", + cur + "/strtool-strings.out", + ] worker9 = subprocess.Popen(command8) print(worker9.communicate()) - def main(): - args = parse_args() + args = parse_args() ensure_dir(args.tokenpath) - #copy_tokens(args.cur, args.tokenpath) + # copy_tokens(args.cur, args.tokenpath) codeql_analysis(args.cur, args.db) copy_tokens(args.cur, args.tokenpath) - #start_autodict(args.tokenpath, args.cur) -if __name__ == '__main__': - main() \ No newline at end of file + # start_autodict(args.tokenpath, args.cur) + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/litan.py b/utils/autodict_ql/litan.py index 18c04c34..7033d363 100644 --- a/utils/autodict_ql/litan.py +++ b/utils/autodict_ql/litan.py @@ -4,7 +4,7 @@ # Author : Microsvuln - Arash.vre@gmail.com import string import os -import binascii +import binascii import codecs import struct import errno @@ -12,75 +12,101 @@ import argparse import re import base64 from binascii import unhexlify + + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file to analysis and output folder to save corpdirus for constants in the overall project ------- Example usage : python2 thisfile.py outdir o.txt")) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate files.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. 
ooo-hex.txt, analysis take place on this file, example : python2 thisfile.py outdir out.txt") - return parser.parse_args() + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file to analysis and output folder to save corpdirus for constants in the overall project ------- Example usage : python2 thisfile.py outdir o.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate files." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. ooo-hex.txt, analysis take place on this file, example : python2 thisfile.py outdir out.txt", + ) + return parser.parse_args() + + def ensure_dir(dir): try: os.makedirs(dir) except OSError as e: if e.errno == errno.EEXIST: - #print "[-] Directory exists, specify another directory" + # print "[-] Directory exists, specify another directory" exit(1) + + def do_analysis1(corpdir, infile): - with open(infile, "rb") as f: - lines = f.readlines()[1:] - f.close() + with open(infile, "rb") as f: + lines = f.readlines()[1:] + f.close() new_lst = [] n = 1 for i, num in enumerate(lines): if i != 0: - new_lst.append(num) + new_lst.append(num) str1 = str(num) - print ("num is " + str1) - str1 = str1.rstrip('\n\n') - #str1 = str1.replace("0x",""); - str1 = str1.replace("|","") - str1 = str1.rstrip('\r\n') - str1 = str1.rstrip('\n') - str1 = str1.replace(" ","") - #str1 = str1.translate(None, string.punctuation) - translator=str.maketrans('','',string.punctuation) - str1=str1.translate(translator) + print("num is " + str1) + str1 = str1.rstrip("\n\n") + # str1 = str1.replace("0x",""); + str1 = str1.replace("|", "") + str1 = str1.rstrip("\r\n") + str1 = str1.rstrip("\n") + str1 = str1.replace(" ", "") + # str1 = str1.translate(None, string.punctuation) + translator = str.maketrans("", "", string.punctuation) + str1 = str1.translate(translator) str1 = str1[1:] str1 = str1[:-1] print("After cleanup : " + str1) - if (str1 != '0') and (str1 != 'ffffffff') and (str1 != 'fffffffe') or (len(str1) == 4) or (len(str1) == 8): - print ("first : "+str1) - if len(str1) > 8 : + if ( + (str1 != "0") + and (str1 != "ffffffff") + and (str1 != "fffffffe") + or (len(str1) == 4) + or (len(str1) == 8) + ): + print("first : " + str1) + if len(str1) > 8: str1 = str1[:-1] - elif (len(str1) == 5) : + elif len(str1) == 5: str1 = str1 = "0" try: - #str1 = str1.decode("hex") - with open(corpdir+'/lit-seed{0}'.format(n), 'w') as file: - str1 = str1.replace("0x",""); - print (str1) - str1 = int(str1,base=16) - str1 = str1.to_bytes(4, byteorder='little') - file.write(str(str1)) - file.close() - with open (corpdir+'/lit-seed{0}'.format(n), 'r') as q : - a = q.readline() - a = a[1:] - print ("AFL++ Autodict-QL by Microsvuln : Writing Token :" + str(a)) - q.close() - with open (corpdir+'/lit-seed{0}'.format(n), 'w') as w1 : - w1.write(str(a)) - print ("Done!") - w1.close() - except: - print("Error!") - n = n+1 + # str1 = str1.decode("hex") + with open(corpdir + "/lit-seed{0}".format(n), "w") as file: + str1 = str1.replace("0x", "") + print(str1) + str1 = int(str1, base=16) + str1 = str1.to_bytes(4, byteorder="little") + file.write(str(str1)) + file.close() + with open(corpdir + "/lit-seed{0}".format(n), "r") as q: + a = q.readline() + a = a[1:] + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token :" + + str(a) + ) + q.close() + with open( + corpdir + "/lit-seed{0}".format(n), "w" + ) as w1: + w1.write(str(a)) + print("Done!") + w1.close() + except: + print("Error!") + n = n + 1 + def main(): - args 
= parse_args() + args = parse_args() ensure_dir(args.corpdir) do_analysis1(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py index d1047caa..270a697c 100644 --- a/utils/autodict_ql/memcmp-strings.py +++ b/utils/autodict_ql/memcmp-strings.py @@ -5,7 +5,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/memcmp-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/memcmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py index 65d08c97..81cb0b97 100644 --- a/utils/autodict_ql/stan-strings.py +++ b/utils/autodict_ql/stan-strings.py @@ -5,7 +5,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/seed-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/seed-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py index 88128dbb..9c2520c9 100644 --- a/utils/autodict_ql/strcmp-strings.py +++ b/utils/autodict_ql/strcmp-strings.py @@ -5,7 +5,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/strcmp-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/strcmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py index 0ad0e697..6206b4c4 100644 --- a/utils/autodict_ql/strncmp-strings.py +++ b/utils/autodict_ql/strncmp-strings.py @@ -5,7 +5,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/strncmp-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/strncmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() -- cgit 1.4.1 From 846a46e06052c13e3036fbee05866d165adb19cc Mon Sep 17 00:00:00 2001 From: hexcoder Date: Fri, 16 Apr 2021 12:12:52 +0200 Subject: review --- utils/autodict_ql/readme.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 8c24d65c..31a20352 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -2,13 +2,13 @@ ## What is this? -`Autodict-QL` is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (Unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses the CodeQL (A powerful semantic code analysis engine) to fetch information from a code base. +`Autodict-QL` is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses CodeQL (a powerful semantic code analysis engine) to fetch information from a code base. -Tokens are useful when you perform fuzzing on different parsers. AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campagin. if you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . +Tokens are useful when you perform fuzzing on different parsers. The AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campaign. If you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . ## Why CodeQL ? -We basically developed this plugin on top of CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. 
This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. +We basically developed this plugin on top of the CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. ## CodeQL scripts @@ -16,7 +16,7 @@ Currently, we pushed some scripts as defaults for Token generation. In addition, Currently we provided the following CodeQL scripts : -`strcmp-str.ql` is used to extract strings that are related to `strcmp` function. +`strcmp-str.ql` is used to extract strings that are related to the `strcmp` function. `strncmp-str.ql` is used to extract the strings from the `strncmp` function. @@ -24,18 +24,18 @@ Currently we provided the following CodeQL scripts : `litool.ql` extracts Magic numbers as Hexadecimal format. -`strtool.ql` extracts strings with uses of a regex and dataflow concept to capture the string comparison functions. if strcmp is rewritten in a project as Mystrcmp or something like strmycmp, then this script can catch the arguments and these are valuable tokens. +`strtool.ql` extracts strings with uses of a regex and dataflow concept to capture the string comparison functions. If `strcmp` is rewritten in a project as Mystrcmp or something like strmycmp, then this script can catch the arguments and these are valuable tokens. You can write other CodeQL scripts to extract possible effective tokens if you think they can be useful. ## Usage -Before proceed to installation make sure that you have the following packages by installing them : +Before you proceed to installation make sure that you have the following packages by installing them : ```shell sudo apt install build-essential libtool-bin python3-dev python3 automake git vim wget -y ``` -The usage of Autodict-QL is pretty easy. But let's describe it as : +The usage of Autodict-QL is pretty easy. But let's describe it as: 1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system. Do the following : @@ -45,7 +45,7 @@ Do the following : # source ~/.bashrc # codeql ``` -Then you should get : +Then you should get: ```shell Usage: codeql ... @@ -73,29 +73,29 @@ Commands: github Commands useful for interacting with the GitHub API through CodeQL. ``` -2. Compile your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: +2. Compile your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard. + - First you need to create a CodeQL database of the project codebase, suppose we want to compile `libxml` with codeql. Go to libxml and issue the following commands: - `./configure --disable-shared` - `codeql create database libxml-db --language=cpp --command=make` - Now you have the CodeQL database of the project :-) -3. 
The final step is to update the CodeQL database you created in the step 2 (Suppose we are in `aflplusplus/utils/autodict_ql/` directory) : +3. The final step is to update the CodeQL database you created in step 2 (Suppose we are in `aflplusplus/utils/autodict_ql/` directory): - `codeql database upgrade /home/user/libxml/libxml-db` -4. Everything is set! Now you should issue the following to get the tokens : +4. Everything is set! Now you should issue the following to get the tokens: - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py $PWD /home/user/libxml/libxml-db tokens` - - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. + - This will create the final `tokens` dir for you and you are done, then pass the tokens path to AFL++'s `-x` flag. 5. Done! ## More on dictionaries and tokens -Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. -On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. +Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extract useful tokens, in addition with LTO instrumentation mode, this dict2file is automatically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. In addition it's independent from LLVM system. +On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. For example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionaries, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generate dictionaries based on the target. -I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. +I've personally prefered to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manually generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. In overall, fuzzing with dictionaries and well-generated tokens will give better results. There are 2 important points to remember : -- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chance to discover new bugs. 
-- Do not remember to set the `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries, if you forget to set this environment variable, then AFL++ use just 200 tokens and use the rest of them probablistically. So this will guarantees that your tokens will be used by AFL++. +- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chances to discover new bugs. +- Do not forget to set `AFL_MAX_DET_EXTRAS` at least to the number of generated dictionaries. If you forget to set this environment variable, then AFL++ uses just 200 tokens and use the rest of them only probabilistically. So this will guarantee that your tokens will be used by AFL++. -- cgit 1.4.1 From 523aaaebefb34737cbc0964a284b0ca67f477ad3 Mon Sep 17 00:00:00 2001 From: Microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Fri, 16 Apr 2021 15:39:45 +0430 Subject: Add newline Add newline --- utils/autodict_ql/build-codeql.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/build-codeql.sh b/utils/autodict_ql/build-codeql.sh index 450207f6..6ae4b362 100644 --- a/utils/autodict_ql/build-codeql.sh +++ b/utils/autodict_ql/build-codeql.sh @@ -14,4 +14,4 @@ export "PATH=~/codeql-home/codeql-cli/:$PATH" echo "export PATH=~/codeql-home/codeql-cli/:$PATH" >> ~/.bashrc codeql resolve languages codeql resolve qlpacks -codeql \ No newline at end of file +codeql -- cgit 1.4.1 From 2019b42ceda386ce63e36312ea0606b216019bac Mon Sep 17 00:00:00 2001 From: Microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Fri, 16 Apr 2021 15:41:12 +0430 Subject: Update readme fix typo in readme --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 8c24d65c..3402a210 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -88,7 +88,7 @@ Commands: ## More on dictionaries and tokens -Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. +Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automatically generating tokens. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. 
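For readers following along, the individual commands from the numbered steps above can be strung together into one short session. This is an illustrative sketch added by the editor, not part of either readme patch: it reuses the libxml paths and the `tokens` directory from the examples earlier in this readme, the `./xmllint` target binary is a placeholder assumption, and sizing `AFL_MAX_DET_EXTRAS` from the token count simply follows the advice in the review patch above.

```shell
# Inside the libxml source tree (after ./configure --disable-shared), as in steps 2 and 3:
codeql create database libxml-db --language=cpp --command=make
codeql database upgrade /home/user/libxml/libxml-db

# Generate the tokens (step 4), then hand them to afl-fuzz via -x:
python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py $PWD /home/user/libxml/libxml-db tokens

# Let AFL++ use all generated tokens deterministically; ./xmllint is a placeholder target.
export AFL_MAX_DET_EXTRAS=$(ls tokens | wc -l)
afl-fuzz -i in -o out -x tokens -- ./xmllint @@
```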
I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. -- cgit 1.4.1 From 98989f1088d04dd4c0d21834c38b7683f1cfb42d Mon Sep 17 00:00:00 2001 From: Microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Fri, 16 Apr 2021 15:45:22 +0430 Subject: Add new line Add new line --- utils/autodict_ql/litool.ql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/litool.ql b/utils/autodict_ql/litool.ql index b7f4bf33..76f429c1 100644 --- a/utils/autodict_ql/litool.ql +++ b/utils/autodict_ql/litool.ql @@ -7,4 +7,4 @@ class HexOrOctLiteral extends Literal{ } from HexOrOctLiteral lit -select lit.getValueText() \ No newline at end of file +select lit.getValueText() -- cgit 1.4.1 From a3f8fc5d1ccabc455e28157ee86211f0c11c81a3 Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Mon, 26 Apr 2021 16:03:08 +0200 Subject: moved custom_mutator examples --- custom_mutators/README.md | 8 + custom_mutators/examples/Makefile | 7 + custom_mutators/examples/README.md | 35 ++ custom_mutators/examples/XmlMutatorMin.py | 348 ++++++++++++++++++++ custom_mutators/examples/common.py | 40 +++ custom_mutators/examples/custom_mutator_helpers.h | 342 ++++++++++++++++++++ custom_mutators/examples/example.c | 376 ++++++++++++++++++++++ custom_mutators/examples/example.py | 187 +++++++++++ custom_mutators/examples/post_library_gif.so.c | 165 ++++++++++ custom_mutators/examples/post_library_png.so.c | 163 ++++++++++ custom_mutators/examples/simple-chunk-replace.py | 66 ++++ custom_mutators/examples/simple_example.c | 74 +++++ custom_mutators/examples/wrapper_afl_min.py | 123 +++++++ docs/custom_mutators.md | 4 +- docs/life_pro_tips.md | 2 +- qemu_mode/README.md | 4 +- test/test-custom-mutators.sh | 4 +- utils/custom_mutators/Makefile | 7 - utils/custom_mutators/README.md | 35 -- utils/custom_mutators/XmlMutatorMin.py | 348 -------------------- utils/custom_mutators/common.py | 40 --- utils/custom_mutators/custom_mutator_helpers.h | 342 -------------------- utils/custom_mutators/example.c | 376 ---------------------- utils/custom_mutators/example.py | 187 ----------- utils/custom_mutators/post_library_gif.so.c | 165 ---------- utils/custom_mutators/post_library_png.so.c | 163 ---------- utils/custom_mutators/simple-chunk-replace.py | 66 ---- utils/custom_mutators/simple_example.c | 74 ----- utils/custom_mutators/wrapper_afl_min.py | 123 ------- 29 files changed, 1941 insertions(+), 1933 deletions(-) create mode 100644 custom_mutators/examples/Makefile create mode 100644 custom_mutators/examples/README.md create mode 100644 custom_mutators/examples/XmlMutatorMin.py create mode 100644 custom_mutators/examples/common.py create mode 100644 custom_mutators/examples/custom_mutator_helpers.h create mode 100644 custom_mutators/examples/example.c create mode 100644 custom_mutators/examples/example.py create mode 100644 custom_mutators/examples/post_library_gif.so.c create mode 100644 custom_mutators/examples/post_library_png.so.c create mode 100644 custom_mutators/examples/simple-chunk-replace.py create mode 100644 custom_mutators/examples/simple_example.c create mode 100644 custom_mutators/examples/wrapper_afl_min.py delete mode 100644 utils/custom_mutators/Makefile delete mode 100644 utils/custom_mutators/README.md delete mode 100644 utils/custom_mutators/XmlMutatorMin.py delete mode 100644 utils/custom_mutators/common.py delete mode 
100644 utils/custom_mutators/custom_mutator_helpers.h delete mode 100644 utils/custom_mutators/example.c delete mode 100644 utils/custom_mutators/example.py delete mode 100644 utils/custom_mutators/post_library_gif.so.c delete mode 100644 utils/custom_mutators/post_library_png.so.c delete mode 100644 utils/custom_mutators/simple-chunk-replace.py delete mode 100644 utils/custom_mutators/simple_example.c delete mode 100644 utils/custom_mutators/wrapper_afl_min.py (limited to 'utils') diff --git a/custom_mutators/README.md b/custom_mutators/README.md index b0444c85..5e1d0fe6 100644 --- a/custom_mutators/README.md +++ b/custom_mutators/README.md @@ -3,6 +3,14 @@ Custom mutators enhance and alter the mutation strategies of afl++. For further information and documentation on how to write your own, read [the docs](../docs/custom_mutators.md). +## Examples + +The `./examples` folder contains examples for custom mutators in python and C. + +## Rust + +In `./rust`, you will find rust bindings, including a simple example in `./rust/example` and an example for structured fuzzing, based on lain, in`./rust/example_lain`. + ## The afl++ Grammar Mutator If you use git to clone afl++, then the following will incorporate our diff --git a/custom_mutators/examples/Makefile b/custom_mutators/examples/Makefile new file mode 100644 index 00000000..9849f3f4 --- /dev/null +++ b/custom_mutators/examples/Makefile @@ -0,0 +1,7 @@ +all: libexamplemutator.so + +libexamplemutator.so: + $(CC) $(CFLAGS) -D_FORTIFY_SOURCE=2 -O3 -fPIC -shared -g -I ../../include example.c -o libexamplemutator.so + +clean: + rm -rf libexamplemutator.so diff --git a/custom_mutators/examples/README.md b/custom_mutators/examples/README.md new file mode 100644 index 00000000..655f7a5e --- /dev/null +++ b/custom_mutators/examples/README.md @@ -0,0 +1,35 @@ +# Examples for the custom mutator + +These are example and helper files for the custom mutator feature. +See [docs/custom_mutators.md](../../docs/custom_mutators.md) for more information + +Note that if you compile with python3.7 you must use python3 scripts, and if +you use python2.7 to compile python2 scripts! + +simple_example.c - most simplest example. generates a random sized buffer + filled with 'A' + +example.c - this is a simple example written in C and should be compiled to a + shared library. Use make to compile it and produce libexamplemutator.so + +example.py - this is the template you can use, the functions are there but they + are empty + +post_library_gif.so.c - fix a fuzz input to ensure it is valid for GIF + +post_library_png.so.c - fix a fuzz input to ensure it is valid for PNG + +simple-chunk-replace.py - this is a simple example where chunks are replaced + +common.py - this can be used for common functions and helpers. + the examples do not use this though. But you can :) + +wrapper_afl_min.py - mutation of XML documents, loads XmlMutatorMin.py + +XmlMutatorMin.py - module for XML mutation + +custom_mutator_helpers.h is an header that defines some helper routines +like surgical_havoc_mutate() that allow to perform a randomly chosen +mutation from a subset of the havoc mutations. +If you do so, you have to specify -I /path/to/AFLplusplus/include when +compiling. 
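Since the examples readme above only lists the files, here is a brief usage sketch for how such a mutator is typically loaded. It is an editorial illustration, not part of the patch: the `in`, `out` and `./target` arguments are placeholders, the paths assume the new `custom_mutators/examples/` location introduced by this commit, and `AFL_CUSTOM_MUTATOR_LIBRARY` / `AFL_PYTHON_MODULE` are the standard AFL++ environment variables for loading compiled and Python custom mutators (see the docs/custom_mutators.md reference above).

```shell
# Build libexamplemutator.so with the Makefile shown above.
cd custom_mutators/examples && make

# Fuzz a placeholder target with the compiled C example mutator.
export AFL_CUSTOM_MUTATOR_LIBRARY=$PWD/libexamplemutator.so
afl-fuzz -i in -o out -- ./target @@

# Or, in a fresh shell, load the Python example instead
# (needs an afl-fuzz built with Python support).
export PYTHONPATH=$PWD
export AFL_PYTHON_MODULE=example
afl-fuzz -i in -o out -- ./target @@
```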
diff --git a/custom_mutators/examples/XmlMutatorMin.py b/custom_mutators/examples/XmlMutatorMin.py new file mode 100644 index 00000000..3e6cd0ff --- /dev/null +++ b/custom_mutators/examples/XmlMutatorMin.py @@ -0,0 +1,348 @@ +#!/usr/bin/python + +""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """ + +from __future__ import print_function +from copy import deepcopy +from lxml import etree as ET +import random, re, io + + +########################### +# The XmlMutatorMin class # +########################### + + +class XmlMutatorMin: + + """ + Optionals parameters: + seed Seed used by the PRNG (default: "RANDOM") + verbose Verbosity (default: False) + """ + + def __init__(self, seed="RANDOM", verbose=False): + + """ Initialize seed, database and mutators """ + + # Verbosity + self.verbose = verbose + + # Initialize PRNG + self.seed = str(seed) + if self.seed == "RANDOM": + random.seed() + else: + if self.verbose: + print("Static seed '%s'" % self.seed) + random.seed(self.seed) + + # Initialize input and output documents + self.input_tree = None + self.tree = None + + # High-level mutators (no database needed) + hl_mutators_delete = [ + "del_node_and_children", + "del_node_but_children", + "del_attribute", + "del_content", + ] # Delete items + hl_mutators_fuzz = ["fuzz_attribute"] # Randomly change attribute values + + # Exposed mutators + self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete + + def __parse_xml(self, xml): + + """ Parse an XML string. Basic wrapper around lxml.parse() """ + + try: + # Function parse() takes care of comments / DTD / processing instructions / ... + tree = ET.parse(io.BytesIO(xml)) + except ET.ParseError: + raise RuntimeError("XML isn't well-formed!") + except LookupError as e: + raise RuntimeError(e) + + # Return a document wrapper + return tree + + def __exec_among(self, module, functions, min_times, max_times): + + """ Randomly execute $functions between $min and $max times """ + + for i in xrange(random.randint(min_times, max_times)): + # Function names are mangled because they are "private" + getattr(module, "_XmlMutatorMin__" + random.choice(functions))() + + def __serialize_xml(self, tree): + + """ Serialize a XML document. Basic wrapper around lxml.tostring() """ + + return ET.tostring( + tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding + ) + + def __ver(self, version): + + """ Helper for displaying lxml version numbers """ + + return ".".join(map(str, version)) + + def reset(self): + + """ Reset the mutator """ + + self.tree = deepcopy(self.input_tree) + + def init_from_string(self, input_string): + + """ Initialize the mutator from a XML string """ + + # Get a pointer to the top-element + self.input_tree = self.__parse_xml(input_string) + + # Get a working copy + self.tree = deepcopy(self.input_tree) + + def save_to_string(self): + + """ Return the current XML document as UTF-8 string """ + + # Return a text version of the tree + return self.__serialize_xml(self.tree) + + def __pick_element(self, exclude_root_node=False): + + """ Pick a random element from the current document """ + + # Get a list of all elements, but nodes like PI and comments + elems = list(self.tree.getroot().iter(tag=ET.Element)) + + # Is the root node excluded? 
+ if exclude_root_node: + start = 1 + else: + start = 0 + + # Pick a random element + try: + elem_id = random.randint(start, len(elems) - 1) + elem = elems[elem_id] + except ValueError: + # Should only occurs if "exclude_root_node = True" + return (None, None) + + return (elem_id, elem) + + def __fuzz_attribute(self): + + """ Fuzz (part of) an attribute value """ + + # Select a node to modify + (rand_elem_id, rand_elem) = self.__pick_element() + + # Get all the attributes + attribs = rand_elem.keys() + + # Is there attributes? + if len(attribs) < 1: + if self.verbose: + print("No attribute: can't replace!") + return + + # Pick a random attribute + rand_attrib_id = random.randint(0, len(attribs) - 1) + rand_attrib = attribs[rand_attrib_id] + + # We have the attribute to modify + # Get its value + attrib_value = rand_elem.get(rand_attrib) + # print("- Value: " + attrib_value) + + # Should we work on the whole value? + func_call = "(?P[a-zA-Z:\-]+)\((?P.*?)\)" + p = re.compile(func_call) + l = p.findall(attrib_value) + if random.choice((True, False)) and l: + # Randomly pick one the function calls + (func, args) = random.choice(l) + # Split by "," and randomly pick one of the arguments + value = random.choice(args.split(",")) + # Remove superfluous characters + unclean_value = value + value = value.strip(" ").strip("'") + # print("Selected argument: [%s]" % value) + else: + value = attrib_value + + # For each type, define some possible replacement values + choices_number = ( + "0", + "11111", + "-128", + "2", + "-1", + "1/3", + "42/0", + "1094861636 idiv 1.0", + "-1123329771506872 idiv 3.8", + "17=$numericRTF", + str(3 + random.randrange(0, 100)), + ) + + choices_letter = ( + "P" * (25 * random.randrange(1, 100)), + "%s%s%s%s%s%s", + "foobar", + ) + + choices_alnum = ( + "Abc123", + "020F0302020204030204", + "020F0302020204030204" * (random.randrange(5, 20)), + ) + + # Fuzz the value + if random.choice((True, False)) and value == "": + + # Empty + new_value = value + + elif random.choice((True, False)) and value.isdigit(): + + # Numbers + new_value = random.choice(choices_number) + + elif random.choice((True, False)) and value.isalpha(): + + # Letters + new_value = random.choice(choices_letter) + + elif random.choice((True, False)) and value.isalnum(): + + # Alphanumeric + new_value = random.choice(choices_alnum) + + else: + + # Default type + new_value = random.choice(choices_alnum + choices_letter + choices_number) + + # If we worked on a substring, apply changes to the whole string + if value != attrib_value: + # No ' around empty values + if new_value != "" and value != "": + new_value = "'" + new_value + "'" + # Apply changes + new_value = attrib_value.replace(unclean_value, new_value) + + # Log something + if self.verbose: + print( + "Fuzzing attribute #%i '%s' of tag #%i '%s'" + % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag) + ) + + # Modify the attribute + rand_elem.set(rand_attrib, new_value.decode("utf-8")) + + def __del_node_and_children(self): + + """High-level minimizing mutator + Delete a random node and its children (i.e. delete a random tree)""" + + self.__del_node(True) + + def __del_node_but_children(self): + + """High-level minimizing mutator + Delete a random node but its children (i.e. 
link them to the parent of the deleted node)""" + + self.__del_node(False) + + def __del_node(self, delete_children): + + """ Called by the __del_node_* mutators """ + + # Select a node to modify (but the root one) + (rand_elem_id, rand_elem) = self.__pick_element(exclude_root_node=True) + + # If the document includes only a top-level element + # Then we can't pick a element (given that "exclude_root_node = True") + + # Is the document deep enough? + if rand_elem is None: + if self.verbose: + print("Can't delete a node: document not deep enough!") + return + + # Log something + if self.verbose: + but_or_and = "and" if delete_children else "but" + print( + "Deleting tag #%i '%s' %s its children" + % (rand_elem_id, rand_elem.tag, but_or_and) + ) + + if delete_children is False: + # Link children of the random (soon to be deleted) node to its parent + for child in rand_elem: + rand_elem.getparent().append(child) + + # Remove the node + rand_elem.getparent().remove(rand_elem) + + def __del_content(self): + + """High-level minimizing mutator + Delete the attributes and children of a random node""" + + # Select a node to modify + (rand_elem_id, rand_elem) = self.__pick_element() + + # Log something + if self.verbose: + print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag)) + + # Reset the node + rand_elem.clear() + + def __del_attribute(self): + + """High-level minimizing mutator + Delete a random attribute from a random node""" + + # Select a node to modify + (rand_elem_id, rand_elem) = self.__pick_element() + + # Get all the attributes + attribs = rand_elem.keys() + + # Is there attributes? + if len(attribs) < 1: + if self.verbose: + print("No attribute: can't delete!") + return + + # Pick a random attribute + rand_attrib_id = random.randint(0, len(attribs) - 1) + rand_attrib = attribs[rand_attrib_id] + + # Log something + if self.verbose: + print( + "Deleting attribute #%i '%s' of tag #%i '%s'" + % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag) + ) + + # Delete the attribute + rand_elem.attrib.pop(rand_attrib) + + def mutate(self, min=1, max=5): + + """ Execute some high-level mutators between $min and $max times, then some medium-level ones """ + + # High-level mutation + self.__exec_among(self, self.hl_mutators_all, min, max) diff --git a/custom_mutators/examples/common.py b/custom_mutators/examples/common.py new file mode 100644 index 00000000..44a5056a --- /dev/null +++ b/custom_mutators/examples/common.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +Module containing functions shared between multiple AFL modules + +@author: Christian Holler (:decoder) + +@license: + +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +@contact: choller@mozilla.com +""" + +from __future__ import print_function +import random +import os +import re + + +def randel(l): + if not l: + return None + return l[random.randint(0, len(l) - 1)] + + +def randel_pop(l): + if not l: + return None + return l.pop(random.randint(0, len(l) - 1)) + + +def write_exc_example(data, exc): + exc_name = re.sub(r"[^a-zA-Z0-9]", "_", repr(exc)) + + if not os.path.exists(exc_name): + with open(exc_name, "w") as f: + f.write(data) diff --git a/custom_mutators/examples/custom_mutator_helpers.h b/custom_mutators/examples/custom_mutator_helpers.h new file mode 100644 index 00000000..62e6efba --- /dev/null +++ b/custom_mutators/examples/custom_mutator_helpers.h @@ -0,0 +1,342 @@ +#ifndef CUSTOM_MUTATOR_HELPERS +#define CUSTOM_MUTATOR_HELPERS + +#include "config.h" +#include "types.h" +#include + +#define INITIAL_GROWTH_SIZE (64) + +#define RAND_BELOW(limit) (rand() % (limit)) + +/* Use in a struct: creates a name_buf and a name_size variable. */ +#define BUF_VAR(type, name) \ + type * name##_buf; \ + size_t name##_size; +/* this fills in `&structptr->something_buf, &structptr->something_size`. */ +#define BUF_PARAMS(struct, name) \ + (void **)&struct->name##_buf, &struct->name##_size + +typedef struct { + +} afl_t; + +static void surgical_havoc_mutate(u8 *out_buf, s32 begin, s32 end) { + + static s8 interesting_8[] = {INTERESTING_8}; + static s16 interesting_16[] = {INTERESTING_8, INTERESTING_16}; + static s32 interesting_32[] = {INTERESTING_8, INTERESTING_16, INTERESTING_32}; + + switch (RAND_BELOW(12)) { + + case 0: { + + /* Flip a single bit somewhere. Spooky! */ + + s32 bit_idx = ((RAND_BELOW(end - begin) + begin) << 3) + RAND_BELOW(8); + + out_buf[bit_idx >> 3] ^= 128 >> (bit_idx & 7); + + break; + + } + + case 1: { + + /* Set byte to interesting value. */ + + u8 val = interesting_8[RAND_BELOW(sizeof(interesting_8))]; + out_buf[(RAND_BELOW(end - begin) + begin)] = val; + + break; + + } + + case 2: { + + /* Set word to interesting value, randomly choosing endian. */ + + if (end - begin < 2) break; + + s32 byte_idx = (RAND_BELOW(end - begin) + begin); + + if (byte_idx >= end - 1) break; + + switch (RAND_BELOW(2)) { + + case 0: + *(u16 *)(out_buf + byte_idx) = + interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)]; + break; + case 1: + *(u16 *)(out_buf + byte_idx) = + SWAP16(interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)]); + break; + + } + + break; + + } + + case 3: { + + /* Set dword to interesting value, randomly choosing endian. */ + + if (end - begin < 4) break; + + s32 byte_idx = (RAND_BELOW(end - begin) + begin); + + if (byte_idx >= end - 3) break; + + switch (RAND_BELOW(2)) { + + case 0: + *(u32 *)(out_buf + byte_idx) = + interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]; + break; + case 1: + *(u32 *)(out_buf + byte_idx) = + SWAP32(interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]); + break; + + } + + break; + + } + + case 4: { + + /* Set qword to interesting value, randomly choosing endian. */ + + if (end - begin < 8) break; + + s32 byte_idx = (RAND_BELOW(end - begin) + begin); + + if (byte_idx >= end - 7) break; + + switch (RAND_BELOW(2)) { + + case 0: + *(u64 *)(out_buf + byte_idx) = + (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]; + break; + case 1: + *(u64 *)(out_buf + byte_idx) = SWAP64( + (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]); + break; + + } + + break; + + } + + case 5: { + + /* Randomly subtract from byte. 
*/ + + out_buf[(RAND_BELOW(end - begin) + begin)] -= 1 + RAND_BELOW(ARITH_MAX); + + break; + + } + + case 6: { + + /* Randomly add to byte. */ + + out_buf[(RAND_BELOW(end - begin) + begin)] += 1 + RAND_BELOW(ARITH_MAX); + + break; + + } + + case 7: { + + /* Randomly subtract from word, random endian. */ + + if (end - begin < 2) break; + + s32 byte_idx = (RAND_BELOW(end - begin) + begin); + + if (byte_idx >= end - 1) break; + + if (RAND_BELOW(2)) { + + *(u16 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX); + + } else { + + u16 num = 1 + RAND_BELOW(ARITH_MAX); + + *(u16 *)(out_buf + byte_idx) = + SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) - num); + + } + + break; + + } + + case 8: { + + /* Randomly add to word, random endian. */ + + if (end - begin < 2) break; + + s32 byte_idx = (RAND_BELOW(end - begin) + begin); + + if (byte_idx >= end - 1) break; + + if (RAND_BELOW(2)) { + + *(u16 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX); + + } else { + + u16 num = 1 + RAND_BELOW(ARITH_MAX); + + *(u16 *)(out_buf + byte_idx) = + SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) + num); + + } + + break; + + } + + case 9: { + + /* Randomly subtract from dword, random endian. */ + + if (end - begin < 4) break; + + s32 byte_idx = (RAND_BELOW(end - begin) + begin); + + if (byte_idx >= end - 3) break; + + if (RAND_BELOW(2)) { + + *(u32 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX); + + } else { + + u32 num = 1 + RAND_BELOW(ARITH_MAX); + + *(u32 *)(out_buf + byte_idx) = + SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) - num); + + } + + break; + + } + + case 10: { + + /* Randomly add to dword, random endian. */ + + if (end - begin < 4) break; + + s32 byte_idx = (RAND_BELOW(end - begin) + begin); + + if (byte_idx >= end - 3) break; + + if (RAND_BELOW(2)) { + + *(u32 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX); + + } else { + + u32 num = 1 + RAND_BELOW(ARITH_MAX); + + *(u32 *)(out_buf + byte_idx) = + SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) + num); + + } + + break; + + } + + case 11: { + + /* Just set a random byte to a random value. Because, + why not. We use XOR with 1-255 to eliminate the + possibility of a no-op. */ + + out_buf[(RAND_BELOW(end - begin) + begin)] ^= 1 + RAND_BELOW(255); + + break; + + } + + } + +} + +/* This function calculates the next power of 2 greater or equal its argument. + @return The rounded up power of 2 (if no overflow) or 0 on overflow. +*/ +static inline size_t next_pow2(size_t in) { + + if (in == 0 || in > (size_t)-1) + return 0; /* avoid undefined behaviour under-/overflow */ + size_t out = in - 1; + out |= out >> 1; + out |= out >> 2; + out |= out >> 4; + out |= out >> 8; + out |= out >> 16; + return out + 1; + +} + +/* This function makes sure *size is > size_needed after call. + It will realloc *buf otherwise. + *size will grow exponentially as per: + https://blog.mozilla.org/nnethercote/2014/11/04/please-grow-your-buffers-exponentially/ + Will return NULL and free *buf if size_needed is <1 or realloc failed. + @return For convenience, this function returns *buf. + */ +static inline void *maybe_grow(void **buf, size_t *size, size_t size_needed) { + + /* No need to realloc */ + if (likely(size_needed && *size >= size_needed)) return *buf; + + /* No initial size was set */ + if (size_needed < INITIAL_GROWTH_SIZE) size_needed = INITIAL_GROWTH_SIZE; + + /* grow exponentially */ + size_t next_size = next_pow2(size_needed); + + /* handle overflow */ + if (!next_size) { next_size = size_needed; } + + /* alloc */ + *buf = realloc(*buf, next_size); + *size = *buf ? 
next_size : 0; + + return *buf; + +} + +/* Swaps buf1 ptr and buf2 ptr, as well as their sizes */ +static inline void afl_swap_bufs(void **buf1, size_t *size1, void **buf2, + size_t *size2) { + + void * scratch_buf = *buf1; + size_t scratch_size = *size1; + *buf1 = *buf2; + *size1 = *size2; + *buf2 = scratch_buf; + *size2 = scratch_size; + +} + +#undef INITIAL_GROWTH_SIZE + +#endif + diff --git a/custom_mutators/examples/example.c b/custom_mutators/examples/example.c new file mode 100644 index 00000000..23add128 --- /dev/null +++ b/custom_mutators/examples/example.c @@ -0,0 +1,376 @@ +/* + New Custom Mutator for AFL++ + Written by Khaled Yakdan + Andrea Fioraldi + Shengtuo Hu + Dominik Maier +*/ + +// You need to use -I /path/to/AFLplusplus/include +#include "custom_mutator_helpers.h" + +#include +#include +#include +#include + +#define DATA_SIZE (100) + +static const char *commands[] = { + + "GET", + "PUT", + "DEL", + +}; + +typedef struct my_mutator { + + afl_t *afl; + + // any additional data here! + size_t trim_size_current; + int trimmming_steps; + int cur_step; + + // Reused buffers: + BUF_VAR(u8, fuzz); + BUF_VAR(u8, data); + BUF_VAR(u8, havoc); + BUF_VAR(u8, trim); + BUF_VAR(u8, post_process); + +} my_mutator_t; + +/** + * Initialize this custom mutator + * + * @param[in] afl a pointer to the internal state object. Can be ignored for + * now. + * @param[in] seed A seed for this mutator - the same seed should always mutate + * in the same way. + * @return Pointer to the data object this custom mutator instance should use. + * There may be multiple instances of this mutator in one afl-fuzz run! + * Return NULL on error. + */ +my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { + + srand(seed); // needed also by surgical_havoc_mutate() + + my_mutator_t *data = calloc(1, sizeof(my_mutator_t)); + if (!data) { + + perror("afl_custom_init alloc"); + return NULL; + + } + + data->afl = afl; + + return data; + +} + +/** + * Perform custom mutations on a given input + * + * (Optional for now. Required in the future) + * + * @param[in] data pointer returned in afl_custom_init for this fuzz case + * @param[in] buf Pointer to input data to be mutated + * @param[in] buf_size Size of input data + * @param[out] out_buf the buffer we will work on. we can reuse *buf. NULL on + * error. + * @param[in] add_buf Buffer containing the additional test case + * @param[in] add_buf_size Size of the additional test case + * @param[in] max_size Maximum size of the mutated output. The mutation must not + * produce data larger than max_size. + * @return Size of the mutated output. + */ +size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, + u8 **out_buf, uint8_t *add_buf, + size_t add_buf_size, // add_buf can be NULL + size_t max_size) { + + // Make sure that the packet size does not exceed the maximum size expected by + // the fuzzer + size_t mutated_size = DATA_SIZE <= max_size ? DATA_SIZE : max_size; + + // maybe_grow is optimized to be quick for reused buffers. + u8 *mutated_out = maybe_grow(BUF_PARAMS(data, fuzz), mutated_size); + if (!mutated_out) { + + *out_buf = NULL; + perror("custom mutator allocation (maybe_grow)"); + return 0; /* afl-fuzz will very likely error out after this. 
*/ + + } + + // Randomly select a command string to add as a header to the packet + memcpy(mutated_out, commands[rand() % 3], 3); + + // Mutate the payload of the packet + int i; + for (i = 0; i < 8; ++i) { + + // Randomly perform one of the (no len modification) havoc mutations + surgical_havoc_mutate(mutated_out, 3, mutated_size); + + } + + *out_buf = mutated_out; + return mutated_size; + +} + +/** + * A post-processing function to use right before AFL writes the test case to + * disk in order to execute the target. + * + * (Optional) If this functionality is not needed, simply don't define this + * function. + * + * @param[in] data pointer returned in afl_custom_init for this fuzz case + * @param[in] buf Buffer containing the test case to be executed + * @param[in] buf_size Size of the test case + * @param[out] out_buf Pointer to the buffer containing the test case after + * processing. External library should allocate memory for out_buf. + * The buf pointer may be reused (up to the given buf_size); + * @return Size of the output buffer after processing or the needed amount. + * A return of 0 indicates an error. + */ +size_t afl_custom_post_process(my_mutator_t *data, uint8_t *buf, + size_t buf_size, uint8_t **out_buf) { + + uint8_t *post_process_buf = + maybe_grow(BUF_PARAMS(data, post_process), buf_size + 5); + if (!post_process_buf) { + + perror("custom mutator realloc failed."); + *out_buf = NULL; + return 0; + + } + + memcpy(post_process_buf + 5, buf, buf_size); + post_process_buf[0] = 'A'; + post_process_buf[1] = 'F'; + post_process_buf[2] = 'L'; + post_process_buf[3] = '+'; + post_process_buf[4] = '+'; + + *out_buf = post_process_buf; + + return buf_size + 5; + +} + +/** + * This method is called at the start of each trimming operation and receives + * the initial buffer. It should return the amount of iteration steps possible + * on this input (e.g. if your input has n elements and you want to remove + * them one by one, return n, if you do a binary search, return log(n), + * and so on...). + * + * If your trimming algorithm doesn't allow you to determine the amount of + * (remaining) steps easily (esp. while running), then you can alternatively + * return 1 here and always return 0 in post_trim until you are finished and + * no steps remain. In that case, returning 1 in post_trim will end the + * trimming routine. The whole current index/max iterations stuff is only used + * to show progress. + * + * (Optional) + * + * @param data pointer returned in afl_custom_init for this fuzz case + * @param buf Buffer containing the test case + * @param buf_size Size of the test case + * @return The amount of possible iteration steps to trim the input. + * negative on error. + */ +int32_t afl_custom_init_trim(my_mutator_t *data, uint8_t *buf, + size_t buf_size) { + + // We simply trim once + data->trimmming_steps = 1; + + data->cur_step = 0; + + if (!maybe_grow(BUF_PARAMS(data, trim), buf_size)) { + + perror("init_trim grow"); + return -1; + + } + + memcpy(data->trim_buf, buf, buf_size); + + data->trim_size_current = buf_size; + + return data->trimmming_steps; + +} + +/** + * This method is called for each trimming operation. It doesn't have any + * arguments because we already have the initial buffer from init_trim and we + * can memorize the current state in *data. This can also save + * reparsing steps for each iteration. It should return the trimmed input + * buffer, where the returned data must not exceed the initial input data in + * length. 
Returning anything that is larger than the original data (passed + * to init_trim) will result in a fatal abort of AFLFuzz. + * + * (Optional) + * + * @param[in] data pointer returned in afl_custom_init for this fuzz case + * @param[out] out_buf Pointer to the buffer containing the trimmed test case. + * External library should allocate memory for out_buf. + * AFL++ will not release the memory after saving the test case. + * Keep a ref in *data. + * *out_buf = NULL is treated as error. + * @return Pointer to the size of the trimmed test case + */ +size_t afl_custom_trim(my_mutator_t *data, uint8_t **out_buf) { + + *out_buf = data->trim_buf; + + // Remove the last byte of the trimming input + return data->trim_size_current - 1; + +} + +/** + * This method is called after each trim operation to inform you if your + * trimming step was successful or not (in terms of coverage). If you receive + * a failure here, you should reset your input to the last known good state. + * + * (Optional) + * + * @param[in] data pointer returned in afl_custom_init for this fuzz case + * @param success Indicates if the last trim operation was successful. + * @return The next trim iteration index (from 0 to the maximum amount of + * steps returned in init_trim). negative ret on failure. + */ +int32_t afl_custom_post_trim(my_mutator_t *data, int success) { + + if (success) { + + ++data->cur_step; + return data->cur_step; + + } + + return data->trimmming_steps; + +} + +/** + * Perform a single custom mutation on a given input. + * This mutation is stacked with the other muatations in havoc. + * + * (Optional) + * + * @param[in] data pointer returned in afl_custom_init for this fuzz case + * @param[in] buf Pointer to the input data to be mutated and the mutated + * output + * @param[in] buf_size Size of input data + * @param[out] out_buf The output buffer. buf can be reused, if the content + * fits. *out_buf = NULL is treated as error. + * @param[in] max_size Maximum size of the mutated output. The mutation must + * not produce data larger than max_size. + * @return Size of the mutated output. + */ +size_t afl_custom_havoc_mutation(my_mutator_t *data, u8 *buf, size_t buf_size, + u8 **out_buf, size_t max_size) { + + if (buf_size == 0) { + + *out_buf = maybe_grow(BUF_PARAMS(data, havoc), 1); + if (!*out_buf) { + + perror("custom havoc: maybe_grow"); + return 0; + + } + + **out_buf = rand() % 256; + buf_size = 1; + + } else { + + // We reuse buf here. It's legal and faster. + *out_buf = buf; + + } + + size_t victim = rand() % buf_size; + (*out_buf)[victim] += rand() % 10; + + return buf_size; + +} + +/** + * Return the probability (in percentage) that afl_custom_havoc_mutation + * is called in havoc. By default it is 6 %. + * + * (Optional) + * + * @param[in] data pointer returned in afl_custom_init for this fuzz case + * @return The probability (0-100). + */ +uint8_t afl_custom_havoc_mutation_probability(my_mutator_t *data) { + + return 5; // 5 % + +} + +/** + * Determine whether the fuzzer should fuzz the queue entry or not. + * + * (Optional) + * + * @param[in] data pointer returned in afl_custom_init for this fuzz case + * @param filename File name of the test case in the queue entry + * @return Return True(1) if the fuzzer will fuzz the queue entry, and + * False(0) otherwise. + */ +uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) { + + return 1; + +} + +/** + * Allow for additional analysis (e.g. 
calling a different tool that does a + * different kind of coverage and saves this for the custom mutator). + * + * (Optional) + * + * @param data pointer returned in afl_custom_init for this fuzz case + * @param filename_new_queue File name of the new queue entry + * @param filename_orig_queue File name of the original queue entry + */ +void afl_custom_queue_new_entry(my_mutator_t * data, + const uint8_t *filename_new_queue, + const uint8_t *filename_orig_queue) { + + /* Additional analysis on the original or new test case */ + +} + +/** + * Deinitialize everything + * + * @param data The data ptr from afl_custom_init + */ +void afl_custom_deinit(my_mutator_t *data) { + + free(data->post_process_buf); + free(data->havoc_buf); + free(data->data_buf); + free(data->fuzz_buf); + free(data->trim_buf); + free(data); + +} + diff --git a/custom_mutators/examples/example.py b/custom_mutators/examples/example.py new file mode 100644 index 00000000..3a6d22e4 --- /dev/null +++ b/custom_mutators/examples/example.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +Example Python Module for AFLFuzz + +@author: Christian Holler (:decoder) + +@license: + +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. + +@contact: choller@mozilla.com +""" + +import random + + +COMMANDS = [ + b"GET", + b"PUT", + b"DEL", + b"AAAAAAAAAAAAAAAAA", +] + + +def init(seed): + """ + Called once when AFLFuzz starts up. Used to seed our RNG. + + @type seed: int + @param seed: A 32-bit random value + """ + random.seed(seed) + + +def deinit(): + pass + + +def fuzz(buf, add_buf, max_size): + """ + Called per fuzzing iteration. + + @type buf: bytearray + @param buf: The buffer that should be mutated. + + @type add_buf: bytearray + @param add_buf: A second buffer that can be used as mutation source. + + @type max_size: int + @param max_size: Maximum size of the mutated output. The mutation must not + produce data larger than max_size. + + @rtype: bytearray + @return: A new bytearray containing the mutated data + """ + ret = bytearray(100) + + ret[:3] = random.choice(COMMANDS) + + return ret + + +# Uncomment and implement the following methods if you want to use a custom +# trimming algorithm. See also the documentation for a better API description. + +# def init_trim(buf): +# ''' +# Called per trimming iteration. +# +# @type buf: bytearray +# @param buf: The buffer that should be trimmed. +# +# @rtype: int +# @return: The maximum number of trimming steps. +# ''' +# global ... +# +# # Initialize global variables +# +# # Figure out how many trimming steps are possible. +# # If this is not possible for your trimming, you can +# # return 1 instead and always return 0 in post_trim +# # until you are done (then you return 1). +# +# return steps +# +# def trim(): +# ''' +# Called per trimming iteration. +# +# @rtype: bytearray +# @return: A new bytearray containing the trimmed data. +# ''' +# global ... +# +# # Implement the actual trimming here +# +# return bytearray(...) +# +# def post_trim(success): +# ''' +# Called after each trimming operation. +# +# @type success: bool +# @param success: Indicates if the last trim operation was successful. +# +# @rtype: int +# @return: The next trim index (0 to max number of steps) where max +# number of steps indicates the trimming is done. +# ''' +# global ... 
+# +# if not success: +# # Restore last known successful input, determine next index +# else: +# # Just determine the next index, based on what was successfully +# # removed in the last step +# +# return next_index +# +# def post_process(buf): +# ''' +# Called just before the execution to write the test case in the format +# expected by the target +# +# @type buf: bytearray +# @param buf: The buffer containing the test case to be executed +# +# @rtype: bytearray +# @return: The buffer containing the test case after +# ''' +# return buf +# +# def havoc_mutation(buf, max_size): +# ''' +# Perform a single custom mutation on a given input. +# +# @type buf: bytearray +# @param buf: The buffer that should be mutated. +# +# @type max_size: int +# @param max_size: Maximum size of the mutated output. The mutation must not +# produce data larger than max_size. +# +# @rtype: bytearray +# @return: A new bytearray containing the mutated data +# ''' +# return mutated_buf +# +# def havoc_mutation_probability(): +# ''' +# Called for each `havoc_mutation`. Return the probability (in percentage) +# that `havoc_mutation` is called in havoc. Be default it is 6%. +# +# @rtype: int +# @return: The probability (0-100) +# ''' +# return prob +# +# def queue_get(filename): +# ''' +# Called at the beginning of each fuzz iteration to determine whether the +# test case should be fuzzed +# +# @type filename: str +# @param filename: File name of the test case in the current queue entry +# +# @rtype: bool +# @return: Return True if the custom mutator decides to fuzz the test case, +# and False otherwise +# ''' +# return True +# +# def queue_new_entry(filename_new_queue, filename_orig_queue): +# ''' +# Called after adding a new test case to the queue +# +# @type filename_new_queue: str +# @param filename_new_queue: File name of the new queue entry +# +# @type filename_orig_queue: str +# @param filename_orig_queue: File name of the original queue entry +# ''' +# pass diff --git a/custom_mutators/examples/post_library_gif.so.c b/custom_mutators/examples/post_library_gif.so.c new file mode 100644 index 00000000..ac10f409 --- /dev/null +++ b/custom_mutators/examples/post_library_gif.so.c @@ -0,0 +1,165 @@ +/* + american fuzzy lop++ - postprocessor library example + -------------------------------------------------- + + Originally written by Michal Zalewski + Edited by Dominik Maier, 2020 + + Copyright 2015 Google Inc. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + Postprocessor libraries can be passed to afl-fuzz to perform final cleanup + of any mutated test cases - for example, to fix up checksums in PNG files. + + Please heed the following warnings: + + 1) In almost all cases, it is more productive to comment out checksum logic + in the targeted binary (as shown in ../libpng_no_checksum/). One possible + exception is the process of fuzzing binary-only software in QEMU mode. + + 2) The use of postprocessors for anything other than checksums is + questionable and may cause more harm than good. AFL is normally pretty good + about dealing with length fields, magic values, etc. + + 3) Postprocessors that do anything non-trivial must be extremely robust to + gracefully handle malformed data and other error conditions - otherwise, + they will crash and take afl-fuzz down with them. 
Be wary of reading past + *len and of integer overflows when calculating file offsets. + + In other words, THIS IS PROBABLY NOT WHAT YOU WANT - unless you really, + honestly know what you're doing =) + + With that out of the way: the postprocessor library is passed to afl-fuzz + via AFL_POST_LIBRARY. The library must be compiled with: + + gcc -shared -Wall -O3 post_library.so.c -o post_library.so + + AFL will call the afl_custom_post_process() function for every mutated output + buffer. From there, you have three choices: + + 1) If you don't want to modify the test case, simply set `*out_buf = in_buf` + and return the original `len`. + + 2) If you want to skip this test case altogether and have AFL generate a + new one, return 0 or set `*out_buf = NULL`. + Use this sparingly - it's faster than running the target program + with patently useless inputs, but still wastes CPU time. + + 3) If you want to modify the test case, allocate an appropriately-sized + buffer, move the data into that buffer, make the necessary changes, and + then return the new pointer as out_buf. Return an appropriate len + afterwards. + + Note that the buffer will *not* be freed for you. To avoid memory leaks, + you need to free it or reuse it on subsequent calls (as shown below). + + *** Feel free to reuse the original 'in_buf' BUFFER and return it. *** + + Aight. The example below shows a simple postprocessor that tries to make + sure that all input files start with "GIF89a". + + PS. If you don't like C, you can try out the unix-based wrapper from + Ben Nagy instead: https://github.com/bnagy/aflfix + + */ + +#include +#include +#include + +/* Header that must be present at the beginning of every test case: */ + +#define HEADER "GIF89a" + +typedef struct post_state { + + unsigned char *buf; + size_t size; + +} post_state_t; + +void *afl_custom_init(void *afl) { + + post_state_t *state = malloc(sizeof(post_state_t)); + if (!state) { + + perror("malloc"); + return NULL; + + } + + state->buf = calloc(sizeof(unsigned char), 4096); + if (!state->buf) { + + free(state); + perror("calloc"); + return NULL; + + } + + return state; + +} + +/* The actual postprocessor routine called by afl-fuzz: */ + +size_t afl_custom_post_process(post_state_t *data, unsigned char *in_buf, + unsigned int len, unsigned char **out_buf) { + + /* Skip execution altogether for buffers shorter than 6 bytes (just to + show how it's done). We can trust len to be sane. */ + + if (len < strlen(HEADER)) return 0; + + /* Do nothing for buffers that already start with the expected header. */ + + if (!memcmp(in_buf, HEADER, strlen(HEADER))) { + + *out_buf = in_buf; + return len; + + } + + /* Allocate memory for new buffer, reusing previous allocation if + possible. */ + + *out_buf = realloc(data->buf, len); + + /* If we're out of memory, the most graceful thing to do is to return the + original buffer and give up on modifying it. Let AFL handle OOM on its + own later on. */ + + if (!*out_buf) { + + *out_buf = in_buf; + return len; + + } + + /* Copy the original data to the new location. */ + + memcpy(*out_buf, in_buf, len); + + /* Insert the new header. */ + + memcpy(*out_buf, HEADER, strlen(HEADER)); + + /* Return the new len. It hasn't changed, so it's just len. 
*/ + + return len; + +} + +/* Gets called afterwards */ +void afl_custom_deinit(post_state_t *data) { + + free(data->buf); + free(data); + +} + diff --git a/custom_mutators/examples/post_library_png.so.c b/custom_mutators/examples/post_library_png.so.c new file mode 100644 index 00000000..941f7e55 --- /dev/null +++ b/custom_mutators/examples/post_library_png.so.c @@ -0,0 +1,163 @@ +/* + american fuzzy lop++ - postprocessor for PNG + ------------------------------------------ + + Originally written by Michal Zalewski + + Copyright 2015 Google Inc. All rights reserved. + Adapted to the new API, 2020 by Dominik Maier + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + See post_library.so.c for a general discussion of how to implement + postprocessors. This specific postprocessor attempts to fix up PNG + checksums, providing a slightly more complicated example than found + in post_library.so.c. + + Compile with: + + gcc -shared -Wall -O3 post_library_png.so.c -o post_library_png.so -lz + + */ + +#include +#include +#include +#include +#include + +#include + +/* A macro to round an integer up to 4 kB. */ + +#define UP4K(_i) ((((_i) >> 12) + 1) << 12) + +typedef struct post_state { + + unsigned char *buf; + size_t size; + +} post_state_t; + +void *afl_custom_init(void *afl) { + + post_state_t *state = malloc(sizeof(post_state_t)); + if (!state) { + + perror("malloc"); + return NULL; + + } + + state->buf = calloc(sizeof(unsigned char), 4096); + if (!state->buf) { + + free(state); + perror("calloc"); + return NULL; + + } + + return state; + +} + +size_t afl_custom_post_process(post_state_t *data, const unsigned char *in_buf, + unsigned int len, + const unsigned char **out_buf) { + + unsigned char *new_buf = (unsigned char *)in_buf; + unsigned int pos = 8; + + /* Don't do anything if there's not enough room for the PNG header + (8 bytes). */ + + if (len < 8) { + + *out_buf = in_buf; + return len; + + } + + /* Minimum size of a zero-length PNG chunk is 12 bytes; if we + don't have that, we can bail out. */ + + while (pos + 12 <= len) { + + unsigned int chunk_len, real_cksum, file_cksum; + + /* Chunk length is the first big-endian dword in the chunk. */ + + chunk_len = ntohl(*(uint32_t *)(in_buf + pos)); + + /* Bail out if chunk size is too big or goes past EOF. */ + + if (chunk_len > 1024 * 1024 || pos + 12 + chunk_len > len) break; + + /* Chunk checksum is calculated for chunk ID (dword) and the actual + payload. */ + + real_cksum = htonl(crc32(0, in_buf + pos + 4, chunk_len + 4)); + + /* The in-file checksum is the last dword past the chunk data. */ + + file_cksum = *(uint32_t *)(in_buf + pos + 8 + chunk_len); + + /* If the checksums do not match, we need to fix the file. */ + + if (real_cksum != file_cksum) { + + /* First modification? Make a copy of the input buffer. Round size + up to 4 kB to minimize the number of reallocs needed. */ + + if (new_buf == in_buf) { + + if (len <= data->size) { + + new_buf = data->buf; + + } else { + + new_buf = realloc(data->buf, UP4K(len)); + if (!new_buf) { + + *out_buf = in_buf; + return len; + + } + + data->buf = new_buf; + data->size = UP4K(len); + memcpy(new_buf, in_buf, len); + + } + + } + + *(uint32_t *)(new_buf + pos + 8 + chunk_len) = real_cksum; + + } + + /* Skip the entire chunk and move to the next one. 
*/ + + pos += 12 + chunk_len; + + } + + *out_buf = new_buf; + return len; + +} + +/* Gets called afterwards */ +void afl_custom_deinit(post_state_t *data) { + + free(data->buf); + free(data); + +} + diff --git a/custom_mutators/examples/simple-chunk-replace.py b/custom_mutators/examples/simple-chunk-replace.py new file mode 100644 index 00000000..c57218dd --- /dev/null +++ b/custom_mutators/examples/simple-chunk-replace.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +Simple Chunk Cross-Over Replacement Module for AFLFuzz + +@author: Christian Holler (:decoder) + +@license: + +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. + +@contact: choller@mozilla.com +""" + +import random + + +def init(seed): + """ + Called once when AFLFuzz starts up. Used to seed our RNG. + + @type seed: int + @param seed: A 32-bit random value + """ + # Seed our RNG + random.seed(seed) + + +def fuzz(buf, add_buf, max_size): + """ + Called per fuzzing iteration. + + @type buf: bytearray + @param buf: The buffer that should be mutated. + + @type add_buf: bytearray + @param add_buf: A second buffer that can be used as mutation source. + + @type max_size: int + @param max_size: Maximum size of the mutated output. The mutation must not + produce data larger than max_size. + + @rtype: bytearray + @return: A new bytearray containing the mutated data + """ + # Make a copy of our input buffer for returning + ret = bytearray(buf) + + # Take a random fragment length between 2 and 32 (or less if add_buf is shorter) + fragment_len = random.randint(1, min(len(add_buf), 32)) + + # Determine a random source index where to take the data chunk from + rand_src_idx = random.randint(0, len(add_buf) - fragment_len) + + # Determine a random destination index where to put the data chunk + rand_dst_idx = random.randint(0, len(buf)) + + # Make the chunk replacement + ret[rand_dst_idx : rand_dst_idx + fragment_len] = add_buf[ + rand_src_idx : rand_src_idx + fragment_len + ] + + # Return data + return ret diff --git a/custom_mutators/examples/simple_example.c b/custom_mutators/examples/simple_example.c new file mode 100644 index 00000000..d888ec1f --- /dev/null +++ b/custom_mutators/examples/simple_example.c @@ -0,0 +1,74 @@ +// This simple example just creates random buffer <= 100 filled with 'A' +// needs -I /path/to/AFLplusplus/include +#include "custom_mutator_helpers.h" + +#include +#include +#include +#include + +#ifndef _FIXED_CHAR + #define _FIXED_CHAR 0x41 +#endif + +typedef struct my_mutator { + + afl_t *afl; + + // Reused buffers: + BUF_VAR(u8, fuzz); + +} my_mutator_t; + +my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { + + srand(seed); + my_mutator_t *data = calloc(1, sizeof(my_mutator_t)); + if (!data) { + + perror("afl_custom_init alloc"); + return NULL; + + } + + data->afl = afl; + + return data; + +} + +size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, + u8 **out_buf, uint8_t *add_buf, + size_t add_buf_size, // add_buf can be NULL + size_t max_size) { + + int size = (rand() % 100) + 1; + if (size > max_size) size = max_size; + u8 *mutated_out = maybe_grow(BUF_PARAMS(data, fuzz), size); + if (!mutated_out) { + + *out_buf = NULL; + perror("custom mutator allocation (maybe_grow)"); + return 0; /* afl-fuzz will very likely error out after this. 
*/ + + } + + memset(mutated_out, _FIXED_CHAR, size); + + *out_buf = mutated_out; + return size; + +} + +/** + * Deinitialize everything + * + * @param data The data ptr from afl_custom_init + */ +void afl_custom_deinit(my_mutator_t *data) { + + free(data->fuzz_buf); + free(data); + +} + diff --git a/custom_mutators/examples/wrapper_afl_min.py b/custom_mutators/examples/wrapper_afl_min.py new file mode 100644 index 00000000..5cd60031 --- /dev/null +++ b/custom_mutators/examples/wrapper_afl_min.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python + +from XmlMutatorMin import XmlMutatorMin + +# Default settings (production mode) + +__mutator__ = None +__seed__ = "RANDOM" +__log__ = False +__log_file__ = "wrapper.log" + + +# AFL functions +def log(text): + """ + Logger + """ + + global __seed__ + global __log__ + global __log_file__ + + if __log__: + with open(__log_file__, "a") as logf: + logf.write("[%s] %s\n" % (__seed__, text)) + + +def init(seed): + """ + Called once when AFL starts up. Seed is used to identify the AFL instance in log files + """ + + global __mutator__ + global __seed__ + + # Get the seed + __seed__ = seed + + # Create a global mutation class + try: + __mutator__ = XmlMutatorMin(__seed__, verbose=__log__) + log("init(): Mutator created") + except RuntimeError as e: + log("init(): Can't create mutator: %s" % e.message) + + +def fuzz(buf, add_buf, max_size): + """ + Called for each fuzzing iteration. + """ + + global __mutator__ + + # Do we have a working mutator object? + if __mutator__ is None: + log("fuzz(): Can't fuzz, no mutator available") + return buf + + # Try to use the AFL buffer + via_buffer = True + + # Interpret the AFL buffer (an array of bytes) as a string + if via_buffer: + try: + buf_str = str(buf) + log("fuzz(): AFL buffer converted to a string") + except Exception: + via_buffer = False + log("fuzz(): Can't convert AFL buffer to a string") + + # Load XML from the AFL string + if via_buffer: + try: + __mutator__.init_from_string(buf_str) + log( + "fuzz(): Mutator successfully initialized with AFL buffer (%d bytes)" + % len(buf_str) + ) + except Exception: + via_buffer = False + log("fuzz(): Can't initialize mutator with AFL buffer") + + # If init from AFL buffer wasn't succesful + if not via_buffer: + log("fuzz(): Returning unmodified AFL buffer") + return buf + + # Sucessful initialization -> mutate + try: + __mutator__.mutate(max=5) + log("fuzz(): Input mutated") + except Exception: + log("fuzz(): Can't mutate input => returning buf") + return buf + + # Convert mutated data to a array of bytes + try: + data = bytearray(__mutator__.save_to_string()) + log("fuzz(): Mutated data converted as bytes") + except Exception: + log("fuzz(): Can't convert mutated data to bytes => returning buf") + return buf + + # Everything went fine, returning mutated content + log("fuzz(): Returning %d bytes" % len(data)) + return data + + +# Main (for debug) +if __name__ == "__main__": + + __log__ = True + __log_file__ = "/dev/stdout" + __seed__ = "RANDOM" + + init(__seed__) + + in_1 = bytearray( + "ffffzzzzzzzzzzzz" + ) + in_2 = bytearray("") + out = fuzz(in_1, in_2) + print(out) diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md index 62e01f83..9d5381e8 100644 --- a/docs/custom_mutators.md +++ b/docs/custom_mutators.md @@ -285,8 +285,8 @@ afl-fuzz /path/to/program ## 4) Example -Please see [example.c](../utils/custom_mutators/example.c) and -[example.py](../utils/custom_mutators/example.py) +Please see [example.c](../custom_mutators/examples/example.c) and 
+[example.py](../custom_mutators/examples/example.py) ## 5) Other Resources diff --git a/docs/life_pro_tips.md b/docs/life_pro_tips.md index 50ad75d4..f9ac1c53 100644 --- a/docs/life_pro_tips.md +++ b/docs/life_pro_tips.md @@ -83,5 +83,5 @@ You can find a simple solution in utils/argv_fuzzing. ## Attacking a format that uses checksums? Remove the checksum-checking code or use a postprocessor! -See utils/custom_mutators/ for more. +See `afl_custom_post_process` in custom_mutators/examples/examples.c for more. diff --git a/qemu_mode/README.md b/qemu_mode/README.md index 4aa2133e..38cb5ba6 100644 --- a/qemu_mode/README.md +++ b/qemu_mode/README.md @@ -190,8 +190,8 @@ handlers of the target. ## 13) Gotchas, feedback, bugs -If you need to fix up checksums or do other cleanup on mutated test cases, see -utils/custom_mutators/ for a viable solution. +If you need to fix up checksums or do other cleanups on mutated test cases, see +`afl_custom_post_process` in custom_mutators/examples/example.c for a viable solution. Do not mix QEMU mode with ASAN, MSAN, or the likes; QEMU doesn't appreciate the "shadow VM" trick employed by the sanitizers and will probably just diff --git a/test/test-custom-mutators.sh b/test/test-custom-mutators.sh index bae4220f..a5d666ff 100755 --- a/test/test-custom-mutators.sh +++ b/test/test-custom-mutators.sh @@ -29,8 +29,8 @@ test "1" = "`../afl-fuzz | grep -i 'without python' >/dev/null; echo $?`" && { } } # Compile the custom mutator - cc -D_FIXED_CHAR=0x41 -g -fPIC -shared -I../include ../utils/custom_mutators/simple_example.c -o libexamplemutator.so > /dev/null 2>&1 - cc -D_FIXED_CHAR=0x42 -g -fPIC -shared -I../include ../utils/custom_mutators/simple_example.c -o libexamplemutator2.so > /dev/null 2>&1 + cc -D_FIXED_CHAR=0x41 -g -fPIC -shared -I../include ../custom_mutators/examples/simple_example.c -o libexamplemutator.so > /dev/null 2>&1 + cc -D_FIXED_CHAR=0x42 -g -fPIC -shared -I../include ../custom_mutators/examples/simple_example.c -o libexamplemutator2.so > /dev/null 2>&1 test -e test-custom-mutator -a -e ./libexamplemutator.so && { # Create input directory mkdir -p in diff --git a/utils/custom_mutators/Makefile b/utils/custom_mutators/Makefile deleted file mode 100644 index 9849f3f4..00000000 --- a/utils/custom_mutators/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -all: libexamplemutator.so - -libexamplemutator.so: - $(CC) $(CFLAGS) -D_FORTIFY_SOURCE=2 -O3 -fPIC -shared -g -I ../../include example.c -o libexamplemutator.so - -clean: - rm -rf libexamplemutator.so diff --git a/utils/custom_mutators/README.md b/utils/custom_mutators/README.md deleted file mode 100644 index 655f7a5e..00000000 --- a/utils/custom_mutators/README.md +++ /dev/null @@ -1,35 +0,0 @@ -# Examples for the custom mutator - -These are example and helper files for the custom mutator feature. -See [docs/custom_mutators.md](../../docs/custom_mutators.md) for more information - -Note that if you compile with python3.7 you must use python3 scripts, and if -you use python2.7 to compile python2 scripts! - -simple_example.c - most simplest example. generates a random sized buffer - filled with 'A' - -example.c - this is a simple example written in C and should be compiled to a - shared library. 
Use make to compile it and produce libexamplemutator.so - -example.py - this is the template you can use, the functions are there but they - are empty - -post_library_gif.so.c - fix a fuzz input to ensure it is valid for GIF - -post_library_png.so.c - fix a fuzz input to ensure it is valid for PNG - -simple-chunk-replace.py - this is a simple example where chunks are replaced - -common.py - this can be used for common functions and helpers. - the examples do not use this though. But you can :) - -wrapper_afl_min.py - mutation of XML documents, loads XmlMutatorMin.py - -XmlMutatorMin.py - module for XML mutation - -custom_mutator_helpers.h is an header that defines some helper routines -like surgical_havoc_mutate() that allow to perform a randomly chosen -mutation from a subset of the havoc mutations. -If you do so, you have to specify -I /path/to/AFLplusplus/include when -compiling. diff --git a/utils/custom_mutators/XmlMutatorMin.py b/utils/custom_mutators/XmlMutatorMin.py deleted file mode 100644 index 3e6cd0ff..00000000 --- a/utils/custom_mutators/XmlMutatorMin.py +++ /dev/null @@ -1,348 +0,0 @@ -#!/usr/bin/python - -""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """ - -from __future__ import print_function -from copy import deepcopy -from lxml import etree as ET -import random, re, io - - -########################### -# The XmlMutatorMin class # -########################### - - -class XmlMutatorMin: - - """ - Optionals parameters: - seed Seed used by the PRNG (default: "RANDOM") - verbose Verbosity (default: False) - """ - - def __init__(self, seed="RANDOM", verbose=False): - - """ Initialize seed, database and mutators """ - - # Verbosity - self.verbose = verbose - - # Initialize PRNG - self.seed = str(seed) - if self.seed == "RANDOM": - random.seed() - else: - if self.verbose: - print("Static seed '%s'" % self.seed) - random.seed(self.seed) - - # Initialize input and output documents - self.input_tree = None - self.tree = None - - # High-level mutators (no database needed) - hl_mutators_delete = [ - "del_node_and_children", - "del_node_but_children", - "del_attribute", - "del_content", - ] # Delete items - hl_mutators_fuzz = ["fuzz_attribute"] # Randomly change attribute values - - # Exposed mutators - self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete - - def __parse_xml(self, xml): - - """ Parse an XML string. Basic wrapper around lxml.parse() """ - - try: - # Function parse() takes care of comments / DTD / processing instructions / ... - tree = ET.parse(io.BytesIO(xml)) - except ET.ParseError: - raise RuntimeError("XML isn't well-formed!") - except LookupError as e: - raise RuntimeError(e) - - # Return a document wrapper - return tree - - def __exec_among(self, module, functions, min_times, max_times): - - """ Randomly execute $functions between $min and $max times """ - - for i in xrange(random.randint(min_times, max_times)): - # Function names are mangled because they are "private" - getattr(module, "_XmlMutatorMin__" + random.choice(functions))() - - def __serialize_xml(self, tree): - - """ Serialize a XML document. 
Basic wrapper around lxml.tostring() """ - - return ET.tostring( - tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding - ) - - def __ver(self, version): - - """ Helper for displaying lxml version numbers """ - - return ".".join(map(str, version)) - - def reset(self): - - """ Reset the mutator """ - - self.tree = deepcopy(self.input_tree) - - def init_from_string(self, input_string): - - """ Initialize the mutator from a XML string """ - - # Get a pointer to the top-element - self.input_tree = self.__parse_xml(input_string) - - # Get a working copy - self.tree = deepcopy(self.input_tree) - - def save_to_string(self): - - """ Return the current XML document as UTF-8 string """ - - # Return a text version of the tree - return self.__serialize_xml(self.tree) - - def __pick_element(self, exclude_root_node=False): - - """ Pick a random element from the current document """ - - # Get a list of all elements, but nodes like PI and comments - elems = list(self.tree.getroot().iter(tag=ET.Element)) - - # Is the root node excluded? - if exclude_root_node: - start = 1 - else: - start = 0 - - # Pick a random element - try: - elem_id = random.randint(start, len(elems) - 1) - elem = elems[elem_id] - except ValueError: - # Should only occurs if "exclude_root_node = True" - return (None, None) - - return (elem_id, elem) - - def __fuzz_attribute(self): - - """ Fuzz (part of) an attribute value """ - - # Select a node to modify - (rand_elem_id, rand_elem) = self.__pick_element() - - # Get all the attributes - attribs = rand_elem.keys() - - # Is there attributes? - if len(attribs) < 1: - if self.verbose: - print("No attribute: can't replace!") - return - - # Pick a random attribute - rand_attrib_id = random.randint(0, len(attribs) - 1) - rand_attrib = attribs[rand_attrib_id] - - # We have the attribute to modify - # Get its value - attrib_value = rand_elem.get(rand_attrib) - # print("- Value: " + attrib_value) - - # Should we work on the whole value? 
- func_call = "(?P[a-zA-Z:\-]+)\((?P.*?)\)" - p = re.compile(func_call) - l = p.findall(attrib_value) - if random.choice((True, False)) and l: - # Randomly pick one the function calls - (func, args) = random.choice(l) - # Split by "," and randomly pick one of the arguments - value = random.choice(args.split(",")) - # Remove superfluous characters - unclean_value = value - value = value.strip(" ").strip("'") - # print("Selected argument: [%s]" % value) - else: - value = attrib_value - - # For each type, define some possible replacement values - choices_number = ( - "0", - "11111", - "-128", - "2", - "-1", - "1/3", - "42/0", - "1094861636 idiv 1.0", - "-1123329771506872 idiv 3.8", - "17=$numericRTF", - str(3 + random.randrange(0, 100)), - ) - - choices_letter = ( - "P" * (25 * random.randrange(1, 100)), - "%s%s%s%s%s%s", - "foobar", - ) - - choices_alnum = ( - "Abc123", - "020F0302020204030204", - "020F0302020204030204" * (random.randrange(5, 20)), - ) - - # Fuzz the value - if random.choice((True, False)) and value == "": - - # Empty - new_value = value - - elif random.choice((True, False)) and value.isdigit(): - - # Numbers - new_value = random.choice(choices_number) - - elif random.choice((True, False)) and value.isalpha(): - - # Letters - new_value = random.choice(choices_letter) - - elif random.choice((True, False)) and value.isalnum(): - - # Alphanumeric - new_value = random.choice(choices_alnum) - - else: - - # Default type - new_value = random.choice(choices_alnum + choices_letter + choices_number) - - # If we worked on a substring, apply changes to the whole string - if value != attrib_value: - # No ' around empty values - if new_value != "" and value != "": - new_value = "'" + new_value + "'" - # Apply changes - new_value = attrib_value.replace(unclean_value, new_value) - - # Log something - if self.verbose: - print( - "Fuzzing attribute #%i '%s' of tag #%i '%s'" - % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag) - ) - - # Modify the attribute - rand_elem.set(rand_attrib, new_value.decode("utf-8")) - - def __del_node_and_children(self): - - """High-level minimizing mutator - Delete a random node and its children (i.e. delete a random tree)""" - - self.__del_node(True) - - def __del_node_but_children(self): - - """High-level minimizing mutator - Delete a random node but its children (i.e. link them to the parent of the deleted node)""" - - self.__del_node(False) - - def __del_node(self, delete_children): - - """ Called by the __del_node_* mutators """ - - # Select a node to modify (but the root one) - (rand_elem_id, rand_elem) = self.__pick_element(exclude_root_node=True) - - # If the document includes only a top-level element - # Then we can't pick a element (given that "exclude_root_node = True") - - # Is the document deep enough? 
- if rand_elem is None: - if self.verbose: - print("Can't delete a node: document not deep enough!") - return - - # Log something - if self.verbose: - but_or_and = "and" if delete_children else "but" - print( - "Deleting tag #%i '%s' %s its children" - % (rand_elem_id, rand_elem.tag, but_or_and) - ) - - if delete_children is False: - # Link children of the random (soon to be deleted) node to its parent - for child in rand_elem: - rand_elem.getparent().append(child) - - # Remove the node - rand_elem.getparent().remove(rand_elem) - - def __del_content(self): - - """High-level minimizing mutator - Delete the attributes and children of a random node""" - - # Select a node to modify - (rand_elem_id, rand_elem) = self.__pick_element() - - # Log something - if self.verbose: - print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag)) - - # Reset the node - rand_elem.clear() - - def __del_attribute(self): - - """High-level minimizing mutator - Delete a random attribute from a random node""" - - # Select a node to modify - (rand_elem_id, rand_elem) = self.__pick_element() - - # Get all the attributes - attribs = rand_elem.keys() - - # Is there attributes? - if len(attribs) < 1: - if self.verbose: - print("No attribute: can't delete!") - return - - # Pick a random attribute - rand_attrib_id = random.randint(0, len(attribs) - 1) - rand_attrib = attribs[rand_attrib_id] - - # Log something - if self.verbose: - print( - "Deleting attribute #%i '%s' of tag #%i '%s'" - % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag) - ) - - # Delete the attribute - rand_elem.attrib.pop(rand_attrib) - - def mutate(self, min=1, max=5): - - """ Execute some high-level mutators between $min and $max times, then some medium-level ones """ - - # High-level mutation - self.__exec_among(self, self.hl_mutators_all, min, max) diff --git a/utils/custom_mutators/common.py b/utils/custom_mutators/common.py deleted file mode 100644 index 44a5056a..00000000 --- a/utils/custom_mutators/common.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -""" -Module containing functions shared between multiple AFL modules - -@author: Christian Holler (:decoder) - -@license: - -This Source Code Form is subject to the terms of the Mozilla Public -License, v. 2.0. If a copy of the MPL was not distributed with this -file, You can obtain one at http://mozilla.org/MPL/2.0/. - -@contact: choller@mozilla.com -""" - -from __future__ import print_function -import random -import os -import re - - -def randel(l): - if not l: - return None - return l[random.randint(0, len(l) - 1)] - - -def randel_pop(l): - if not l: - return None - return l.pop(random.randint(0, len(l) - 1)) - - -def write_exc_example(data, exc): - exc_name = re.sub(r"[^a-zA-Z0-9]", "_", repr(exc)) - - if not os.path.exists(exc_name): - with open(exc_name, "w") as f: - f.write(data) diff --git a/utils/custom_mutators/custom_mutator_helpers.h b/utils/custom_mutators/custom_mutator_helpers.h deleted file mode 100644 index 62e6efba..00000000 --- a/utils/custom_mutators/custom_mutator_helpers.h +++ /dev/null @@ -1,342 +0,0 @@ -#ifndef CUSTOM_MUTATOR_HELPERS -#define CUSTOM_MUTATOR_HELPERS - -#include "config.h" -#include "types.h" -#include - -#define INITIAL_GROWTH_SIZE (64) - -#define RAND_BELOW(limit) (rand() % (limit)) - -/* Use in a struct: creates a name_buf and a name_size variable. */ -#define BUF_VAR(type, name) \ - type * name##_buf; \ - size_t name##_size; -/* this fills in `&structptr->something_buf, &structptr->something_size`. 
*/ -#define BUF_PARAMS(struct, name) \ - (void **)&struct->name##_buf, &struct->name##_size - -typedef struct { - -} afl_t; - -static void surgical_havoc_mutate(u8 *out_buf, s32 begin, s32 end) { - - static s8 interesting_8[] = {INTERESTING_8}; - static s16 interesting_16[] = {INTERESTING_8, INTERESTING_16}; - static s32 interesting_32[] = {INTERESTING_8, INTERESTING_16, INTERESTING_32}; - - switch (RAND_BELOW(12)) { - - case 0: { - - /* Flip a single bit somewhere. Spooky! */ - - s32 bit_idx = ((RAND_BELOW(end - begin) + begin) << 3) + RAND_BELOW(8); - - out_buf[bit_idx >> 3] ^= 128 >> (bit_idx & 7); - - break; - - } - - case 1: { - - /* Set byte to interesting value. */ - - u8 val = interesting_8[RAND_BELOW(sizeof(interesting_8))]; - out_buf[(RAND_BELOW(end - begin) + begin)] = val; - - break; - - } - - case 2: { - - /* Set word to interesting value, randomly choosing endian. */ - - if (end - begin < 2) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 1) break; - - switch (RAND_BELOW(2)) { - - case 0: - *(u16 *)(out_buf + byte_idx) = - interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)]; - break; - case 1: - *(u16 *)(out_buf + byte_idx) = - SWAP16(interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)]); - break; - - } - - break; - - } - - case 3: { - - /* Set dword to interesting value, randomly choosing endian. */ - - if (end - begin < 4) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 3) break; - - switch (RAND_BELOW(2)) { - - case 0: - *(u32 *)(out_buf + byte_idx) = - interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]; - break; - case 1: - *(u32 *)(out_buf + byte_idx) = - SWAP32(interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]); - break; - - } - - break; - - } - - case 4: { - - /* Set qword to interesting value, randomly choosing endian. */ - - if (end - begin < 8) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 7) break; - - switch (RAND_BELOW(2)) { - - case 0: - *(u64 *)(out_buf + byte_idx) = - (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]; - break; - case 1: - *(u64 *)(out_buf + byte_idx) = SWAP64( - (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]); - break; - - } - - break; - - } - - case 5: { - - /* Randomly subtract from byte. */ - - out_buf[(RAND_BELOW(end - begin) + begin)] -= 1 + RAND_BELOW(ARITH_MAX); - - break; - - } - - case 6: { - - /* Randomly add to byte. */ - - out_buf[(RAND_BELOW(end - begin) + begin)] += 1 + RAND_BELOW(ARITH_MAX); - - break; - - } - - case 7: { - - /* Randomly subtract from word, random endian. */ - - if (end - begin < 2) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 1) break; - - if (RAND_BELOW(2)) { - - *(u16 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u16 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u16 *)(out_buf + byte_idx) = - SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) - num); - - } - - break; - - } - - case 8: { - - /* Randomly add to word, random endian. */ - - if (end - begin < 2) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 1) break; - - if (RAND_BELOW(2)) { - - *(u16 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u16 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u16 *)(out_buf + byte_idx) = - SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) + num); - - } - - break; - - } - - case 9: { - - /* Randomly subtract from dword, random endian. 
*/ - - if (end - begin < 4) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 3) break; - - if (RAND_BELOW(2)) { - - *(u32 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u32 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u32 *)(out_buf + byte_idx) = - SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) - num); - - } - - break; - - } - - case 10: { - - /* Randomly add to dword, random endian. */ - - if (end - begin < 4) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 3) break; - - if (RAND_BELOW(2)) { - - *(u32 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u32 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u32 *)(out_buf + byte_idx) = - SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) + num); - - } - - break; - - } - - case 11: { - - /* Just set a random byte to a random value. Because, - why not. We use XOR with 1-255 to eliminate the - possibility of a no-op. */ - - out_buf[(RAND_BELOW(end - begin) + begin)] ^= 1 + RAND_BELOW(255); - - break; - - } - - } - -} - -/* This function calculates the next power of 2 greater or equal its argument. - @return The rounded up power of 2 (if no overflow) or 0 on overflow. -*/ -static inline size_t next_pow2(size_t in) { - - if (in == 0 || in > (size_t)-1) - return 0; /* avoid undefined behaviour under-/overflow */ - size_t out = in - 1; - out |= out >> 1; - out |= out >> 2; - out |= out >> 4; - out |= out >> 8; - out |= out >> 16; - return out + 1; - -} - -/* This function makes sure *size is > size_needed after call. - It will realloc *buf otherwise. - *size will grow exponentially as per: - https://blog.mozilla.org/nnethercote/2014/11/04/please-grow-your-buffers-exponentially/ - Will return NULL and free *buf if size_needed is <1 or realloc failed. - @return For convenience, this function returns *buf. - */ -static inline void *maybe_grow(void **buf, size_t *size, size_t size_needed) { - - /* No need to realloc */ - if (likely(size_needed && *size >= size_needed)) return *buf; - - /* No initial size was set */ - if (size_needed < INITIAL_GROWTH_SIZE) size_needed = INITIAL_GROWTH_SIZE; - - /* grow exponentially */ - size_t next_size = next_pow2(size_needed); - - /* handle overflow */ - if (!next_size) { next_size = size_needed; } - - /* alloc */ - *buf = realloc(*buf, next_size); - *size = *buf ? next_size : 0; - - return *buf; - -} - -/* Swaps buf1 ptr and buf2 ptr, as well as their sizes */ -static inline void afl_swap_bufs(void **buf1, size_t *size1, void **buf2, - size_t *size2) { - - void * scratch_buf = *buf1; - size_t scratch_size = *size1; - *buf1 = *buf2; - *size1 = *size2; - *buf2 = scratch_buf; - *size2 = scratch_size; - -} - -#undef INITIAL_GROWTH_SIZE - -#endif - diff --git a/utils/custom_mutators/example.c b/utils/custom_mutators/example.c deleted file mode 100644 index 23add128..00000000 --- a/utils/custom_mutators/example.c +++ /dev/null @@ -1,376 +0,0 @@ -/* - New Custom Mutator for AFL++ - Written by Khaled Yakdan - Andrea Fioraldi - Shengtuo Hu - Dominik Maier -*/ - -// You need to use -I /path/to/AFLplusplus/include -#include "custom_mutator_helpers.h" - -#include -#include -#include -#include - -#define DATA_SIZE (100) - -static const char *commands[] = { - - "GET", - "PUT", - "DEL", - -}; - -typedef struct my_mutator { - - afl_t *afl; - - // any additional data here! 
- size_t trim_size_current; - int trimmming_steps; - int cur_step; - - // Reused buffers: - BUF_VAR(u8, fuzz); - BUF_VAR(u8, data); - BUF_VAR(u8, havoc); - BUF_VAR(u8, trim); - BUF_VAR(u8, post_process); - -} my_mutator_t; - -/** - * Initialize this custom mutator - * - * @param[in] afl a pointer to the internal state object. Can be ignored for - * now. - * @param[in] seed A seed for this mutator - the same seed should always mutate - * in the same way. - * @return Pointer to the data object this custom mutator instance should use. - * There may be multiple instances of this mutator in one afl-fuzz run! - * Return NULL on error. - */ -my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { - - srand(seed); // needed also by surgical_havoc_mutate() - - my_mutator_t *data = calloc(1, sizeof(my_mutator_t)); - if (!data) { - - perror("afl_custom_init alloc"); - return NULL; - - } - - data->afl = afl; - - return data; - -} - -/** - * Perform custom mutations on a given input - * - * (Optional for now. Required in the future) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param[in] buf Pointer to input data to be mutated - * @param[in] buf_size Size of input data - * @param[out] out_buf the buffer we will work on. we can reuse *buf. NULL on - * error. - * @param[in] add_buf Buffer containing the additional test case - * @param[in] add_buf_size Size of the additional test case - * @param[in] max_size Maximum size of the mutated output. The mutation must not - * produce data larger than max_size. - * @return Size of the mutated output. - */ -size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, - u8 **out_buf, uint8_t *add_buf, - size_t add_buf_size, // add_buf can be NULL - size_t max_size) { - - // Make sure that the packet size does not exceed the maximum size expected by - // the fuzzer - size_t mutated_size = DATA_SIZE <= max_size ? DATA_SIZE : max_size; - - // maybe_grow is optimized to be quick for reused buffers. - u8 *mutated_out = maybe_grow(BUF_PARAMS(data, fuzz), mutated_size); - if (!mutated_out) { - - *out_buf = NULL; - perror("custom mutator allocation (maybe_grow)"); - return 0; /* afl-fuzz will very likely error out after this. */ - - } - - // Randomly select a command string to add as a header to the packet - memcpy(mutated_out, commands[rand() % 3], 3); - - // Mutate the payload of the packet - int i; - for (i = 0; i < 8; ++i) { - - // Randomly perform one of the (no len modification) havoc mutations - surgical_havoc_mutate(mutated_out, 3, mutated_size); - - } - - *out_buf = mutated_out; - return mutated_size; - -} - -/** - * A post-processing function to use right before AFL writes the test case to - * disk in order to execute the target. - * - * (Optional) If this functionality is not needed, simply don't define this - * function. - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param[in] buf Buffer containing the test case to be executed - * @param[in] buf_size Size of the test case - * @param[out] out_buf Pointer to the buffer containing the test case after - * processing. External library should allocate memory for out_buf. - * The buf pointer may be reused (up to the given buf_size); - * @return Size of the output buffer after processing or the needed amount. - * A return of 0 indicates an error. 
- */ -size_t afl_custom_post_process(my_mutator_t *data, uint8_t *buf, - size_t buf_size, uint8_t **out_buf) { - - uint8_t *post_process_buf = - maybe_grow(BUF_PARAMS(data, post_process), buf_size + 5); - if (!post_process_buf) { - - perror("custom mutator realloc failed."); - *out_buf = NULL; - return 0; - - } - - memcpy(post_process_buf + 5, buf, buf_size); - post_process_buf[0] = 'A'; - post_process_buf[1] = 'F'; - post_process_buf[2] = 'L'; - post_process_buf[3] = '+'; - post_process_buf[4] = '+'; - - *out_buf = post_process_buf; - - return buf_size + 5; - -} - -/** - * This method is called at the start of each trimming operation and receives - * the initial buffer. It should return the amount of iteration steps possible - * on this input (e.g. if your input has n elements and you want to remove - * them one by one, return n, if you do a binary search, return log(n), - * and so on...). - * - * If your trimming algorithm doesn't allow you to determine the amount of - * (remaining) steps easily (esp. while running), then you can alternatively - * return 1 here and always return 0 in post_trim until you are finished and - * no steps remain. In that case, returning 1 in post_trim will end the - * trimming routine. The whole current index/max iterations stuff is only used - * to show progress. - * - * (Optional) - * - * @param data pointer returned in afl_custom_init for this fuzz case - * @param buf Buffer containing the test case - * @param buf_size Size of the test case - * @return The amount of possible iteration steps to trim the input. - * negative on error. - */ -int32_t afl_custom_init_trim(my_mutator_t *data, uint8_t *buf, - size_t buf_size) { - - // We simply trim once - data->trimmming_steps = 1; - - data->cur_step = 0; - - if (!maybe_grow(BUF_PARAMS(data, trim), buf_size)) { - - perror("init_trim grow"); - return -1; - - } - - memcpy(data->trim_buf, buf, buf_size); - - data->trim_size_current = buf_size; - - return data->trimmming_steps; - -} - -/** - * This method is called for each trimming operation. It doesn't have any - * arguments because we already have the initial buffer from init_trim and we - * can memorize the current state in *data. This can also save - * reparsing steps for each iteration. It should return the trimmed input - * buffer, where the returned data must not exceed the initial input data in - * length. Returning anything that is larger than the original data (passed - * to init_trim) will result in a fatal abort of AFLFuzz. - * - * (Optional) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param[out] out_buf Pointer to the buffer containing the trimmed test case. - * External library should allocate memory for out_buf. - * AFL++ will not release the memory after saving the test case. - * Keep a ref in *data. - * *out_buf = NULL is treated as error. - * @return Pointer to the size of the trimmed test case - */ -size_t afl_custom_trim(my_mutator_t *data, uint8_t **out_buf) { - - *out_buf = data->trim_buf; - - // Remove the last byte of the trimming input - return data->trim_size_current - 1; - -} - -/** - * This method is called after each trim operation to inform you if your - * trimming step was successful or not (in terms of coverage). If you receive - * a failure here, you should reset your input to the last known good state. - * - * (Optional) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param success Indicates if the last trim operation was successful. 
- * @return The next trim iteration index (from 0 to the maximum amount of - * steps returned in init_trim). negative ret on failure. - */ -int32_t afl_custom_post_trim(my_mutator_t *data, int success) { - - if (success) { - - ++data->cur_step; - return data->cur_step; - - } - - return data->trimmming_steps; - -} - -/** - * Perform a single custom mutation on a given input. - * This mutation is stacked with the other muatations in havoc. - * - * (Optional) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param[in] buf Pointer to the input data to be mutated and the mutated - * output - * @param[in] buf_size Size of input data - * @param[out] out_buf The output buffer. buf can be reused, if the content - * fits. *out_buf = NULL is treated as error. - * @param[in] max_size Maximum size of the mutated output. The mutation must - * not produce data larger than max_size. - * @return Size of the mutated output. - */ -size_t afl_custom_havoc_mutation(my_mutator_t *data, u8 *buf, size_t buf_size, - u8 **out_buf, size_t max_size) { - - if (buf_size == 0) { - - *out_buf = maybe_grow(BUF_PARAMS(data, havoc), 1); - if (!*out_buf) { - - perror("custom havoc: maybe_grow"); - return 0; - - } - - **out_buf = rand() % 256; - buf_size = 1; - - } else { - - // We reuse buf here. It's legal and faster. - *out_buf = buf; - - } - - size_t victim = rand() % buf_size; - (*out_buf)[victim] += rand() % 10; - - return buf_size; - -} - -/** - * Return the probability (in percentage) that afl_custom_havoc_mutation - * is called in havoc. By default it is 6 %. - * - * (Optional) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @return The probability (0-100). - */ -uint8_t afl_custom_havoc_mutation_probability(my_mutator_t *data) { - - return 5; // 5 % - -} - -/** - * Determine whether the fuzzer should fuzz the queue entry or not. - * - * (Optional) - * - * @param[in] data pointer returned in afl_custom_init for this fuzz case - * @param filename File name of the test case in the queue entry - * @return Return True(1) if the fuzzer will fuzz the queue entry, and - * False(0) otherwise. - */ -uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) { - - return 1; - -} - -/** - * Allow for additional analysis (e.g. calling a different tool that does a - * different kind of coverage and saves this for the custom mutator). 
- * - * (Optional) - * - * @param data pointer returned in afl_custom_init for this fuzz case - * @param filename_new_queue File name of the new queue entry - * @param filename_orig_queue File name of the original queue entry - */ -void afl_custom_queue_new_entry(my_mutator_t * data, - const uint8_t *filename_new_queue, - const uint8_t *filename_orig_queue) { - - /* Additional analysis on the original or new test case */ - -} - -/** - * Deinitialize everything - * - * @param data The data ptr from afl_custom_init - */ -void afl_custom_deinit(my_mutator_t *data) { - - free(data->post_process_buf); - free(data->havoc_buf); - free(data->data_buf); - free(data->fuzz_buf); - free(data->trim_buf); - free(data); - -} - diff --git a/utils/custom_mutators/example.py b/utils/custom_mutators/example.py deleted file mode 100644 index 3a6d22e4..00000000 --- a/utils/custom_mutators/example.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -""" -Example Python Module for AFLFuzz - -@author: Christian Holler (:decoder) - -@license: - -This Source Code Form is subject to the terms of the Mozilla Public -License, v. 2.0. If a copy of the MPL was not distributed with this -file, You can obtain one at http://mozilla.org/MPL/2.0/. - -@contact: choller@mozilla.com -""" - -import random - - -COMMANDS = [ - b"GET", - b"PUT", - b"DEL", - b"AAAAAAAAAAAAAAAAA", -] - - -def init(seed): - """ - Called once when AFLFuzz starts up. Used to seed our RNG. - - @type seed: int - @param seed: A 32-bit random value - """ - random.seed(seed) - - -def deinit(): - pass - - -def fuzz(buf, add_buf, max_size): - """ - Called per fuzzing iteration. - - @type buf: bytearray - @param buf: The buffer that should be mutated. - - @type add_buf: bytearray - @param add_buf: A second buffer that can be used as mutation source. - - @type max_size: int - @param max_size: Maximum size of the mutated output. The mutation must not - produce data larger than max_size. - - @rtype: bytearray - @return: A new bytearray containing the mutated data - """ - ret = bytearray(100) - - ret[:3] = random.choice(COMMANDS) - - return ret - - -# Uncomment and implement the following methods if you want to use a custom -# trimming algorithm. See also the documentation for a better API description. - -# def init_trim(buf): -# ''' -# Called per trimming iteration. -# -# @type buf: bytearray -# @param buf: The buffer that should be trimmed. -# -# @rtype: int -# @return: The maximum number of trimming steps. -# ''' -# global ... -# -# # Initialize global variables -# -# # Figure out how many trimming steps are possible. -# # If this is not possible for your trimming, you can -# # return 1 instead and always return 0 in post_trim -# # until you are done (then you return 1). -# -# return steps -# -# def trim(): -# ''' -# Called per trimming iteration. -# -# @rtype: bytearray -# @return: A new bytearray containing the trimmed data. -# ''' -# global ... -# -# # Implement the actual trimming here -# -# return bytearray(...) -# -# def post_trim(success): -# ''' -# Called after each trimming operation. -# -# @type success: bool -# @param success: Indicates if the last trim operation was successful. -# -# @rtype: int -# @return: The next trim index (0 to max number of steps) where max -# number of steps indicates the trimming is done. -# ''' -# global ... 
-# -# if not success: -# # Restore last known successful input, determine next index -# else: -# # Just determine the next index, based on what was successfully -# # removed in the last step -# -# return next_index -# -# def post_process(buf): -# ''' -# Called just before the execution to write the test case in the format -# expected by the target -# -# @type buf: bytearray -# @param buf: The buffer containing the test case to be executed -# -# @rtype: bytearray -# @return: The buffer containing the test case after -# ''' -# return buf -# -# def havoc_mutation(buf, max_size): -# ''' -# Perform a single custom mutation on a given input. -# -# @type buf: bytearray -# @param buf: The buffer that should be mutated. -# -# @type max_size: int -# @param max_size: Maximum size of the mutated output. The mutation must not -# produce data larger than max_size. -# -# @rtype: bytearray -# @return: A new bytearray containing the mutated data -# ''' -# return mutated_buf -# -# def havoc_mutation_probability(): -# ''' -# Called for each `havoc_mutation`. Return the probability (in percentage) -# that `havoc_mutation` is called in havoc. Be default it is 6%. -# -# @rtype: int -# @return: The probability (0-100) -# ''' -# return prob -# -# def queue_get(filename): -# ''' -# Called at the beginning of each fuzz iteration to determine whether the -# test case should be fuzzed -# -# @type filename: str -# @param filename: File name of the test case in the current queue entry -# -# @rtype: bool -# @return: Return True if the custom mutator decides to fuzz the test case, -# and False otherwise -# ''' -# return True -# -# def queue_new_entry(filename_new_queue, filename_orig_queue): -# ''' -# Called after adding a new test case to the queue -# -# @type filename_new_queue: str -# @param filename_new_queue: File name of the new queue entry -# -# @type filename_orig_queue: str -# @param filename_orig_queue: File name of the original queue entry -# ''' -# pass diff --git a/utils/custom_mutators/post_library_gif.so.c b/utils/custom_mutators/post_library_gif.so.c deleted file mode 100644 index ac10f409..00000000 --- a/utils/custom_mutators/post_library_gif.so.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - american fuzzy lop++ - postprocessor library example - -------------------------------------------------- - - Originally written by Michal Zalewski - Edited by Dominik Maier, 2020 - - Copyright 2015 Google Inc. All rights reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at: - - http://www.apache.org/licenses/LICENSE-2.0 - - Postprocessor libraries can be passed to afl-fuzz to perform final cleanup - of any mutated test cases - for example, to fix up checksums in PNG files. - - Please heed the following warnings: - - 1) In almost all cases, it is more productive to comment out checksum logic - in the targeted binary (as shown in ../libpng_no_checksum/). One possible - exception is the process of fuzzing binary-only software in QEMU mode. - - 2) The use of postprocessors for anything other than checksums is - questionable and may cause more harm than good. AFL is normally pretty good - about dealing with length fields, magic values, etc. - - 3) Postprocessors that do anything non-trivial must be extremely robust to - gracefully handle malformed data and other error conditions - otherwise, - they will crash and take afl-fuzz down with them. 
Be wary of reading past - *len and of integer overflows when calculating file offsets. - - In other words, THIS IS PROBABLY NOT WHAT YOU WANT - unless you really, - honestly know what you're doing =) - - With that out of the way: the postprocessor library is passed to afl-fuzz - via AFL_POST_LIBRARY. The library must be compiled with: - - gcc -shared -Wall -O3 post_library.so.c -o post_library.so - - AFL will call the afl_custom_post_process() function for every mutated output - buffer. From there, you have three choices: - - 1) If you don't want to modify the test case, simply set `*out_buf = in_buf` - and return the original `len`. - - 2) If you want to skip this test case altogether and have AFL generate a - new one, return 0 or set `*out_buf = NULL`. - Use this sparingly - it's faster than running the target program - with patently useless inputs, but still wastes CPU time. - - 3) If you want to modify the test case, allocate an appropriately-sized - buffer, move the data into that buffer, make the necessary changes, and - then return the new pointer as out_buf. Return an appropriate len - afterwards. - - Note that the buffer will *not* be freed for you. To avoid memory leaks, - you need to free it or reuse it on subsequent calls (as shown below). - - *** Feel free to reuse the original 'in_buf' BUFFER and return it. *** - - Aight. The example below shows a simple postprocessor that tries to make - sure that all input files start with "GIF89a". - - PS. If you don't like C, you can try out the unix-based wrapper from - Ben Nagy instead: https://github.com/bnagy/aflfix - - */ - -#include -#include -#include - -/* Header that must be present at the beginning of every test case: */ - -#define HEADER "GIF89a" - -typedef struct post_state { - - unsigned char *buf; - size_t size; - -} post_state_t; - -void *afl_custom_init(void *afl) { - - post_state_t *state = malloc(sizeof(post_state_t)); - if (!state) { - - perror("malloc"); - return NULL; - - } - - state->buf = calloc(sizeof(unsigned char), 4096); - if (!state->buf) { - - free(state); - perror("calloc"); - return NULL; - - } - - return state; - -} - -/* The actual postprocessor routine called by afl-fuzz: */ - -size_t afl_custom_post_process(post_state_t *data, unsigned char *in_buf, - unsigned int len, unsigned char **out_buf) { - - /* Skip execution altogether for buffers shorter than 6 bytes (just to - show how it's done). We can trust len to be sane. */ - - if (len < strlen(HEADER)) return 0; - - /* Do nothing for buffers that already start with the expected header. */ - - if (!memcmp(in_buf, HEADER, strlen(HEADER))) { - - *out_buf = in_buf; - return len; - - } - - /* Allocate memory for new buffer, reusing previous allocation if - possible. */ - - *out_buf = realloc(data->buf, len); - - /* If we're out of memory, the most graceful thing to do is to return the - original buffer and give up on modifying it. Let AFL handle OOM on its - own later on. */ - - if (!*out_buf) { - - *out_buf = in_buf; - return len; - - } - - /* Copy the original data to the new location. */ - - memcpy(*out_buf, in_buf, len); - - /* Insert the new header. */ - - memcpy(*out_buf, HEADER, strlen(HEADER)); - - /* Return the new len. It hasn't changed, so it's just len. 
*/ - - return len; - -} - -/* Gets called afterwards */ -void afl_custom_deinit(post_state_t *data) { - - free(data->buf); - free(data); - -} - diff --git a/utils/custom_mutators/post_library_png.so.c b/utils/custom_mutators/post_library_png.so.c deleted file mode 100644 index 941f7e55..00000000 --- a/utils/custom_mutators/post_library_png.so.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - american fuzzy lop++ - postprocessor for PNG - ------------------------------------------ - - Originally written by Michal Zalewski - - Copyright 2015 Google Inc. All rights reserved. - Adapted to the new API, 2020 by Dominik Maier - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at: - - http://www.apache.org/licenses/LICENSE-2.0 - - See post_library.so.c for a general discussion of how to implement - postprocessors. This specific postprocessor attempts to fix up PNG - checksums, providing a slightly more complicated example than found - in post_library.so.c. - - Compile with: - - gcc -shared -Wall -O3 post_library_png.so.c -o post_library_png.so -lz - - */ - -#include -#include -#include -#include -#include - -#include - -/* A macro to round an integer up to 4 kB. */ - -#define UP4K(_i) ((((_i) >> 12) + 1) << 12) - -typedef struct post_state { - - unsigned char *buf; - size_t size; - -} post_state_t; - -void *afl_custom_init(void *afl) { - - post_state_t *state = malloc(sizeof(post_state_t)); - if (!state) { - - perror("malloc"); - return NULL; - - } - - state->buf = calloc(sizeof(unsigned char), 4096); - if (!state->buf) { - - free(state); - perror("calloc"); - return NULL; - - } - - return state; - -} - -size_t afl_custom_post_process(post_state_t *data, const unsigned char *in_buf, - unsigned int len, - const unsigned char **out_buf) { - - unsigned char *new_buf = (unsigned char *)in_buf; - unsigned int pos = 8; - - /* Don't do anything if there's not enough room for the PNG header - (8 bytes). */ - - if (len < 8) { - - *out_buf = in_buf; - return len; - - } - - /* Minimum size of a zero-length PNG chunk is 12 bytes; if we - don't have that, we can bail out. */ - - while (pos + 12 <= len) { - - unsigned int chunk_len, real_cksum, file_cksum; - - /* Chunk length is the first big-endian dword in the chunk. */ - - chunk_len = ntohl(*(uint32_t *)(in_buf + pos)); - - /* Bail out if chunk size is too big or goes past EOF. */ - - if (chunk_len > 1024 * 1024 || pos + 12 + chunk_len > len) break; - - /* Chunk checksum is calculated for chunk ID (dword) and the actual - payload. */ - - real_cksum = htonl(crc32(0, in_buf + pos + 4, chunk_len + 4)); - - /* The in-file checksum is the last dword past the chunk data. */ - - file_cksum = *(uint32_t *)(in_buf + pos + 8 + chunk_len); - - /* If the checksums do not match, we need to fix the file. */ - - if (real_cksum != file_cksum) { - - /* First modification? Make a copy of the input buffer. Round size - up to 4 kB to minimize the number of reallocs needed. */ - - if (new_buf == in_buf) { - - if (len <= data->size) { - - new_buf = data->buf; - - } else { - - new_buf = realloc(data->buf, UP4K(len)); - if (!new_buf) { - - *out_buf = in_buf; - return len; - - } - - data->buf = new_buf; - data->size = UP4K(len); - memcpy(new_buf, in_buf, len); - - } - - } - - *(uint32_t *)(new_buf + pos + 8 + chunk_len) = real_cksum; - - } - - /* Skip the entire chunk and move to the next one. 
*/ - - pos += 12 + chunk_len; - - } - - *out_buf = new_buf; - return len; - -} - -/* Gets called afterwards */ -void afl_custom_deinit(post_state_t *data) { - - free(data->buf); - free(data); - -} - diff --git a/utils/custom_mutators/simple-chunk-replace.py b/utils/custom_mutators/simple-chunk-replace.py deleted file mode 100644 index c57218dd..00000000 --- a/utils/custom_mutators/simple-chunk-replace.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -""" -Simple Chunk Cross-Over Replacement Module for AFLFuzz - -@author: Christian Holler (:decoder) - -@license: - -This Source Code Form is subject to the terms of the Mozilla Public -License, v. 2.0. If a copy of the MPL was not distributed with this -file, You can obtain one at http://mozilla.org/MPL/2.0/. - -@contact: choller@mozilla.com -""" - -import random - - -def init(seed): - """ - Called once when AFLFuzz starts up. Used to seed our RNG. - - @type seed: int - @param seed: A 32-bit random value - """ - # Seed our RNG - random.seed(seed) - - -def fuzz(buf, add_buf, max_size): - """ - Called per fuzzing iteration. - - @type buf: bytearray - @param buf: The buffer that should be mutated. - - @type add_buf: bytearray - @param add_buf: A second buffer that can be used as mutation source. - - @type max_size: int - @param max_size: Maximum size of the mutated output. The mutation must not - produce data larger than max_size. - - @rtype: bytearray - @return: A new bytearray containing the mutated data - """ - # Make a copy of our input buffer for returning - ret = bytearray(buf) - - # Take a random fragment length between 2 and 32 (or less if add_buf is shorter) - fragment_len = random.randint(1, min(len(add_buf), 32)) - - # Determine a random source index where to take the data chunk from - rand_src_idx = random.randint(0, len(add_buf) - fragment_len) - - # Determine a random destination index where to put the data chunk - rand_dst_idx = random.randint(0, len(buf)) - - # Make the chunk replacement - ret[rand_dst_idx : rand_dst_idx + fragment_len] = add_buf[ - rand_src_idx : rand_src_idx + fragment_len - ] - - # Return data - return ret diff --git a/utils/custom_mutators/simple_example.c b/utils/custom_mutators/simple_example.c deleted file mode 100644 index d888ec1f..00000000 --- a/utils/custom_mutators/simple_example.c +++ /dev/null @@ -1,74 +0,0 @@ -// This simple example just creates random buffer <= 100 filled with 'A' -// needs -I /path/to/AFLplusplus/include -#include "custom_mutator_helpers.h" - -#include -#include -#include -#include - -#ifndef _FIXED_CHAR - #define _FIXED_CHAR 0x41 -#endif - -typedef struct my_mutator { - - afl_t *afl; - - // Reused buffers: - BUF_VAR(u8, fuzz); - -} my_mutator_t; - -my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { - - srand(seed); - my_mutator_t *data = calloc(1, sizeof(my_mutator_t)); - if (!data) { - - perror("afl_custom_init alloc"); - return NULL; - - } - - data->afl = afl; - - return data; - -} - -size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, - u8 **out_buf, uint8_t *add_buf, - size_t add_buf_size, // add_buf can be NULL - size_t max_size) { - - int size = (rand() % 100) + 1; - if (size > max_size) size = max_size; - u8 *mutated_out = maybe_grow(BUF_PARAMS(data, fuzz), size); - if (!mutated_out) { - - *out_buf = NULL; - perror("custom mutator allocation (maybe_grow)"); - return 0; /* afl-fuzz will very likely error out after this. 
*/ - - } - - memset(mutated_out, _FIXED_CHAR, size); - - *out_buf = mutated_out; - return size; - -} - -/** - * Deinitialize everything - * - * @param data The data ptr from afl_custom_init - */ -void afl_custom_deinit(my_mutator_t *data) { - - free(data->fuzz_buf); - free(data); - -} - diff --git a/utils/custom_mutators/wrapper_afl_min.py b/utils/custom_mutators/wrapper_afl_min.py deleted file mode 100644 index 5cd60031..00000000 --- a/utils/custom_mutators/wrapper_afl_min.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python - -from XmlMutatorMin import XmlMutatorMin - -# Default settings (production mode) - -__mutator__ = None -__seed__ = "RANDOM" -__log__ = False -__log_file__ = "wrapper.log" - - -# AFL functions -def log(text): - """ - Logger - """ - - global __seed__ - global __log__ - global __log_file__ - - if __log__: - with open(__log_file__, "a") as logf: - logf.write("[%s] %s\n" % (__seed__, text)) - - -def init(seed): - """ - Called once when AFL starts up. Seed is used to identify the AFL instance in log files - """ - - global __mutator__ - global __seed__ - - # Get the seed - __seed__ = seed - - # Create a global mutation class - try: - __mutator__ = XmlMutatorMin(__seed__, verbose=__log__) - log("init(): Mutator created") - except RuntimeError as e: - log("init(): Can't create mutator: %s" % e.message) - - -def fuzz(buf, add_buf, max_size): - """ - Called for each fuzzing iteration. - """ - - global __mutator__ - - # Do we have a working mutator object? - if __mutator__ is None: - log("fuzz(): Can't fuzz, no mutator available") - return buf - - # Try to use the AFL buffer - via_buffer = True - - # Interpret the AFL buffer (an array of bytes) as a string - if via_buffer: - try: - buf_str = str(buf) - log("fuzz(): AFL buffer converted to a string") - except Exception: - via_buffer = False - log("fuzz(): Can't convert AFL buffer to a string") - - # Load XML from the AFL string - if via_buffer: - try: - __mutator__.init_from_string(buf_str) - log( - "fuzz(): Mutator successfully initialized with AFL buffer (%d bytes)" - % len(buf_str) - ) - except Exception: - via_buffer = False - log("fuzz(): Can't initialize mutator with AFL buffer") - - # If init from AFL buffer wasn't succesful - if not via_buffer: - log("fuzz(): Returning unmodified AFL buffer") - return buf - - # Sucessful initialization -> mutate - try: - __mutator__.mutate(max=5) - log("fuzz(): Input mutated") - except Exception: - log("fuzz(): Can't mutate input => returning buf") - return buf - - # Convert mutated data to a array of bytes - try: - data = bytearray(__mutator__.save_to_string()) - log("fuzz(): Mutated data converted as bytes") - except Exception: - log("fuzz(): Can't convert mutated data to bytes => returning buf") - return buf - - # Everything went fine, returning mutated content - log("fuzz(): Returning %d bytes" % len(data)) - return data - - -# Main (for debug) -if __name__ == "__main__": - - __log__ = True - __log_file__ = "/dev/stdout" - __seed__ = "RANDOM" - - init(__seed__) - - in_1 = bytearray( - "ffffzzzzzzzzzzzz" - ) - in_2 = bytearray("") - out = fuzz(in_1, in_2) - print(out) -- cgit 1.4.1 From bd38fb672211b571667eeb6d760114dd2ab0c9ec Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 26 Apr 2021 20:19:00 +0200 Subject: fix qdbi --- utils/qbdi_mode/template.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/qbdi_mode/template.cpp b/utils/qbdi_mode/template.cpp index b2066cc8..888ecb58 100755 --- a/utils/qbdi_mode/template.cpp +++ 
b/utils/qbdi_mode/template.cpp @@ -25,7 +25,7 @@ #if (defined(__x86_64__) || defined(__i386__)) && defined(AFL_QEMU_NOT_ZERO) #define INC_AFL_AREA(loc) \ asm volatile( \ - "incb (%0, %1, 1)\n" \ + "addb $1, (%0, %1, 1)\n" \ "adcb $0, (%0, %1, 1)\n" \ : /* no out */ \ : "r"(afl_area_ptr), "r"(loc) \ -- cgit 1.4.1 From 8da5cba4012080afca5e7f7da9aaa6aa6e263f3e Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 26 Apr 2021 20:20:47 +0200 Subject: update util readme --- utils/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/README.md b/utils/README.md index 336b6b6c..b157424f 100644 --- a/utils/README.md +++ b/utils/README.md @@ -32,7 +32,8 @@ Here's a quick overview of the stuff you can find in this directory: with additional gdb metadata. - custom_mutators - examples for the afl++ custom mutator interface in - C and Python + C and Python. Note: They were moved to + ../custom_mutators/examples/ - distributed_fuzzing - a sample script for synchronizing fuzzer instances across multiple machines (see parallel_fuzzing.md). -- cgit 1.4.1 From dde0538b484df627dac14ff030dd09f55c78558e Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 28 Apr 2021 10:59:34 +0200 Subject: nits --- docs/Changelog.md | 1 + qemu_mode/qemuafl | 2 +- utils/aflpp_driver/aflpp_qemu_driver_hook.c | 10 +++++----- utils/qbdi_mode/template.cpp | 2 +- utils/qemu_persistent_hook/read_into_rdi.c | 10 +++++----- 5 files changed, 13 insertions(+), 12 deletions(-) (limited to 'utils') diff --git a/docs/Changelog.md b/docs/Changelog.md index 520b13b1..90a1d140 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -10,6 +10,7 @@ sending a mail to . ### Version ++3.13a (development) - frida_mode - new mode that uses frida to fuzz binary-only targets, + it currently supports persistent mode and cmplog. thanks to @WorksButNotTested! - create a fuzzing dictionary with the help of CodeQL thanks to @microsvuln! see utils/autodict_ql diff --git a/qemu_mode/qemuafl b/qemu_mode/qemuafl index d1ca56b8..d73b0336 160000 --- a/qemu_mode/qemuafl +++ b/qemu_mode/qemuafl @@ -1 +1 @@ -Subproject commit d1ca56b84e78f821406eef28d836918edfc8d610 +Subproject commit d73b0336b451fd034e5f469089fb7ee96c80adf2 diff --git a/utils/aflpp_driver/aflpp_qemu_driver_hook.c b/utils/aflpp_driver/aflpp_qemu_driver_hook.c index d3dd98b0..2979fadc 100644 --- a/utils/aflpp_driver/aflpp_qemu_driver_hook.c +++ b/utils/aflpp_driver/aflpp_qemu_driver_hook.c @@ -3,12 +3,12 @@ #include #include -void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, - uint8_t *input_buf, uint32_t input_buf_len) { - #define g2h(x) ((void *)((unsigned long)(x) + guest_base)) #define h2g(x) ((uint64_t)(x)-guest_base) +void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { + // In this example the register RDI is pointing to the memory location // of the target buffer, and the length of the input is in RSI. // This can be seen with a debugger, e.g. 
gdb (and "disass main") @@ -16,11 +16,11 @@ void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, memcpy(g2h(regs->rdi), input_buf, input_buf_len); regs->rsi = input_buf_len; +} + #undef g2h #undef h2g -} - int afl_persistent_hook_init(void) { // 1 for shared memory input (faster), 0 for normal input (you have to use diff --git a/utils/qbdi_mode/template.cpp b/utils/qbdi_mode/template.cpp index 888ecb58..182a014b 100755 --- a/utils/qbdi_mode/template.cpp +++ b/utils/qbdi_mode/template.cpp @@ -25,7 +25,7 @@ #if (defined(__x86_64__) || defined(__i386__)) && defined(AFL_QEMU_NOT_ZERO) #define INC_AFL_AREA(loc) \ asm volatile( \ - "addb $1, (%0, %1, 1)\n" \ + "addb $1, (%0, %1, 1)\n" \ "adcb $0, (%0, %1, 1)\n" \ : /* no out */ \ : "r"(afl_area_ptr), "r"(loc) \ diff --git a/utils/qemu_persistent_hook/read_into_rdi.c b/utils/qemu_persistent_hook/read_into_rdi.c index c1c6642f..14b2ed85 100644 --- a/utils/qemu_persistent_hook/read_into_rdi.c +++ b/utils/qemu_persistent_hook/read_into_rdi.c @@ -3,12 +3,12 @@ #include #include -void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, - uint8_t *input_buf, uint32_t input_buf_len) { - #define g2h(x) ((void *)((unsigned long)(x) + guest_base)) #define h2g(x) ((uint64_t)(x)-guest_base) +void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { + // In this example the register RDI is pointing to the memory location // of the target buffer, and the length of the input is in RSI. // This can be seen with a debugger, e.g. gdb (and "disass main") @@ -19,11 +19,11 @@ void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, memcpy(g2h(regs->rdi), input_buf, input_buf_len); regs->rsi = input_buf_len; +} + #undef g2h #undef h2g -} - int afl_persistent_hook_init(void) { // 1 for shared memory input (faster), 0 for normal input (you have to use -- cgit 1.4.1 From f112357e6165b583924b9b4e44b5b6ef522f722f Mon Sep 17 00:00:00 2001 From: Dmitry Zheregelya Date: Wed, 28 Apr 2021 18:42:20 +0300 Subject: Integer overflow/underflow fixes in libdislocator (#889) * libdislocator: fixing integer overflow in 'max_mem' variable and setting 'max_mem' type to 'size_t' * libdislocator: fixing potential integer underflow in 'total_mem' variable due to its different values in different threads --- utils/libdislocator/libdislocator.so.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'utils') diff --git a/utils/libdislocator/libdislocator.so.c b/utils/libdislocator/libdislocator.so.c index 1b247c86..7c651afd 100644 --- a/utils/libdislocator/libdislocator.so.c +++ b/utils/libdislocator/libdislocator.so.c @@ -144,7 +144,7 @@ typedef struct { /* Configurable stuff (use AFL_LD_* to set): */ -static u32 max_mem = MAX_ALLOC; /* Max heap usage to permit */ +static size_t max_mem = MAX_ALLOC; /* Max heap usage to permit */ static u8 alloc_verbose, /* Additional debug messages */ hard_fail, /* abort() when max_mem exceeded? */ no_calloc_over, /* abort() on calloc() overflows? 
*/ @@ -154,7 +154,7 @@ static u8 alloc_verbose, /* Additional debug messages */ #define __thread #warning no thread support available #endif -static __thread size_t total_mem; /* Currently allocated mem */ +static _Atomic size_t total_mem; /* Currently allocated mem */ static __thread u32 call_depth; /* To avoid recursion via fprintf() */ static u32 alloc_canary; @@ -172,9 +172,9 @@ static void *__dislocator_alloc(size_t len) { if (total_mem + len > max_mem || total_mem + len < total_mem) { - if (hard_fail) FATAL("total allocs exceed %u MB", max_mem / 1024 / 1024); + if (hard_fail) FATAL("total allocs exceed %zu MB", max_mem / 1024 / 1024); - DEBUGF("total allocs exceed %u MB, returning NULL", max_mem / 1024 / 1024); + DEBUGF("total allocs exceed %zu MB, returning NULL", max_mem / 1024 / 1024); return NULL; @@ -500,19 +500,20 @@ size_t malloc_usable_size(const void *ptr) { __attribute__((constructor)) void __dislocator_init(void) { - u8 *tmp = (u8 *)getenv("AFL_LD_LIMIT_MB"); + char *tmp = getenv("AFL_LD_LIMIT_MB"); if (tmp) { - u8 *tok; - s32 mmem = (s32)strtol((char *)tmp, (char **)&tok, 10); - if (*tok != '\0' || errno == ERANGE) FATAL("Bad value for AFL_LD_LIMIT_MB"); + char *tok; + unsigned long long mmem = strtoull(tmp, &tok, 10); + if (*tok != '\0' || errno == ERANGE || mmem > SIZE_MAX / 1024 / 1024) + FATAL("Bad value for AFL_LD_LIMIT_MB"); max_mem = mmem * 1024 * 1024; } alloc_canary = ALLOC_CANARY; - tmp = (u8 *)getenv("AFL_RANDOM_ALLOC_CANARY"); + tmp = getenv("AFL_RANDOM_ALLOC_CANARY"); if (tmp) arc4random_buf(&alloc_canary, sizeof(alloc_canary)); @@ -549,4 +550,3 @@ void *erealloc(void *ptr, size_t len) { return realloc(ptr, len); } - -- cgit 1.4.1 From 29dbe665a7a7dc6b2232487dbc6c1ebecbbdfb06 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 29 Apr 2021 09:12:21 +0200 Subject: nits --- src/afl-fuzz.c | 2 ++ utils/libdislocator/libdislocator.so.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 58b0a5c2..1b3e303c 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -1358,6 +1358,7 @@ int main(int argc, char **argv_orig, char **envp) { afl_preload = getenv("AFL_PRELOAD"); u8 *frida_binary = find_afl_binary(argv[0], "afl-frida-trace.so"); + OKF("Injecting %s ...", frida_binary); if (afl_preload) { frida_afl_preload = alloc_printf("%s:%s", afl_preload, frida_binary); @@ -1383,6 +1384,7 @@ int main(int argc, char **argv_orig, char **envp) { } else if (afl->fsrv.frida_mode) { u8 *frida_binary = find_afl_binary(argv[0], "afl-frida-trace.so"); + OKF("Injecting %s ...", frida_binary); setenv("LD_PRELOAD", frida_binary, 1); setenv("DYLD_INSERT_LIBRARIES", frida_binary, 1); ck_free(frida_binary); diff --git a/utils/libdislocator/libdislocator.so.c b/utils/libdislocator/libdislocator.so.c index 7c651afd..dde78f7b 100644 --- a/utils/libdislocator/libdislocator.so.c +++ b/utils/libdislocator/libdislocator.so.c @@ -145,7 +145,7 @@ typedef struct { /* Configurable stuff (use AFL_LD_* to set): */ static size_t max_mem = MAX_ALLOC; /* Max heap usage to permit */ -static u8 alloc_verbose, /* Additional debug messages */ +static u8 alloc_verbose, /* Additional debug messages */ hard_fail, /* abort() when max_mem exceeded? */ no_calloc_over, /* abort() on calloc() overflows? 
*/ align_allocations; /* Force alignment to sizeof(void*) */ @@ -504,7 +504,7 @@ __attribute__((constructor)) void __dislocator_init(void) { if (tmp) { - char *tok; + char * tok; unsigned long long mmem = strtoull(tmp, &tok, 10); if (*tok != '\0' || errno == ERANGE || mmem > SIZE_MAX / 1024 / 1024) FATAL("Bad value for AFL_LD_LIMIT_MB"); @@ -550,3 +550,4 @@ void *erealloc(void *ptr, size_t len) { return realloc(ptr, len); } + -- cgit 1.4.1 From c9d066038fe0bbf8e0ab0a481ca320ca1c31b1bf Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 30 Apr 2021 10:27:43 +0200 Subject: fix PCGUARD, build aflpp_driver with fPIC --- docs/Changelog.md | 5 +- instrumentation/SanitizerCoverageLTO.so.cc | 15 ++-- instrumentation/SanitizerCoveragePCGUARD.so.cc | 102 +++++++++++-------------- utils/afl_proxy/afl-proxy.c | 6 ++ utils/aflpp_driver/GNUmakefile | 4 +- 5 files changed, 64 insertions(+), 68 deletions(-) (limited to 'utils') diff --git a/docs/Changelog.md b/docs/Changelog.md index 90a1d140..5c0f2a9e 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -32,10 +32,13 @@ sending a mail to . afl++ ignores these and uses them for splicing instead. - afl-cc: - We do not support llvm versions prior 6.0 anymore + - Fix for -pie compiled binaries with default afl-clang-fast PCGUARD - Leak Sanitizer (AFL_USE_LSAN) added by Joshua Rogers, thanks! - Removed InsTrim instrumentation as it is not as good as PCGUARD - Removed automatic linking with -lc++ for LTO mode - - utils/aflpp_driver/aflpp_qemu_driver_hook fixed to work with qemu mode + - utils/aflpp_driver: + - aflpp_qemu_driver_hook fixed to work with qemu_mode + - aflpp_driver now compiled with -fPIC - add -d (add dead fuzzer stats) to afl-whatsup ### Version ++3.12c (release) diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc index 6dd390e6..2f4337eb 100644 --- a/instrumentation/SanitizerCoverageLTO.so.cc +++ b/instrumentation/SanitizerCoverageLTO.so.cc @@ -60,15 +60,14 @@ using namespace llvm; #define DEBUG_TYPE "sancov" -static const char *const SanCovTracePCIndirName = - "__sanitizer_cov_trace_pc_indir"; -static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc"; -// static const char *const SanCovTracePCGuardName = +const char SanCovTracePCIndirName[] = "__sanitizer_cov_trace_pc_indir"; +const char SanCovTracePCName[] = "__sanitizer_cov_trace_pc"; +// const char SanCovTracePCGuardName = // "__sanitizer_cov_trace_pc_guard"; -static const char *const SanCovGuardsSectionName = "sancov_guards"; -static const char *const SanCovCountersSectionName = "sancov_cntrs"; -static const char *const SanCovBoolFlagSectionName = "sancov_bools"; -static const char *const SanCovPCsSectionName = "sancov_pcs"; +const char SanCovGuardsSectionName[] = "sancov_guards"; +const char SanCovCountersSectionName[] = "sancov_cntrs"; +const char SanCovBoolFlagSectionName[] = "sancov_bools"; +const char SanCovPCsSectionName[] = "sancov_pcs"; static cl::opt ClCoverageLevel( "lto-coverage-level", diff --git a/instrumentation/SanitizerCoveragePCGUARD.so.cc b/instrumentation/SanitizerCoveragePCGUARD.so.cc index 09cda9e2..8878d3b1 100644 --- a/instrumentation/SanitizerCoveragePCGUARD.so.cc +++ b/instrumentation/SanitizerCoveragePCGUARD.so.cc @@ -52,49 +52,39 @@ using namespace llvm; #define DEBUG_TYPE "sancov" -static const char *const SanCovTracePCIndirName = - "__sanitizer_cov_trace_pc_indir"; -static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc"; -static const char *const SanCovTraceCmp1 = 
"__sanitizer_cov_trace_cmp1"; -static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2"; -static const char *const SanCovTraceCmp4 = "__sanitizer_cov_trace_cmp4"; -static const char *const SanCovTraceCmp8 = "__sanitizer_cov_trace_cmp8"; -static const char *const SanCovTraceConstCmp1 = - "__sanitizer_cov_trace_const_cmp1"; -static const char *const SanCovTraceConstCmp2 = - "__sanitizer_cov_trace_const_cmp2"; -static const char *const SanCovTraceConstCmp4 = - "__sanitizer_cov_trace_const_cmp4"; -static const char *const SanCovTraceConstCmp8 = - "__sanitizer_cov_trace_const_cmp8"; -static const char *const SanCovTraceDiv4 = "__sanitizer_cov_trace_div4"; -static const char *const SanCovTraceDiv8 = "__sanitizer_cov_trace_div8"; -static const char *const SanCovTraceGep = "__sanitizer_cov_trace_gep"; -static const char *const SanCovTraceSwitchName = "__sanitizer_cov_trace_switch"; -static const char *const SanCovModuleCtorTracePcGuardName = +const char SanCovTracePCIndirName[] = "__sanitizer_cov_trace_pc_indir"; +const char SanCovTracePCName[] = "__sanitizer_cov_trace_pc"; +const char SanCovTraceCmp1[] = "__sanitizer_cov_trace_cmp1"; +const char SanCovTraceCmp2[] = "__sanitizer_cov_trace_cmp2"; +const char SanCovTraceCmp4[] = "__sanitizer_cov_trace_cmp4"; +const char SanCovTraceCmp8[] = "__sanitizer_cov_trace_cmp8"; +const char SanCovTraceConstCmp1[] = "__sanitizer_cov_trace_const_cmp1"; +const char SanCovTraceConstCmp2[] = "__sanitizer_cov_trace_const_cmp2"; +const char SanCovTraceConstCmp4[] = "__sanitizer_cov_trace_const_cmp4"; +const char SanCovTraceConstCmp8[] = "__sanitizer_cov_trace_const_cmp8"; +const char SanCovTraceDiv4[] = "__sanitizer_cov_trace_div4"; +const char SanCovTraceDiv8[] = "__sanitizer_cov_trace_div8"; +const char SanCovTraceGep[] = "__sanitizer_cov_trace_gep"; +const char SanCovTraceSwitchName[] = "__sanitizer_cov_trace_switch"; +const char SanCovModuleCtorTracePcGuardName[] = "sancov.module_ctor_trace_pc_guard"; -static const char *const SanCovModuleCtor8bitCountersName = +const char SanCovModuleCtor8bitCountersName[] = "sancov.module_ctor_8bit_counters"; -static const char *const SanCovModuleCtorBoolFlagName = - "sancov.module_ctor_bool_flag"; +const char SanCovModuleCtorBoolFlagName[] = "sancov.module_ctor_bool_flag"; static const uint64_t SanCtorAndDtorPriority = 2; -static const char *const SanCovTracePCGuardName = - "__sanitizer_cov_trace_pc_guard"; -static const char *const SanCovTracePCGuardInitName = - "__sanitizer_cov_trace_pc_guard_init"; -static const char *const SanCov8bitCountersInitName = - "__sanitizer_cov_8bit_counters_init"; -static const char *const SanCovBoolFlagInitName = - "__sanitizer_cov_bool_flag_init"; -static const char *const SanCovPCsInitName = "__sanitizer_cov_pcs_init"; +const char SanCovTracePCGuardName[] = "__sanitizer_cov_trace_pc_guard"; +const char SanCovTracePCGuardInitName[] = "__sanitizer_cov_trace_pc_guard_init"; +const char SanCov8bitCountersInitName[] = "__sanitizer_cov_8bit_counters_init"; +const char SanCovBoolFlagInitName[] = "__sanitizer_cov_bool_flag_init"; +const char SanCovPCsInitName[] = "__sanitizer_cov_pcs_init"; -static const char *const SanCovGuardsSectionName = "sancov_guards"; -static const char *const SanCovCountersSectionName = "sancov_cntrs"; -static const char *const SanCovBoolFlagSectionName = "sancov_bools"; -static const char *const SanCovPCsSectionName = "sancov_pcs"; +const char SanCovGuardsSectionName[] = "sancov_guards"; +const char SanCovCountersSectionName[] = "sancov_cntrs"; +const char 
SanCovBoolFlagSectionName[] = "sancov_bools"; +const char SanCovPCsSectionName[] = "sancov_pcs"; -static const char *const SanCovLowestStackName = "__sancov_lowest_stack"; +const char SanCovLowestStackName[] = "__sancov_lowest_stack"; static char *skip_nozero; @@ -320,12 +310,12 @@ std::pair ModuleSanitizerCoverage::CreateSecStartEnd( Module &M, const char *Section, Type *Ty) { GlobalVariable *SecStart = new GlobalVariable( - M, Ty->getPointerElementType(), false, GlobalVariable::ExternalLinkage, - nullptr, getSectionStart(Section)); + M, Ty->getPointerElementType(), false, + GlobalVariable::ExternalWeakLinkage, nullptr, getSectionStart(Section)); SecStart->setVisibility(GlobalValue::HiddenVisibility); GlobalVariable *SecEnd = new GlobalVariable( - M, Ty->getPointerElementType(), false, GlobalVariable::ExternalLinkage, - nullptr, getSectionEnd(Section)); + M, Ty->getPointerElementType(), false, + GlobalVariable::ExternalWeakLinkage, nullptr, getSectionEnd(Section)); SecEnd->setVisibility(GlobalValue::HiddenVisibility); IRBuilder<> IRB(M.getContext()); if (!TargetTriple.isOSBinFormatCOFF()) @@ -573,7 +563,7 @@ bool ModuleSanitizerCoverage::instrumentModule( } // True if block has successors and it dominates all of them. -static bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) { +bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) { if (succ_begin(BB) == succ_end(BB)) return false; @@ -588,8 +578,7 @@ static bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) { } // True if block has predecessors and it postdominates all of them. -static bool isFullPostDominator(const BasicBlock * BB, - const PostDominatorTree *PDT) { +bool isFullPostDominator(const BasicBlock *BB, const PostDominatorTree *PDT) { if (pred_begin(BB) == pred_end(BB)) return false; @@ -603,10 +592,10 @@ static bool isFullPostDominator(const BasicBlock * BB, } -static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB, - const DominatorTree * DT, - const PostDominatorTree * PDT, - const SanitizerCoverageOptions &Options) { +bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB, + const DominatorTree * DT, + const PostDominatorTree * PDT, + const SanitizerCoverageOptions &Options) { // Don't insert coverage for blocks containing nothing but unreachable: we // will never call __sanitizer_cov() for them, so counting them in @@ -636,8 +625,7 @@ static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB, // A twist here is that we treat From->To as a backedge if // * To dominates From or // * To->UniqueSuccessor dominates From -static bool IsBackEdge(BasicBlock *From, BasicBlock *To, - const DominatorTree *DT) { +bool IsBackEdge(BasicBlock *From, BasicBlock *To, const DominatorTree *DT) { if (DT->dominates(To, From)) return true; if (auto Next = To->getUniqueSuccessor()) @@ -651,8 +639,8 @@ static bool IsBackEdge(BasicBlock *From, BasicBlock *To, // // Note that Cmp pruning is controlled by the same flag as the // BB pruning. -static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT, - const SanitizerCoverageOptions &Options) { +bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT, + const SanitizerCoverageOptions &Options) { if (!Options.NoPrune) if (CMP->hasOneUse()) @@ -1046,7 +1034,7 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, if (IsEntryBB) { - // Keep static allocas and llvm.localescape calls in the entry block. 
Even + // Keep allocas and llvm.localescape calls in the entry block. Even // if we aren't splitting the block, it's nice for allocas to be before // calls. IP = PrepareToSplitEntryBlock(BB, IP); @@ -1221,17 +1209,17 @@ ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass( } -static void registerPCGUARDPass(const PassManagerBuilder &, - legacy::PassManagerBase &PM) { +void registerPCGUARDPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { auto p = new ModuleSanitizerCoverageLegacyPass(); PM.add(p); } -static RegisterStandardPasses RegisterCompTransPass( +RegisterStandardPasses RegisterCompTransPass( PassManagerBuilder::EP_OptimizerLast, registerPCGUARDPass); -static RegisterStandardPasses RegisterCompTransPass0( +RegisterStandardPasses RegisterCompTransPass0( PassManagerBuilder::EP_EnabledOnOptLevel0, registerPCGUARDPass); diff --git a/utils/afl_proxy/afl-proxy.c b/utils/afl_proxy/afl-proxy.c index aa7a361a..a80d8a0b 100644 --- a/utils/afl_proxy/afl-proxy.c +++ b/utils/afl_proxy/afl-proxy.c @@ -70,12 +70,18 @@ static void __afl_map_shm(void) { char *id_str = getenv(SHM_ENV_VAR); char *ptr; + + /* NOTE TODO BUG FIXME: if you want to supply a variable sized map then + uncomment the following: */ + + /* if ((ptr = getenv("AFL_MAP_SIZE")) != NULL) { u32 val = atoi(ptr); if (val > 0) __afl_map_size = val; } + */ if (__afl_map_size > MAP_SIZE) { diff --git a/utils/aflpp_driver/GNUmakefile b/utils/aflpp_driver/GNUmakefile index 8ac054a6..556f6420 100644 --- a/utils/aflpp_driver/GNUmakefile +++ b/utils/aflpp_driver/GNUmakefile @@ -7,7 +7,7 @@ ifneq "" "$(LLVM_BINDIR)" LLVM_BINDIR := $(LLVM_BINDIR)/ endif -CFLAGS := -O3 -funroll-loops -g +CFLAGS := -O3 -funroll-loops -g -fPIC all: libAFLDriver.a libAFLQemuDriver.a aflpp_qemu_driver_hook.so @@ -36,7 +36,7 @@ aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o -$(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c - -$(LLVM_BINDIR)clang -fPIC $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c + -$(LLVM_BINDIR)clang $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c test: debug #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c -- cgit 1.4.1 From 070c9923e22af0f577ac49f1fc44448a0e00aca2 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 30 Apr 2021 10:33:00 +0200 Subject: nits --- frida_mode/src/util.c | 3 ++- utils/afl_proxy/afl-proxy.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/frida_mode/src/util.c b/frida_mode/src/util.c index bd13781d..86b94970 100644 --- a/frida_mode/src/util.c +++ b/frida_mode/src/util.c @@ -20,7 +20,8 @@ guint64 util_read_address(char *key) { if (!g_ascii_isxdigit(*c)) { - FATAL("Invalid address not formed of hex digits: %s ('%c')\n", value_str, *c); + FATAL("Invalid address not formed of hex digits: %s ('%c')\n", value_str, + *c); } diff --git a/utils/afl_proxy/afl-proxy.c b/utils/afl_proxy/afl-proxy.c index a80d8a0b..2d8ba991 100644 --- a/utils/afl_proxy/afl-proxy.c +++ b/utils/afl_proxy/afl-proxy.c @@ -70,7 +70,6 @@ static void __afl_map_shm(void) { char *id_str = getenv(SHM_ENV_VAR); char *ptr; - /* NOTE TODO BUG FIXME: if you want to supply a variable sized map then uncomment the following: */ @@ -81,6 +80,7 @@ static void __afl_map_shm(void) { if (val > 0) __afl_map_size = val; } + */ if (__afl_map_size > MAP_SIZE) { -- cgit 1.4.1 From f4cc718fdc4571f56280a1efad3645125bee2154 
Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 30 Apr 2021 13:56:23 +0200 Subject: let aflpp_qemu_driver_hook.so build fail gracefully --- utils/aflpp_driver/GNUmakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/utils/aflpp_driver/GNUmakefile b/utils/aflpp_driver/GNUmakefile index 556f6420..ad99b893 100644 --- a/utils/aflpp_driver/GNUmakefile +++ b/utils/aflpp_driver/GNUmakefile @@ -33,10 +33,10 @@ libAFLQemuDriver.a: aflpp_qemu_driver.o -cp -vf libAFLQemuDriver.a ../../ aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o - -$(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so + -test -e aflpp_qemu_driver_hook.o && $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so || echo "Note: Optional aflpp_qemu_driver_hook.so not built." aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c - -$(LLVM_BINDIR)clang $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c + -test -e ../../qemu_mode/qemuafl/qemuafl/api.h && $(LLVM_BINDIR)clang $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c || echo "Note: Optional aflpp_qemu_driver_hook.o not built." test: debug #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c -- cgit 1.4.1 From 86452cc959bd4b0d5fe6e60d0eefbc7848fe38e2 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 30 Apr 2021 23:41:06 +0200 Subject: fix stdin trimming --- docs/Changelog.md | 1 + src/afl-forkserver.c | 2 +- src/afl-fuzz-run.c | 10 ++++------ utils/afl_proxy/afl-proxy.c | 23 +++++++++++++++-------- 4 files changed, 21 insertions(+), 15 deletions(-) (limited to 'utils') diff --git a/docs/Changelog.md b/docs/Changelog.md index 459c2f35..6a25865d 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -20,6 +20,7 @@ sending a mail to . - add recording of previous fuzz attempts for persistent mode to allow replay of non-reproducable crashes, see AFL_PERSISTENT_RECORD in config.h and docs/envs.h + - fixed a bug when trimming for stdin targets - default cmplog level (-l) is now 2, better efficiency. - cmplog level 3 (-l 3) now performs redqueen on everything. use with care. 
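The stdin-trimming fix in this commit touches write_with_gap() in afl-fuzz-run.c, which writes a test case while leaving out a gap starting at skip_at and then delivers the result via shared memory, a file, or stdin. As a rough, self-contained sketch of the copy step only (the function name, signature and bounds handling below are made up for illustration and are not the AFL++ code itself):

  #include <stddef.h>
  #include <string.h>

  /* Illustration: build a trim candidate by dropping the bytes in
     [skip_at, skip_at + skip_len) from src. Assumes skip_at <= len. */
  static size_t copy_with_gap(unsigned char *dst, const unsigned char *src,
                              size_t len, size_t skip_at, size_t skip_len) {

    size_t tail = (skip_at + skip_len < len) ? len - (skip_at + skip_len) : 0;

    memcpy(dst, src, skip_at);                            /* head before gap */
    if (tail) memcpy(dst + skip_at, src + skip_at + skip_len, tail); /* tail */

    return skip_at + tail;                    /* length of the trim candidate */

  }

The actual fix in the hunks below is in how the delivery channel is selected (checking use_shmem_fuzz and use_stdin instead of out_file), which is what the stdin-trimming entry in the changelog above refers to.
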
diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c index d533fd4a..a07e78b4 100644 --- a/src/afl-forkserver.c +++ b/src/afl-forkserver.c @@ -1090,7 +1090,7 @@ void afl_fsrv_write_to_testcase(afl_forkserver_t *fsrv, u8 *buf, size_t len) { #endif - if (likely(fsrv->use_shmem_fuzz && fsrv->shmem_fuzz)) { + if (likely(fsrv->use_shmem_fuzz)) { if (unlikely(len > MAX_FILE)) len = MAX_FILE; diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c index a7b071a5..397d62bf 100644 --- a/src/afl-fuzz-run.c +++ b/src/afl-fuzz-run.c @@ -203,7 +203,7 @@ static void write_with_gap(afl_state_t *afl, u8 *mem, u32 len, u32 skip_at, } - if (afl->fsrv.shmem_fuzz) { + if (likely(afl->fsrv.use_shmem_fuzz)) { if (!post_process_skipped) { @@ -211,9 +211,7 @@ static void write_with_gap(afl_state_t *afl, u8 *mem, u32 len, u32 skip_at, memcpy(afl->fsrv.shmem_fuzz, new_mem, new_size); - } - - else { + } else { memcpy(afl->fsrv.shmem_fuzz, mem, skip_at); @@ -244,7 +242,7 @@ static void write_with_gap(afl_state_t *afl, u8 *mem, u32 len, u32 skip_at, return; - } else if (afl->fsrv.out_file) { + } else if (unlikely(!afl->fsrv.use_stdin)) { if (unlikely(afl->no_unlink)) { @@ -279,7 +277,7 @@ static void write_with_gap(afl_state_t *afl, u8 *mem, u32 len, u32 skip_at, } - if (!afl->fsrv.out_file) { + if (afl->fsrv.use_stdin) { if (ftruncate(fd, new_size)) { PFATAL("ftruncate() failed"); } lseek(fd, 0, SEEK_SET); diff --git a/utils/afl_proxy/afl-proxy.c b/utils/afl_proxy/afl-proxy.c index 2d8ba991..6006e238 100644 --- a/utils/afl_proxy/afl-proxy.c +++ b/utils/afl_proxy/afl-proxy.c @@ -195,10 +195,7 @@ static u32 __afl_next_testcase(u8 *buf, u32 max_len) { /* report that we are starting the target */ if (write(FORKSRV_FD + 1, &res, 4) != 4) return 0; - if (status < 1) - return 0; - else - return status; + return status; } @@ -216,7 +213,7 @@ int main(int argc, char *argv[]) { /* This is were the testcase data is written into */ u8 buf[1024]; // this is the maximum size for a test case! set it! - u32 len; + s32 len; /* here you specify the map size you need that you are reporting to afl-fuzz. Any value is fine as long as it can be divided by 32. */ @@ -228,10 +225,20 @@ int main(int argc, char *argv[]) { while ((len = __afl_next_testcase(buf, sizeof(buf))) > 0) { - /* here you have to create the magic that feeds the buf/len to the - target and write the coverage to __afl_area_ptr */ + if (len > 4) { // the minimum data size you need for the target - // ... the magic ... + /* here you have to create the magic that feeds the buf/len to the + target and write the coverage to __afl_area_ptr */ + + // ... the magic ... 
+ + // remove this, this is just to make afl-fuzz not complain when run + if (buf[0] == 0xff) + __afl_area_ptr[1] = 1; + else + __afl_area_ptr[2] = 2; + + } /* report the test case is done and wait for the next */ __afl_end_testcase(); -- cgit 1.4.1 From d2e85cce5048f36aef27a26d907670dda61837e4 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 30 May 2021 00:36:56 +0200 Subject: afl-cmin help fix, aflpp_driver - + @@ support --- afl-cmin | 8 ++--- frida_mode/src/instrument/instrument_debug.c | 1 - utils/aflpp_driver/README.md | 6 ++++ utils/aflpp_driver/aflpp_driver.c | 44 +++++++++++++++++++++------- 4 files changed, 44 insertions(+), 15 deletions(-) (limited to 'utils') diff --git a/afl-cmin b/afl-cmin index adcbb221..9fa63ec6 100755 --- a/afl-cmin +++ b/afl-cmin @@ -119,13 +119,13 @@ function usage() { "Environment variables used:\n" \ "AFL_ALLOW_TMP: allow unsafe use of input/output directories under {/var}/tmp\n" \ "AFL_CRASH_EXITCODE: optional child exit code to be interpreted as crash\n" \ -"AFL_FORKSRV_INIT_TMOUT: time the fuzzer waits for the target to come up, initially\n" \ +"AFL_FORKSRV_INIT_TMOUT: time the fuzzer waits for the forkserver to come up\n" \ "AFL_KEEP_TRACES: leave the temporary /.traces directory\n" \ -"AFL_KILL_SIGNAL: Signal ID delivered to child processes on timeout, etc. (default: SIGKILL)\n" -"AFL_PATH: path for the afl-showmap binary if not found anywhere else\n" \ +"AFL_KILL_SIGNAL: Signal delivered to child processes on timeout (default: SIGKILL)\n" \ +"AFL_PATH: path for the afl-showmap binary if not found anywhere in PATH\n" \ "AFL_PRINT_FILENAMES: If set, the filename currently processed will be " \ "printed to stdout\n" \ -"AFL_SKIP_BIN_CHECK: skip check for target binary\n" +"AFL_SKIP_BIN_CHECK: skip afl instrumentation checks for target binary\n" exit 1 } diff --git a/frida_mode/src/instrument/instrument_debug.c b/frida_mode/src/instrument/instrument_debug.c index 124843d8..be72ef89 100644 --- a/frida_mode/src/instrument/instrument_debug.c +++ b/frida_mode/src/instrument/instrument_debug.c @@ -20,7 +20,6 @@ static void instrument_debug(char *format, ...) { int len; va_start(ap, format); - ret = vsnprintf(buffer, sizeof(buffer) - 1, format, ap); va_end(ap); diff --git a/utils/aflpp_driver/README.md b/utils/aflpp_driver/README.md index 01bd10c0..f03c2fe3 100644 --- a/utils/aflpp_driver/README.md +++ b/utils/aflpp_driver/README.md @@ -13,6 +13,12 @@ If this is the clang compile command to build for libfuzzer: then just switch `clang++` with `afl-clang-fast++` and our compiler will magically insert libAFLDriver.a :) +To use shared-memory testcases, you need nothing to do. +To use stdin testcases give `-` as the only command line parameter. +To use file input testcases give `@@` as the only command line parameter. + +IMPORTANT: if you use `afl-cmin` or `afl-cmin.bash` then either pass `-` +or `@@` as command line parameters. 
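For context, the harness that libAFLDriver.a wraps is an ordinary libFuzzer-style target; the only hard requirement is the LLVMFuzzerTestOneInput entry point. A minimal example (the "FUZZ" magic check is invented purely for illustration):

  #include <stdint.h>
  #include <stddef.h>

  /* aflpp_driver provides main() and calls this function for every test case,
     whether it arrives via shared memory, stdin ('-') or a file ('@@'). */
  int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

    if (size < 4) return 0;                    /* too small to be interesting */

    if (data[0] == 'F' && data[1] == 'U' && data[2] == 'Z' && data[3] == 'Z') {

      /* call the code under test here */

    }

    return 0;

  }

Compile it with afl-clang-fast (or afl-clang-fast++ for C++) as shown above and the driver is linked in automatically; the resulting binary can then be fuzzed with afl-fuzz, minimized with afl-cmin, or run on individual files as described in this section.
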
## aflpp_qemu_driver diff --git a/utils/aflpp_driver/aflpp_driver.c b/utils/aflpp_driver/aflpp_driver.c index ad781e64..c094c425 100644 --- a/utils/aflpp_driver/aflpp_driver.c +++ b/utils/aflpp_driver/aflpp_driver.c @@ -174,11 +174,17 @@ size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) { static int ExecuteFilesOnyByOne(int argc, char **argv) { unsigned char *buf = (unsigned char *)malloc(MAX_FILE); + for (int i = 1; i < argc; i++) { - int fd = open(argv[i], O_RDONLY); - if (fd == -1) continue; + int fd = 0; + + if (strcmp(argv[i], "-") != 0) { fd = open(argv[i], O_RDONLY); } + + if (fd == -1) { continue; } + ssize_t length = read(fd, buf, MAX_FILE); + if (length > 0) { printf("Reading %zu bytes from %s\n", length, argv[i]); @@ -187,7 +193,7 @@ static int ExecuteFilesOnyByOne(int argc, char **argv) { } - close(fd); + if (fd > 0) { close(fd); } } @@ -199,15 +205,19 @@ static int ExecuteFilesOnyByOne(int argc, char **argv) { int main(int argc, char **argv) { printf( - "======================= INFO =========================\n" + "============================== INFO ================================\n" "This binary is built for afl++.\n" + "To use with afl-cmin or afl-cmin.bash pass '-' as single command line " + "option\n" "To run the target function on individual input(s) execute this:\n" " %s INPUT_FILE1 [INPUT_FILE2 ... ]\n" "To fuzz with afl-fuzz execute this:\n" " afl-fuzz [afl-flags] -- %s [-N]\n" "afl-fuzz will run N iterations before re-spawning the process (default: " "INT_MAX)\n" - "======================================================\n", + "For stdin input processing, pass '-' as single command line option.\n" + "For file input processing, pass '@@' as single command line option.\n" + "===================================================================\n", argv[0], argv[0]); if (getenv("AFL_GDB")) { @@ -237,22 +247,35 @@ int main(int argc, char **argv) { memcpy(dummy_input, (void *)AFL_PERSISTENT, sizeof(AFL_PERSISTENT)); memcpy(dummy_input + 32, (void *)AFL_DEFER_FORKSVR, sizeof(AFL_DEFER_FORKSVR)); + int N = INT_MAX; - if (argc == 2 && argv[1][0] == '-') + + if (argc == 2 && !strcmp(argv[1], "-")) { + + __afl_sharedmem_fuzzing = 0; + __afl_manual_init(); + return ExecuteFilesOnyByOne(argc, argv); + + } else if (argc == 2 && argv[1][0] == '-') { + N = atoi(argv[1] + 1); - else if (argc == 2 && (N = atoi(argv[1])) > 0) + + } else if (argc == 2 && (N = atoi(argv[1])) > 0) { + printf("WARNING: using the deprecated call style `%s %d`\n", argv[0], N); - else if (argc > 1) { + + } else if (argc > 1) { __afl_sharedmem_fuzzing = 0; - __afl_manual_init(); + + if (argc == 2) { __afl_manual_init(); } + return ExecuteFilesOnyByOne(argc, argv); } assert(N > 0); - // if (!getenv("AFL_DRIVER_DONT_DEFER")) __afl_manual_init(); // Call LLVMFuzzerTestOneInput here so that coverage caused by initialization @@ -271,6 +294,7 @@ int main(int argc, char **argv) { fprintf(stderr, "%02x", __afl_fuzz_ptr[i]); fprintf(stderr, "\n"); #endif + if (*__afl_fuzz_len) { num_runs++; -- cgit 1.4.1 From e0aa411647e1a525a3a0488d929ec71611388d54 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Wed, 9 Jun 2021 20:26:37 +0200 Subject: add test cases for splitting integer comparisons --- instrumentation/split-compares-pass.so.cc | 1009 +++++++++++++---------------- test/test-int_cases.c | 424 ++++++++++++ test/test-uint_cases.c | 217 +++++++ utils/crash_triage/triage_crashes.sh | 4 + 4 files changed, 1093 insertions(+), 561 deletions(-) create mode 100644 test/test-int_cases.c create mode 100644 
test/test-uint_cases.c (limited to 'utils') diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc index b02a89fb..3dbf7878 100644 --- a/instrumentation/split-compares-pass.so.cc +++ b/instrumentation/split-compares-pass.so.cc @@ -47,50 +47,99 @@ using namespace llvm; #include "afl-llvm-common.h" +// uncomment this toggle function verification at each step. horribly slow, but +// helps to pinpoint a potential problem in the splitting code. +//#define VERIFY_TOO_MUCH 1 + namespace { class SplitComparesTransform : public ModulePass { - public: static char ID; SplitComparesTransform() : ModulePass(ID), enableFPSplit(0) { - initInstrumentList(); - } bool runOnModule(Module &M) override; #if LLVM_VERSION_MAJOR >= 4 StringRef getPassName() const override { - #else const char *getPassName() const override { #endif - return "simplifies and splits ICMP instructions"; - + return "AFL_SplitComparesTransform"; } private: int enableFPSplit; - size_t splitIntCompares(Module &M, unsigned bitw); + unsigned target_bitwidth = 8; + + size_t count = 0; + size_t splitFPCompares(Module &M); - bool simplifyCompares(Module &M); bool simplifyFPCompares(Module &M); - bool simplifyIntSignedness(Module &M); size_t nextPowerOfTwo(size_t in); + /// simplify the comparison and then split the comparison until the + /// target_bitwidth is reached. + bool simplifyAndSplit(CmpInst *I, Module &M); + /// simplify a non-strict comparison (e.g., less than or equals) + bool simplifyOrEqualsCompare(CmpInst *IcmpInst, Module &M, + std::vector &worklist); + /// simplify a signed comparison (signed less or greater than) + bool simplifySignedCompare(CmpInst *IcmpInst, Module &M, + std::vector &worklist); + /// splits an icmp into nested icmps recursivly until target_bitwidth is + /// reached + bool splitCompare(CmpInst *I, Module &M); + + /// print an error to llvm's errs stream, but only if not ordered to be quiet + void reportError(const StringRef msg, Instruction *I, Module &M) { + if (!be_quiet) { + errs() << "[AFL++ SplitComparesTransform] ERROR: " << msg << "\n"; + if (debug) { + if (I) { + errs() << "Instruction = " << *I << "\n"; + if (auto BB = I->getParent()) { + if (auto F = BB->getParent()) { + if (F->hasName()) { + errs() << "|-> in function " << F->getName() << " "; + } + } + } + } + auto n = M.getName(); + if (n.size() > 0) { errs() << "in module " << n << "\n"; } + } + } + } + + bool isSupportedBitWidth(unsigned bitw) { + // IDK whether the icmp code works on other bitwidths. I guess not? So we + // try to avoid dealing with other weird icmp's that llvm might use (looking + // at you `icmp i0`). 
+ switch (bitw) { + case 8: + case 16: + case 32: + case 64: + case 128: + case 256: + return true; + default: + return false; + } + } }; } // namespace char SplitComparesTransform::ID = 0; -/* This function splits FCMP instructions with xGE or xLE predicates into two - * FCMP instructions with predicate xGT or xLT and EQ */ +/// This function splits FCMP instructions with xGE or xLE predicates into two +/// FCMP instructions with predicate xGT or xLT and EQ bool SplitComparesTransform::simplifyFPCompares(Module &M) { - LLVMContext & C = M.getContext(); std::vector fcomps; IntegerType * Int1Ty = IntegerType::getInt1Ty(C); @@ -98,23 +147,18 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) { /* iterate over all functions, bbs and instruction and add * all integer comparisons with >= and <= predicates to the icomps vector */ for (auto &F : M) { - if (!isInInstrumentList(&F)) continue; for (auto &BB : F) { - for (auto &IN : BB) { - CmpInst *selectcmpInst = nullptr; if ((selectcmpInst = dyn_cast(&IN))) { - if (enableFPSplit && (selectcmpInst->getPredicate() == CmpInst::FCMP_OGE || selectcmpInst->getPredicate() == CmpInst::FCMP_UGE || selectcmpInst->getPredicate() == CmpInst::FCMP_OLE || selectcmpInst->getPredicate() == CmpInst::FCMP_ULE)) { - auto op0 = selectcmpInst->getOperand(0); auto op1 = selectcmpInst->getOperand(1); @@ -127,22 +171,16 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) { if (TyOp0->isArrayTy() || TyOp0->isVectorTy()) { continue; } fcomps.push_back(selectcmpInst); - } - } - } - } - } if (!fcomps.size()) { return false; } /* transform for floating point */ for (auto &FcmpInst : fcomps) { - BasicBlock *bb = FcmpInst->getParent(); auto op0 = FcmpInst->getOperand(0); @@ -155,7 +193,6 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) { CmpInst::Predicate new_pred; switch (pred) { - case CmpInst::FCMP_UGE: new_pred = CmpInst::FCMP_UGT; break; @@ -170,7 +207,6 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) { break; default: // keep the compiler happy continue; - } /* split before the fcmp instruction */ @@ -214,305 +250,428 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) { /* replace the old FcmpInst with our new and shiny PHI inst */ BasicBlock::iterator ii(FcmpInst); ReplaceInstWithInst(FcmpInst->getParent()->getInstList(), ii, PN); - } return true; - } -/* This function splits ICMP instructions with xGE or xLE predicates into two - * ICMP instructions with predicate xGT or xLT and EQ */ -bool SplitComparesTransform::simplifyCompares(Module &M) { - - LLVMContext & C = M.getContext(); - std::vector icomps; - IntegerType * Int1Ty = IntegerType::getInt1Ty(C); - - /* iterate over all functions, bbs and instruction and add - * all integer comparisons with >= and <= predicates to the icomps vector */ - for (auto &F : M) { - - if (!isInInstrumentList(&F)) continue; - - for (auto &BB : F) { +/// This function splits ICMP instructions with xGE or xLE predicates into two +/// ICMP instructions with predicate xGT or xLT and EQ +bool SplitComparesTransform::simplifyOrEqualsCompare( + CmpInst *IcmpInst, Module &M, std::vector &worklist) { + LLVMContext &C = M.getContext(); + IntegerType *Int1Ty = IntegerType::getInt1Ty(C); - for (auto &IN : BB) { + /* find out what the new predicate is going to be */ + auto cmp_inst = dyn_cast(IcmpInst); + if (!cmp_inst) { return false; } + + BasicBlock *bb = IcmpInst->getParent(); - CmpInst *selectcmpInst = nullptr; + auto op0 = IcmpInst->getOperand(0); + auto op1 = IcmpInst->getOperand(1); - 
if ((selectcmpInst = dyn_cast(&IN))) { + CmpInst::Predicate pred = cmp_inst->getPredicate(); + CmpInst::Predicate new_pred; - if (selectcmpInst->getPredicate() == CmpInst::ICMP_UGE || - selectcmpInst->getPredicate() == CmpInst::ICMP_SGE || - selectcmpInst->getPredicate() == CmpInst::ICMP_ULE || - selectcmpInst->getPredicate() == CmpInst::ICMP_SLE) { + switch (pred) { + case CmpInst::ICMP_UGE: + new_pred = CmpInst::ICMP_UGT; + break; + case CmpInst::ICMP_SGE: + new_pred = CmpInst::ICMP_SGT; + break; + case CmpInst::ICMP_ULE: + new_pred = CmpInst::ICMP_ULT; + break; + case CmpInst::ICMP_SLE: + new_pred = CmpInst::ICMP_SLT; + break; + default: // keep the compiler happy + return false; + } - auto op0 = selectcmpInst->getOperand(0); - auto op1 = selectcmpInst->getOperand(1); + /* split before the icmp instruction */ + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); + + /* the old bb now contains a unconditional jump to the new one (end_bb) + * we need to delete it later */ + + /* create the ICMP instruction with new_pred and add it to the old basic + * block bb it is now at the position where the old IcmpInst was */ + CmpInst *icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), icmp_np); + + /* create a new basic block which holds the new EQ icmp */ + CmpInst *icmp_eq; + /* insert middle_bb before end_bb */ + BasicBlock *middle_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + icmp_eq = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, op0, op1); + middle_bb->getInstList().push_back(icmp_eq); + /* add an unconditional branch to the end of middle_bb with destination + * end_bb */ + BranchInst::Create(end_bb, middle_bb); + + /* replace the uncond branch with a conditional one, which depends on the + * new_pred icmp. True goes to end, false to the middle (injected) bb */ + auto term = bb->getTerminator(); + BranchInst::Create(end_bb, middle_bb, icmp_np, bb); + term->eraseFromParent(); + + /* replace the old IcmpInst (which is the first inst in end_bb) with a PHI + * inst to wire up the loose ends */ + PHINode *PN = PHINode::Create(Int1Ty, 2, ""); + /* the first result depends on the outcome of icmp_eq */ + PN->addIncoming(icmp_eq, middle_bb); + /* if the source was the original bb we know that the icmp_np yielded true + * hence we can hardcode this value */ + PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); + /* replace the old IcmpInst with our new and shiny PHI inst */ + BasicBlock::iterator ii(IcmpInst); + ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); + + worklist.push_back(icmp_np); + worklist.push_back(icmp_eq); - IntegerType *intTyOp0 = dyn_cast(op0->getType()); - IntegerType *intTyOp1 = dyn_cast(op1->getType()); + return true; +} - /* this is probably not needed but we do it anyway */ - if (!intTyOp0 || !intTyOp1) { continue; } +/// Simplify a signed comparison operator by splitting it into a unsigned and +/// bit comparison. add all resulting comparisons to +/// the worklist passed as a reference. 
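/* [Editor's note: the C sketch below is an illustration added for this
 * write-up; it is NOT part of the quoted commit, and the helper name is
 * hypothetical. It shows the two's-complement identity that the
 * simplifySignedCompare() function defined next builds in IR: a signed
 * compare becomes a sign-bit check followed by an unsigned compare. The
 * pass does this for SLT and SGT; SGT is the mirror image of the sketch. */

#include <stdint.h>

/* signed "a < b", rebuilt from the raw 32-bit patterns of a and b */
static int slt_via_unsigned(uint32_t a, uint32_t b) {

  uint32_t sa = a >> 31, sb = b >> 31; /* isolate the sign bits */
  if (sa != sb) return sa > sb;        /* signs differ: the value with the
                                          sign bit set is the smaller one */
  return a < b;                        /* same sign: unsigned order equals
                                          signed order in two's complement */

}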
+bool SplitComparesTransform::simplifySignedCompare( + CmpInst *IcmpInst, Module &M, std::vector &worklist) { + LLVMContext &C = M.getContext(); + IntegerType *Int1Ty = IntegerType::getInt1Ty(C); - icomps.push_back(selectcmpInst); + BasicBlock *bb = IcmpInst->getParent(); - } + auto op0 = IcmpInst->getOperand(0); + auto op1 = IcmpInst->getOperand(1); - } + IntegerType *intTyOp0 = dyn_cast(op0->getType()); + if (!intTyOp0) { return false; } + unsigned bitw = intTyOp0->getBitWidth(); + IntegerType *IntType = IntegerType::get(C, bitw); - } + /* get the new predicate */ + auto cmp_inst = dyn_cast(IcmpInst); + if (!cmp_inst) { return false; } + auto pred = cmp_inst->getPredicate(); + CmpInst::Predicate new_pred; - } + if (pred == CmpInst::ICMP_SGT) { + new_pred = CmpInst::ICMP_UGT; + } else { + new_pred = CmpInst::ICMP_ULT; } - if (!icomps.size()) { return false; } - - for (auto &IcmpInst : icomps) { - - BasicBlock *bb = IcmpInst->getParent(); - - auto op0 = IcmpInst->getOperand(0); - auto op1 = IcmpInst->getOperand(1); - - /* find out what the new predicate is going to be */ - auto cmp_inst = dyn_cast(IcmpInst); - if (!cmp_inst) { continue; } - auto pred = cmp_inst->getPredicate(); - CmpInst::Predicate new_pred; + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); + + /* create a 1 bit compare for the sign bit. to do this shift and trunc + * the original operands so only the first bit remains.*/ + Value *s_op0, *t_op0, *s_op1, *t_op1, *icmp_sign_bit; + + IRBuilder<> IRB(bb->getTerminator()); + s_op0 = IRB.CreateLShr(op0, ConstantInt::get(IntType, bitw - 1)); + t_op0 = IRB.CreateTruncOrBitCast(s_op0, Int1Ty); + s_op1 = IRB.CreateLShr(op1, ConstantInt::get(IntType, bitw - 1)); + t_op1 = IRB.CreateTruncOrBitCast(s_op1, Int1Ty); + /* compare of the sign bits */ + icmp_sign_bit = IRB.CreateCmp(CmpInst::ICMP_EQ, t_op0, t_op1); + + /* create a new basic block which is executed if the signedness bit is + * different */ + CmpInst * icmp_inv_sig_cmp; + BasicBlock *sign_bb = + BasicBlock::Create(C, "sign", end_bb->getParent(), end_bb); + if (pred == CmpInst::ICMP_SGT) { + /* if we check for > and the op0 positive and op1 negative then the final + * result is true. 
if op0 negative and op1 pos, the cmp must result + * in false + */ + icmp_inv_sig_cmp = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_op0, t_op1); - switch (pred) { + } else { + /* just the inverse of the above statement */ + icmp_inv_sig_cmp = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_op0, t_op1); + } - case CmpInst::ICMP_UGE: - new_pred = CmpInst::ICMP_UGT; - break; - case CmpInst::ICMP_SGE: - new_pred = CmpInst::ICMP_SGT; - break; - case CmpInst::ICMP_ULE: - new_pred = CmpInst::ICMP_ULT; - break; - case CmpInst::ICMP_SLE: - new_pred = CmpInst::ICMP_SLT; - break; - default: // keep the compiler happy - continue; + sign_bb->getInstList().push_back(icmp_inv_sig_cmp); + BranchInst::Create(end_bb, sign_bb); - } + /* create a new bb which is executed if signedness is equal */ + CmpInst * icmp_usign_cmp; + BasicBlock *middle_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + /* we can do a normal unsigned compare now */ + icmp_usign_cmp = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); - /* split before the icmp instruction */ - BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); + middle_bb->getInstList().push_back(icmp_usign_cmp); + BranchInst::Create(end_bb, middle_bb); - /* the old bb now contains a unconditional jump to the new one (end_bb) - * we need to delete it later */ + auto term = bb->getTerminator(); + /* if the sign is eq do a normal unsigned cmp, else we have to check the + * signedness bit */ + BranchInst::Create(middle_bb, sign_bb, icmp_sign_bit, bb); + term->eraseFromParent(); - /* create the ICMP instruction with new_pred and add it to the old basic - * block bb it is now at the position where the old IcmpInst was */ - Instruction *icmp_np; - icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), - icmp_np); + PHINode *PN = PHINode::Create(Int1Ty, 2, ""); - /* create a new basic block which holds the new EQ icmp */ - Instruction *icmp_eq; - /* insert middle_bb before end_bb */ - BasicBlock *middle_bb = - BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); - icmp_eq = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, op0, op1); - middle_bb->getInstList().push_back(icmp_eq); - /* add an unconditional branch to the end of middle_bb with destination - * end_bb */ - BranchInst::Create(end_bb, middle_bb); + PN->addIncoming(icmp_usign_cmp, middle_bb); + PN->addIncoming(icmp_inv_sig_cmp, sign_bb); - /* replace the uncond branch with a conditional one, which depends on the - * new_pred icmp. 
True goes to end, false to the middle (injected) bb */ - auto term = bb->getTerminator(); - BranchInst::Create(end_bb, middle_bb, icmp_np, bb); - term->eraseFromParent(); + BasicBlock::iterator ii(IcmpInst); + ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); - /* replace the old IcmpInst (which is the first inst in end_bb) with a PHI - * inst to wire up the loose ends */ - PHINode *PN = PHINode::Create(Int1Ty, 2, ""); - /* the first result depends on the outcome of icmp_eq */ - PN->addIncoming(icmp_eq, middle_bb); - /* if the source was the original bb we know that the icmp_np yielded true - * hence we can hardcode this value */ - PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); - /* replace the old IcmpInst with our new and shiny PHI inst */ - BasicBlock::iterator ii(IcmpInst); - ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); + // save for later + worklist.push_back(icmp_usign_cmp); - } + // signed comparisons are not supported by the splitting code, so we must not + // add it to the worklist. + // worklist.push_back(icmp_inv_sig_cmp); return true; - } -/* this function transforms signed compares to equivalent unsigned compares */ -bool SplitComparesTransform::simplifyIntSignedness(Module &M) { - - LLVMContext & C = M.getContext(); - std::vector icomps; - IntegerType * Int1Ty = IntegerType::getInt1Ty(C); - - /* iterate over all functions, bbs and instructions and add - * all signed compares to icomps vector */ - for (auto &F : M) { +bool SplitComparesTransform::splitCompare(CmpInst *cmp_inst, Module &M) { + auto pred = cmp_inst->getPredicate(); + switch (pred) { + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_NE: + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_ULT: + break; + default: + // unsupported predicate! + return false; + } - if (!isInInstrumentList(&F)) continue; + auto op0 = cmp_inst->getOperand(0); + auto op1 = cmp_inst->getOperand(1); - for (auto &BB : F) { + // get bitwidth by checking the bitwidth of the first operator + IntegerType *intTyOp0 = dyn_cast(op0->getType()); + if (!intTyOp0) { + // not an integer type + return false; + } - for (auto &IN : BB) { + unsigned bitw = intTyOp0->getBitWidth(); + if (bitw == target_bitwidth) { + // already the target bitwidth so we have to do nothing here. 
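/* [Editor's note: illustrative identities, not part of the quoted commit.]
 * The remainder of splitCompare() rewrites one bitw-wide icmp into icmps on
 * the two bitw/2-wide halves. With hi()/lo() standing for the upper/lower
 * half and <u / >u for unsigned compares, the branchy IR built below is a
 * short-circuit evaluation of:
 *
 *   a == b   <=>   hi(a) == hi(b)  &&  lo(a) == lo(b)
 *   a != b   <=>   hi(a) != hi(b)  ||  lo(a) != lo(b)
 *   a <u b   <=>   hi(a) <u hi(b)  ||  (hi(a) == hi(b)  &&  lo(a) <u lo(b))
 *   a >u b   <=>   hi(a) >u hi(b)  ||  (hi(a) == hi(b)  &&  lo(a) >u lo(b))
 */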
+ return true; + } - CmpInst *selectcmpInst = nullptr; + LLVMContext &C = M.getContext(); + IntegerType *Int1Ty = IntegerType::getInt1Ty(C); + BasicBlock *bb = cmp_inst->getParent(); + IntegerType *OldIntType = IntegerType::get(C, bitw); + IntegerType *NewIntType = IntegerType::get(C, bitw / 2); + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(cmp_inst)); + CmpInst *icmp_high, *icmp_low; - if ((selectcmpInst = dyn_cast(&IN))) { + /* create the comparison of the top halves of the original operands */ + Instruction *s_op0, *op0_high, *s_op1, *op1_high; - if (selectcmpInst->getPredicate() == CmpInst::ICMP_SGT || - selectcmpInst->getPredicate() == CmpInst::ICMP_SLT) { + s_op0 = BinaryOperator::Create(Instruction::LShr, op0, + ConstantInt::get(OldIntType, bitw / 2)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); + op0_high = new TruncInst(s_op0, NewIntType); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), op0_high); - auto op0 = selectcmpInst->getOperand(0); - auto op1 = selectcmpInst->getOperand(1); + s_op1 = BinaryOperator::Create(Instruction::LShr, op1, + ConstantInt::get(OldIntType, bitw / 2)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); + op1_high = new TruncInst(s_op1, NewIntType); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), op1_high); - IntegerType *intTyOp0 = dyn_cast(op0->getType()); - IntegerType *intTyOp1 = dyn_cast(op1->getType()); + icmp_high = CmpInst::Create(Instruction::ICmp, pred, op0_high, op1_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_high); - /* see above */ - if (!intTyOp0 || !intTyOp1) { continue; } + PHINode *PN = nullptr; - /* i think this is not possible but to lazy to look it up */ - if (intTyOp0->getBitWidth() != intTyOp1->getBitWidth()) { + /* now we have to destinguish between == != and > < */ + if (pred == CmpInst::ICMP_EQ || pred == CmpInst::ICMP_NE) { + /* transformation for == and != icmps */ - continue; + /* create a compare for the lower half of the original operands */ + BasicBlock *cmp_low_bb = + BasicBlock::Create(C, "" /*"injected"*/, end_bb->getParent(), end_bb); - } + Value *op0_low, *op1_low; - icomps.push_back(selectcmpInst); + IRBuilder<> Builder(cmp_low_bb); - } + op0_low = Builder.CreateTrunc(op0, NewIntType); + op1_low = Builder.CreateTrunc(op1, NewIntType); - } + icmp_low = dyn_cast(Builder.CreateICmp(pred, op0_low, op1_low)); + // icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low); + // cmp_low_bb->getInstList().push_back(icmp_low); - } + BranchInst::Create(end_bb, cmp_low_bb); + /* dependent on the cmp of the high parts go to the end or go on with + * the comparison */ + auto term = bb->getTerminator(); + BranchInst *br = nullptr; + if (pred == CmpInst::ICMP_EQ) { + br = BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb); + } else { + /* CmpInst::ICMP_NE */ + br = BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb); } + term->eraseFromParent(); - } - - if (!icomps.size()) { return false; } - - for (auto &IcmpInst : icomps) { - - BasicBlock *bb = IcmpInst->getParent(); - - auto op0 = IcmpInst->getOperand(0); - auto op1 = IcmpInst->getOperand(1); - - IntegerType *intTyOp0 = dyn_cast(op0->getType()); - if (!intTyOp0) { continue; } - unsigned bitw = intTyOp0->getBitWidth(); - IntegerType *IntType = IntegerType::get(C, bitw); - - /* get the new predicate */ - auto cmp_inst = dyn_cast(IcmpInst); - if (!cmp_inst) { continue; } - auto pred = 
cmp_inst->getPredicate(); - CmpInst::Predicate new_pred; - - if (pred == CmpInst::ICMP_SGT) { - - new_pred = CmpInst::ICMP_UGT; - + /* create the PHI and connect the edges accordingly */ + PN = PHINode::Create(Int1Ty, 2, ""); + PN->addIncoming(icmp_low, cmp_low_bb); + Value *val = nullptr; + if (pred == CmpInst::ICMP_EQ) { + val = ConstantInt::get(Int1Ty, 0); } else { + /* CmpInst::ICMP_NE */ + val = ConstantInt::get(Int1Ty, 1); + } + PN->addIncoming(val, icmp_high->getParent()); - new_pred = CmpInst::ICMP_ULT; + } else { + /* CmpInst::ICMP_UGT and CmpInst::ICMP_ULT */ + /* transformations for < and > */ + + /* create a basic block which checks for the inverse predicate. + * if this is true we can go to the end if not we have to go to the + * bb which checks the lower half of the operands */ + Instruction *icmp_inv_cmp, *op0_low, *op1_low; + BasicBlock * inv_cmp_bb = + BasicBlock::Create(C, "inv_cmp", end_bb->getParent(), end_bb); + if (pred == CmpInst::ICMP_UGT) { + icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, + op0_high, op1_high); + } else { + icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, + op0_high, op1_high); } - BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); + inv_cmp_bb->getInstList().push_back(icmp_inv_cmp); - /* create a 1 bit compare for the sign bit. to do this shift and trunc - * the original operands so only the first bit remains.*/ - Instruction *s_op0, *t_op0, *s_op1, *t_op1, *icmp_sign_bit; + auto term = bb->getTerminator(); + term->eraseFromParent(); + BranchInst::Create(end_bb, inv_cmp_bb, icmp_high, bb); - s_op0 = BinaryOperator::Create(Instruction::LShr, op0, - ConstantInt::get(IntType, bitw - 1)); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); - t_op0 = new TruncInst(s_op0, Int1Ty); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op0); + /* create a bb which handles the cmp of the lower halves */ + BasicBlock *cmp_low_bb = + BasicBlock::Create(C, "" /*"injected"*/, end_bb->getParent(), end_bb); + op0_low = new TruncInst(op0, NewIntType); + cmp_low_bb->getInstList().push_back(op0_low); + op1_low = new TruncInst(op1, NewIntType); + cmp_low_bb->getInstList().push_back(op1_low); - s_op1 = BinaryOperator::Create(Instruction::LShr, op1, - ConstantInt::get(IntType, bitw - 1)); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); - t_op1 = new TruncInst(s_op1, Int1Ty); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op1); + icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low); + cmp_low_bb->getInstList().push_back(icmp_low); + BranchInst::Create(end_bb, cmp_low_bb); - /* compare of the sign bits */ - icmp_sign_bit = - CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_op0, t_op1); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), - icmp_sign_bit); + BranchInst::Create(end_bb, cmp_low_bb, icmp_inv_cmp, inv_cmp_bb); - /* create a new basic block which is executed if the signedness bit is - * different */ - Instruction *icmp_inv_sig_cmp; - BasicBlock * sign_bb = - BasicBlock::Create(C, "sign", end_bb->getParent(), end_bb); - if (pred == CmpInst::ICMP_SGT) { + PN = PHINode::Create(Int1Ty, 3); + PN->addIncoming(icmp_low, cmp_low_bb); + PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); + PN->addIncoming(ConstantInt::get(Int1Ty, 0), inv_cmp_bb); + } - /* if we check for > and the op0 positive and op1 negative then the final - * result is true. 
if op0 negative and op1 pos, the cmp must result - * in false - */ - icmp_inv_sig_cmp = - CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_op0, t_op1); + BasicBlock::iterator ii(cmp_inst); + ReplaceInstWithInst(cmp_inst->getParent()->getInstList(), ii, PN); - } else { + // We split the comparison into low and high. If this isn't our target + // bitwidth we recursivly split the low and high parts again until we have + // target bitwidth. + if ((bitw / 2) > target_bitwidth) { + if (!splitCompare(icmp_high, M)) { + reportError("Failed to split high comparison", icmp_high, M); + return false; + } + if (!splitCompare(icmp_low, M)) { + reportError("Failed to split low comparison", icmp_low, M); + return false; + } + } - /* just the inverse of the above statement */ - icmp_inv_sig_cmp = - CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_op0, t_op1); + return true; +} - } +bool SplitComparesTransform::simplifyAndSplit(CmpInst *I, Module &M) { + std::vector worklist; - sign_bb->getInstList().push_back(icmp_inv_sig_cmp); - BranchInst::Create(end_bb, sign_bb); + auto op0 = I->getOperand(0); + auto op1 = I->getOperand(1); + if (!op0 || !op1) { return false; } + auto op0Ty = dyn_cast(op0->getType()); + if (!op0Ty || !isa(op1->getType())) { return true; } - /* create a new bb which is executed if signedness is equal */ - Instruction *icmp_usign_cmp; - BasicBlock * middle_bb = - BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); - /* we can do a normal unsigned compare now */ - icmp_usign_cmp = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); - middle_bb->getInstList().push_back(icmp_usign_cmp); - BranchInst::Create(end_bb, middle_bb); + unsigned bitw = op0Ty->getBitWidth(); - auto term = bb->getTerminator(); - /* if the sign is eq do a normal unsigned cmp, else we have to check the - * signedness bit */ - BranchInst::Create(middle_bb, sign_bb, icmp_sign_bit, bb); - term->eraseFromParent(); +#ifdef VERIFY_TOO_MUCH + auto F = I->getParent()->getParent(); +#endif - PHINode *PN = PHINode::Create(Int1Ty, 2, ""); + // we run the comparison simplification on all compares regardless of their + // bitwidth. + if (I->getPredicate() == CmpInst::ICMP_UGE || + I->getPredicate() == CmpInst::ICMP_SGE || + I->getPredicate() == CmpInst::ICMP_ULE || + I->getPredicate() == CmpInst::ICMP_SLE) { + if (!simplifyOrEqualsCompare(I, M, worklist)) { + reportError( + "Failed to simplify inequality or equals comparison " + "(UGE,SGE,ULE,SLE)", + I, M); + } + } else if (I->getPredicate() == CmpInst::ICMP_SGT || + I->getPredicate() == CmpInst::ICMP_SLT) { + if (!simplifySignedCompare(I, M, worklist)) { + reportError("Failed to simplify signed comparison (SGT,SLT)", I, M); + } + } - PN->addIncoming(icmp_usign_cmp, middle_bb); - PN->addIncoming(icmp_inv_sig_cmp, sign_bb); +#ifdef VERIFY_TOO_MUCH + if (verifyFunction(*F, &errs())) { + reportError("simpliyfing compare lead to broken function", nullptr, M); + } +#endif - BasicBlock::iterator ii(IcmpInst); - ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); + // the simplification methods replace the original CmpInst and push the + // resulting new CmpInst into the worklist. If the worklist is empty then + // we only have to split the original CmpInst. + if (worklist.size() == 0) { worklist.push_back(I); } + + for (auto cmp : worklist) { + // we split the simplified compares into comparisons with smaller bitwidths + // if they are larger than our target_bitwidth. 
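/* [Editor's note: the helper below is an illustration added for this
 * write-up, not part of the quoted commit; the name is hypothetical and it
 * assumes <stdint.h>. With the default target_bitwidth of 8, the call to
 * splitCompare() below halves each comparison recursively
 * (64 -> 32 -> 16 -> 8 bit). For an unsigned "<" the recursion corresponds
 * roughly to: */

/* a and b are assumed to fit into bitw bits */
static int ult_split(uint64_t a, uint64_t b, unsigned bitw) {

  if (bitw == 8) return (uint8_t)a < (uint8_t)b;    /* target width reached */
  uint64_t mask = (1ULL << (bitw / 2)) - 1;
  uint64_t ah = a >> (bitw / 2), bh = b >> (bitw / 2);
  if (ah != bh) return ult_split(ah, bh, bitw / 2); /* high halves decide */
  return ult_split(a & mask, b & mask, bitw / 2);   /* else compare the low */

}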
+ if (bitw > target_bitwidth) { + if (!splitCompare(cmp, M)) { + reportError("Failed to split comparison", cmp, M); + } +#ifdef VERIFY_TOO_MUCH + if (verifyFunction(*F, &errs())) { + reportError("splitting compare lead to broken function", nullptr, M); + } +#endif + } } + count++; return true; - } size_t SplitComparesTransform::nextPowerOfTwo(size_t in) { - --in; in |= in >> 1; in |= in >> 2; @@ -520,12 +679,10 @@ size_t SplitComparesTransform::nextPowerOfTwo(size_t in) { // in |= in >> 8; // in |= in >> 16; return in + 1; - } /* splits fcmps into two nested fcmps with sign compare and the rest */ size_t SplitComparesTransform::splitFPCompares(Module &M) { - size_t count = 0; LLVMContext &C = M.getContext(); @@ -537,13 +694,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { /* define unions with floating point and (sign, exponent, mantissa) triples */ if (dl.isLittleEndian()) { - } else if (dl.isBigEndian()) { - } else { - return count; - } #endif @@ -553,17 +706,13 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { /* get all EQ, NE, GT, and LT fcmps. if the other two * functions were executed only these four predicates should exist */ for (auto &F : M) { - if (!isInInstrumentList(&F)) continue; for (auto &BB : F) { - for (auto &IN : BB) { - CmpInst *selectcmpInst = nullptr; if ((selectcmpInst = dyn_cast(&IN))) { - if (selectcmpInst->getPredicate() == CmpInst::FCMP_OEQ || selectcmpInst->getPredicate() == CmpInst::FCMP_UEQ || selectcmpInst->getPredicate() == CmpInst::FCMP_ONE || @@ -572,7 +721,6 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { selectcmpInst->getPredicate() == CmpInst::FCMP_OGT || selectcmpInst->getPredicate() == CmpInst::FCMP_ULT || selectcmpInst->getPredicate() == CmpInst::FCMP_OLT) { - auto op0 = selectcmpInst->getOperand(0); auto op1 = selectcmpInst->getOperand(1); @@ -584,15 +732,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { if (TyOp0->isArrayTy() || TyOp0->isVectorTy()) { continue; } fcomps.push_back(selectcmpInst); - } - } - } - } - } if (!fcomps.size()) { return count; } @@ -600,7 +743,6 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { IntegerType *Int1Ty = IntegerType::getInt1Ty(C); for (auto &FcmpInst : fcomps) { - BasicBlock *bb = FcmpInst->getParent(); auto op0 = FcmpInst->getOperand(0); @@ -725,7 +867,6 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock::iterator(signequal_bb->getTerminator()), t_e1); if (sizeInBits - precision < exTySizeBytes * 8) { - m_e0 = BinaryOperator::Create( Instruction::And, t_e0, ConstantInt::get(t_e0->getType(), mask_exponent)); @@ -738,10 +879,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock::iterator(signequal_bb->getTerminator()), m_e1); } else { - m_e0 = t_e0; m_e1 = t_e1; - } /* compare the exponents of the operands */ @@ -749,7 +888,6 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction *icmp_exponent_result; BasicBlock * signequal2_bb = signequal_bb; switch (FcmpInst->getPredicate()) { - case CmpInst::FCMP_UEQ: case CmpInst::FCMP_OEQ: icmp_exponent_result = @@ -819,7 +957,6 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { break; default: continue; - } signequal2_bb->getInstList().insert( @@ -827,11 +964,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent_result); { - term = signequal2_bb->getTerminator(); switch (FcmpInst->getPredicate()) { - case CmpInst::FCMP_UEQ: case CmpInst::FCMP_OEQ: /* if the exponents are satifying the compare do a 
fraction cmp in @@ -854,11 +989,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { break; default: continue; - } term->eraseFromParent(); - } /* isolate the mantissa aka fraction */ @@ -866,7 +999,6 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { bool needTrunc = IntFractionTy->getPrimitiveSizeInBits() < op_size; if (precision - 1 < frTySizeBytes * 8) { - Instruction *m_f0, *m_f1; m_f0 = BinaryOperator::Create( Instruction::And, b_op0, @@ -880,7 +1012,6 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock::iterator(middle_bb->getTerminator()), m_f1); if (needTrunc) { - t_f0 = new TruncInst(m_f0, IntFractionTy); t_f1 = new TruncInst(m_f1, IntFractionTy); middle_bb->getInstList().insert( @@ -889,16 +1020,12 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock::iterator(middle_bb->getTerminator()), t_f1); } else { - t_f0 = m_f0; t_f1 = m_f1; - } } else { - if (needTrunc) { - t_f0 = new TruncInst(b_op0, IntFractionTy); t_f1 = new TruncInst(b_op1, IntFractionTy); middle_bb->getInstList().insert( @@ -907,12 +1034,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock::iterator(middle_bb->getTerminator()), t_f1); } else { - t_f0 = b_op0; t_f1 = b_op1; - } - } /* compare the fractions of the operands */ @@ -920,7 +1044,6 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock * middle2_bb = middle_bb; PHINode * PN2 = nullptr; switch (FcmpInst->getPredicate()) { - case CmpInst::FCMP_UEQ: case CmpInst::FCMP_OEQ: icmp_fraction_result = @@ -943,7 +1066,6 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { case CmpInst::FCMP_UGT: case CmpInst::FCMP_OLT: case CmpInst::FCMP_ULT: { - Instruction *icmp_fraction_result2; middle2_bb = middle_bb->splitBasicBlock( @@ -956,7 +1078,6 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { if (FcmpInst->getPredicate() == CmpInst::FCMP_OGT || FcmpInst->getPredicate() == CmpInst::FCMP_UGT) { - negative_bb->getInstList().push_back( icmp_fraction_result = CmpInst::Create( Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1)); @@ -965,14 +1086,12 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1)); } else { - negative_bb->getInstList().push_back( icmp_fraction_result = CmpInst::Create( Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1)); positive_bb->getInstList().push_back( icmp_fraction_result2 = CmpInst::Create( Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1)); - } BranchInst::Create(middle2_bb, negative_bb); @@ -992,13 +1111,11 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { default: continue; - } PHINode *PN = PHINode::Create(Int1Ty, 3, ""); switch (FcmpInst->getPredicate()) { - case CmpInst::FCMP_UEQ: case CmpInst::FCMP_OEQ: /* unequal signs cannot be equal values */ @@ -1037,328 +1154,94 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { break; default: continue; - } BasicBlock::iterator ii(FcmpInst); ReplaceInstWithInst(FcmpInst->getParent()->getInstList(), ii, PN); ++count; - - } - - return count; - -} - -/* splits icmps of size bitw into two nested icmps with bitw/2 size each */ -size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned bitw) { - - size_t count = 0; - - LLVMContext &C = M.getContext(); - - IntegerType *Int1Ty = IntegerType::getInt1Ty(C); - IntegerType *OldIntType = IntegerType::get(C, bitw); - IntegerType *NewIntType = IntegerType::get(C, bitw / 2); - - std::vector icomps; - - if (bitw % 2) { return 0; } - - /* not 
supported yet */ - if (bitw > 64) { return 0; } - - /* get all EQ, NE, UGT, and ULT icmps of width bitw. if the - * functions simplifyCompares() and simplifyIntSignedness() - * were executed only these four predicates should exist */ - for (auto &F : M) { - - if (!isInInstrumentList(&F)) continue; - - for (auto &BB : F) { - - for (auto &IN : BB) { - - CmpInst *selectcmpInst = nullptr; - - if ((selectcmpInst = dyn_cast(&IN))) { - - if (selectcmpInst->getPredicate() == CmpInst::ICMP_EQ || - selectcmpInst->getPredicate() == CmpInst::ICMP_NE || - selectcmpInst->getPredicate() == CmpInst::ICMP_UGT || - selectcmpInst->getPredicate() == CmpInst::ICMP_ULT) { - - auto op0 = selectcmpInst->getOperand(0); - auto op1 = selectcmpInst->getOperand(1); - - IntegerType *intTyOp0 = dyn_cast(op0->getType()); - IntegerType *intTyOp1 = dyn_cast(op1->getType()); - - if (!intTyOp0 || !intTyOp1) { continue; } - - /* check if the bitwidths are the one we are looking for */ - if (intTyOp0->getBitWidth() != bitw || - intTyOp1->getBitWidth() != bitw) { - - continue; - - } - - icomps.push_back(selectcmpInst); - - } - - } - - } - - } - - } - - if (!icomps.size()) { return 0; } - - for (auto &IcmpInst : icomps) { - - BasicBlock *bb = IcmpInst->getParent(); - - auto op0 = IcmpInst->getOperand(0); - auto op1 = IcmpInst->getOperand(1); - - auto cmp_inst = dyn_cast(IcmpInst); - if (!cmp_inst) { continue; } - auto pred = cmp_inst->getPredicate(); - - BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); - - /* create the comparison of the top halves of the original operands */ - Instruction *s_op0, *op0_high, *s_op1, *op1_high, *icmp_high; - - s_op0 = BinaryOperator::Create(Instruction::LShr, op0, - ConstantInt::get(OldIntType, bitw / 2)); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); - op0_high = new TruncInst(s_op0, NewIntType); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), - op0_high); - - s_op1 = BinaryOperator::Create(Instruction::LShr, op1, - ConstantInt::get(OldIntType, bitw / 2)); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); - op1_high = new TruncInst(s_op1, NewIntType); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), - op1_high); - - icmp_high = CmpInst::Create(Instruction::ICmp, pred, op0_high, op1_high); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), - icmp_high); - - /* now we have to destinguish between == != and > < */ - if (pred == CmpInst::ICMP_EQ || pred == CmpInst::ICMP_NE) { - - /* transformation for == and != icmps */ - - /* create a compare for the lower half of the original operands */ - Instruction *op0_low, *op1_low, *icmp_low; - BasicBlock * cmp_low_bb = - BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); - - op0_low = new TruncInst(op0, NewIntType); - cmp_low_bb->getInstList().push_back(op0_low); - - op1_low = new TruncInst(op1, NewIntType); - cmp_low_bb->getInstList().push_back(op1_low); - - icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low); - cmp_low_bb->getInstList().push_back(icmp_low); - BranchInst::Create(end_bb, cmp_low_bb); - - /* dependent on the cmp of the high parts go to the end or go on with - * the comparison */ - auto term = bb->getTerminator(); - if (pred == CmpInst::ICMP_EQ) { - - BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb); - - } else { - - /* CmpInst::ICMP_NE */ - BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb); - - } - - term->eraseFromParent(); - - /* create the PHI and connect 
the edges accordingly */ - PHINode *PN = PHINode::Create(Int1Ty, 2, ""); - PN->addIncoming(icmp_low, cmp_low_bb); - if (pred == CmpInst::ICMP_EQ) { - - PN->addIncoming(ConstantInt::get(Int1Ty, 0), bb); - - } else { - - /* CmpInst::ICMP_NE */ - PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); - - } - - /* replace the old icmp with the new PHI */ - BasicBlock::iterator ii(IcmpInst); - ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); - - } else { - - /* CmpInst::ICMP_UGT and CmpInst::ICMP_ULT */ - /* transformations for < and > */ - - /* create a basic block which checks for the inverse predicate. - * if this is true we can go to the end if not we have to go to the - * bb which checks the lower half of the operands */ - Instruction *icmp_inv_cmp, *op0_low, *op1_low, *icmp_low; - BasicBlock * inv_cmp_bb = - BasicBlock::Create(C, "inv_cmp", end_bb->getParent(), end_bb); - if (pred == CmpInst::ICMP_UGT) { - - icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, - op0_high, op1_high); - - } else { - - icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, - op0_high, op1_high); - - } - - inv_cmp_bb->getInstList().push_back(icmp_inv_cmp); - - auto term = bb->getTerminator(); - term->eraseFromParent(); - BranchInst::Create(end_bb, inv_cmp_bb, icmp_high, bb); - - /* create a bb which handles the cmp of the lower halves */ - BasicBlock *cmp_low_bb = - BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); - op0_low = new TruncInst(op0, NewIntType); - cmp_low_bb->getInstList().push_back(op0_low); - op1_low = new TruncInst(op1, NewIntType); - cmp_low_bb->getInstList().push_back(op1_low); - - icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low); - cmp_low_bb->getInstList().push_back(icmp_low); - BranchInst::Create(end_bb, cmp_low_bb); - - BranchInst::Create(end_bb, cmp_low_bb, icmp_inv_cmp, inv_cmp_bb); - - PHINode *PN = PHINode::Create(Int1Ty, 3); - PN->addIncoming(icmp_low, cmp_low_bb); - PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); - PN->addIncoming(ConstantInt::get(Int1Ty, 0), inv_cmp_bb); - - BasicBlock::iterator ii(IcmpInst); - ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); - - } - - ++count; - } return count; - } bool SplitComparesTransform::runOnModule(Module &M) { - - int bitw = 64; - size_t count = 0; - char *bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW"); if (!bitw_env) bitw_env = getenv("LAF_SPLIT_COMPARES_BITW"); - if (bitw_env) { bitw = atoi(bitw_env); } + if (bitw_env) { target_bitwidth = atoi(bitw_env); } enableFPSplit = getenv("AFL_LLVM_LAF_SPLIT_FLOATS") != NULL; if ((isatty(2) && getenv("AFL_QUIET") == NULL) || getenv("AFL_DEBUG") != NULL) { + errs() << "Split-compare-pass by laf.intel@gmail.com, extended by " + "heiko@hexco.de (splitting icmp to " + << target_bitwidth << " bit)\n"; - printf( - "Split-compare-pass by laf.intel@gmail.com, extended by " - "heiko@hexco.de\n"); + if (getenv("AFL_DEBUG") != NULL && !debug) { debug = 1; } } else { - be_quiet = 1; - } if (enableFPSplit) { - count = splitFPCompares(M); /* if (!be_quiet) { - errs() << "Split-floatingpoint-compare-pass: " << count << " FP comparisons split\n"; - } - */ simplifyFPCompares(M); - } - simplifyCompares(M); - - simplifyIntSignedness(M); - - switch (bitw) { + std::vector worklist; + /* iterate over all functions, bbs and instruction search for all integer + * compare instructions. Save them into the worklist for later. 
*/ + for (auto &F : M) { + if (!isInInstrumentList(&F)) continue; - case 64: - count += splitIntCompares(M, bitw); - if (debug) - errs() << "Split-integer-compare-pass " << bitw << "bit: " << count - << " split\n"; - bitw >>= 1; -#if LLVM_VERSION_MAJOR > 3 || \ - (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) - [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ -#endif - case 32: - count += splitIntCompares(M, bitw); - if (debug) - errs() << "Split-integer-compare-pass " << bitw << "bit: " << count - << " split\n"; - bitw >>= 1; -#if LLVM_VERSION_MAJOR > 3 || \ - (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) - [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ -#endif - case 16: - count += splitIntCompares(M, bitw); - if (debug) - errs() << "Split-integer-compare-pass " << bitw << "bit: " << count - << " split\n"; - // bitw >>= 1; - break; + for (auto &BB : F) { + for (auto &IN : BB) { + if (auto CI = dyn_cast(&IN)) { + auto op0 = CI->getOperand(0); + auto op1 = CI->getOperand(1); + if (!op0 || !op1) { return false; } + auto iTy1 = dyn_cast(op0->getType()); + if (iTy1 && isa(op1->getType())) { + unsigned bitw = iTy1->getBitWidth(); + if (isSupportedBitWidth(bitw)) { worklist.push_back(CI); } + } + } + } + } + } - default: - // if (!be_quiet) errs() << "NOT Running split-compare-pass \n"; - return false; - break; + // now that we have a list of all integer comparisons we can start replacing + // them with the splitted alternatives. + for (auto CI : worklist) { + simplifyAndSplit(CI, M); + } + bool brokenDebug = false; + if (verifyModule(M, &errs(), &brokenDebug)) { + reportError( + "Module Verifier failed! Consider reporting a bug with the AFL++ " + "project.", + nullptr, M); } - verifyModule(M); + if (brokenDebug) { + reportError("Module Verifier reported broken Debug Infos - Stripping!", + nullptr, M); + StripDebugInfo(M); + } return true; - } static void registerSplitComparesPass(const PassManagerBuilder &, legacy::PassManagerBase &PM) { - PM.add(new SplitComparesTransform()); - } static RegisterStandardPasses RegisterSplitComparesPass( @@ -1373,3 +1256,7 @@ static RegisterStandardPasses RegisterSplitComparesTransPassLTO( registerSplitComparesPass); #endif +static RegisterPass X("splitcompares", + "AFL++ split compares", + true /* Only looks at CFG */, + true /* Analysis Pass */); diff --git a/test/test-int_cases.c b/test/test-int_cases.c new file mode 100644 index 00000000..c76206c5 --- /dev/null +++ b/test/test-int_cases.c @@ -0,0 +1,424 @@ +/* test cases for integer comparison transformations + * compile with -DINT_TYPE="signed char" + * or -DINT_TYPE="short" + * or -DINT_TYPE="int" + * or -DINT_TYPE="long" + * or -DINT_TYPE="long long" + */ + +#include + +int main() { + + volatile INT_TYPE a, b; + /* different values */ + a = -21; + b = -2; /* signs equal */ + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + a = 1; + b = 8; /* signs equal */ + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + if ((unsigned)(INT_TYPE)(~0) > 255) { /* short or bigger */ + volatile short a, b; + a = 2; + b = 256+1; /* signs equal */ + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + a = -1 - 256; + b = -8; /* signs equal */ + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + if 
((unsigned)(INT_TYPE)(~0) > 65535) { /* int or bigger */ + volatile int a, b; + a = 2; + b = 65536+1; /* signs equal */ + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + a = -1 - 65536; + b = -8; /* signs equal */ + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + if ((unsigned)(INT_TYPE)(~0) > 4294967295) { /* long or bigger */ + volatile long a, b; + a = 2; + b = 4294967296+1; /* signs equal */ + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + a = -1 - 4294967296; + b = -8; /* signs equal */ + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + } + } + } + + a = -1; + b = 1; /* signs differ */ + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + a = -1; + b = 0; /* signs differ */ + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + a = -2; + b = 8; /* signs differ */ + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + a = -1; + b = -2; /* signs equal */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = 8; + b = 1; /* signs equal */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + if ((unsigned)(INT_TYPE)(~0) > 255) { + volatile short a, b; + a = 1 + 256; + b = 3; /* signs equal */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = -1; + b = -256; /* signs equal */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + if ((unsigned)(INT_TYPE)(~0) > 65535) { + volatile int a, b; + a = 1 + 65536; + b = 3; /* signs equal */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = -1; + b = -65536; /* signs equal */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + if ((unsigned)(INT_TYPE)(~0) > 4294967295) { + volatile long a, b; + a = 1 + 4294967296; + b = 3; /* signs equal */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = -1; + b = -4294967296; /* signs equal */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + } + } + } + + a = 1; + b = -1; /* signs differ */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = 0; + b = -1; /* signs differ */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = 8; + b = -2; /* signs differ */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = 1; + b = -2; /* signs differ */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + if ((unsigned)(INT_TYPE)(~0) > 255) 
{ + volatile short a, b; + a = 1 + 256; + b = -2; /* signs differ */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = -1; + b = -2 - 256; /* signs differ */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + if ((unsigned)(INT_TYPE)(~0) > 65535) { + volatile int a, b; + a = 1 + 65536; + b = -2; /* signs differ */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = -1; + b = -2 - 65536; /* signs differ */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + if ((unsigned)(INT_TYPE)(~0) > 4294967295) { + volatile long a, b; + a = 1 + 4294967296; + b = -2; /* signs differ */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = -1; + b = -2 - 4294967296; /* signs differ */ + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + } + } + } + + /* equal values */ + a = 0; + b = 0; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + a = -0; + b = 0; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + a = 1; + b = 1; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + a = 5; + b = 5; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + a = -1; + b = -1; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + a = -5; + b = -5; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + if ((unsigned)(INT_TYPE)(~0) > 255) { + volatile short a, b; + a = 1 + 256; + b = 1 + 256; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + a = -2 - 256; + b = -2 - 256; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + if ((unsigned)(INT_TYPE)(~0) > 65535) { + volatile int a, b; + a = 1 + 65536; + b = 1 + 65536; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + a = -2 - 65536; + b = -2 - 65536; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + if ((unsigned)(INT_TYPE)(~0) > 4294967295) { + volatile long a, b; + a = 1 + 4294967296; + b = 1 + 4294967296; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + a = -2 - 4294967296; + b = -2 - 4294967296; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + } + } + } +} + diff --git a/test/test-uint_cases.c b/test/test-uint_cases.c new file mode 100644 index 00000000..8496cffe --- /dev/null +++ b/test/test-uint_cases.c @@ -0,0 +1,217 @@ +/* + * compile with -DUINT_TYPE="unsigned char" + * or -DUINT_TYPE="unsigned short" + * or -DUINT_TYPE="unsigned int" + * or -DUINT_TYPE="unsigned 
long" + * or -DUINT_TYPE="unsigned long long" + */ + +#include + +int main() { + + volatile UINT_TYPE a, b; + + a = 1; + b = 8; + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + if ((UINT_TYPE)(~0) > 255) { + volatile unsigned short a, b; + a = 256+2; + b = 256+21; + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + a = 21; + b = 256+1; + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + if ((UINT_TYPE)(~0) > 65535) { + volatile unsigned int a, b; + a = 65536+2; + b = 65536+21; + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + a = 21; + b = 65536+1; + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + } + + if ((UINT_TYPE)(~0) > 4294967295) { + volatile unsigned long a, b; + a = 4294967296+2; + b = 4294967296+21; + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + + a = 21; + b = 4294967296+1; + assert((a < b)); + assert((a <= b)); + assert(!(a > b)); + assert(!(a >= b)); + assert((a != b)); + assert(!(a == b)); + } + } + + a = 8; + b = 1; + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + if ((UINT_TYPE)(~0) > 255) { + volatile unsigned short a, b; + a = 256+2; + b = 256+1; + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = 256+2; + b = 6; + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + if ((UINT_TYPE)(~0) > 65535) { + volatile unsigned int a, b; + a = 65536+2; + b = 65536+1; + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = 65536+2; + b = 6; + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + if ((UINT_TYPE)(~0) > 4294967295) { + volatile unsigned long a, b; + a = 4294967296+2; + b = 4294967296+1; + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + a = 4294967296+2; + b = 6; + assert((a > b)); + assert((a >= b)); + assert(!(a < b)); + assert(!(a <= b)); + assert((a != b)); + assert(!(a == b)); + + } + } + } + + + a = 0; + b = 0; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + a = 1; + b = 1; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + if ((UINT_TYPE)(~0) > 255) { + volatile unsigned short a, b; + a = 256+5; + b = 256+5; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + if ((UINT_TYPE)(~0) > 65535) { + volatile unsigned int a, b; + a = 65536+5; + b = 65536+5; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); + assert((a == b)); + + if ((UINT_TYPE)(~0) > 4294967295) { + volatile unsigned long a, b; + a = 4294967296+5; + b = 4294967296+5; + assert(!(a < b)); + assert((a <= b)); + assert(!(a > b)); + assert((a >= b)); + assert(!(a != b)); 
+ assert((a == b)); + } + } + + } + +} + diff --git a/utils/crash_triage/triage_crashes.sh b/utils/crash_triage/triage_crashes.sh index 4d75430e..9ca1d5fc 100755 --- a/utils/crash_triage/triage_crashes.sh +++ b/utils/crash_triage/triage_crashes.sh @@ -65,7 +65,11 @@ if [ ! -f "$BIN" -o ! -x "$BIN" ]; then fi if [ ! -d "$DIR/queue" ]; then +<<<<<<< Updated upstream echo "[-] Error: directory '$DIR' not found or not created by afl-fuzz." 1>&2 +======= + echo "[-] Error: directory '$DIR/queue' not found or not created by afl-fuzz." 1>&2 +>>>>>>> Stashed changes exit 1 fi -- cgit 1.4.1 From a4cb2414d5a26699f999667752d3461da20d3f82 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Wed, 9 Jun 2021 21:29:41 +0200 Subject: Revert "add test cases for splitting integer comparisons" This reverts commit e0aa411647e1a525a3a0488d929ec71611388d54. --- instrumentation/split-compares-pass.so.cc | 1009 ++++++++++++++++------------- test/test-int_cases.c | 424 ------------ test/test-uint_cases.c | 217 ------- utils/crash_triage/triage_crashes.sh | 4 - 4 files changed, 561 insertions(+), 1093 deletions(-) delete mode 100644 test/test-int_cases.c delete mode 100644 test/test-uint_cases.c (limited to 'utils') diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc index 3dbf7878..b02a89fb 100644 --- a/instrumentation/split-compares-pass.so.cc +++ b/instrumentation/split-compares-pass.so.cc @@ -47,99 +47,50 @@ using namespace llvm; #include "afl-llvm-common.h" -// uncomment this toggle function verification at each step. horribly slow, but -// helps to pinpoint a potential problem in the splitting code. -//#define VERIFY_TOO_MUCH 1 - namespace { class SplitComparesTransform : public ModulePass { + public: static char ID; SplitComparesTransform() : ModulePass(ID), enableFPSplit(0) { + initInstrumentList(); + } bool runOnModule(Module &M) override; #if LLVM_VERSION_MAJOR >= 4 StringRef getPassName() const override { + #else const char *getPassName() const override { #endif - return "AFL_SplitComparesTransform"; + return "simplifies and splits ICMP instructions"; + } private: int enableFPSplit; - unsigned target_bitwidth = 8; - - size_t count = 0; - + size_t splitIntCompares(Module &M, unsigned bitw); size_t splitFPCompares(Module &M); + bool simplifyCompares(Module &M); bool simplifyFPCompares(Module &M); + bool simplifyIntSignedness(Module &M); size_t nextPowerOfTwo(size_t in); - /// simplify the comparison and then split the comparison until the - /// target_bitwidth is reached. 
- bool simplifyAndSplit(CmpInst *I, Module &M); - /// simplify a non-strict comparison (e.g., less than or equals) - bool simplifyOrEqualsCompare(CmpInst *IcmpInst, Module &M, - std::vector &worklist); - /// simplify a signed comparison (signed less or greater than) - bool simplifySignedCompare(CmpInst *IcmpInst, Module &M, - std::vector &worklist); - /// splits an icmp into nested icmps recursivly until target_bitwidth is - /// reached - bool splitCompare(CmpInst *I, Module &M); - - /// print an error to llvm's errs stream, but only if not ordered to be quiet - void reportError(const StringRef msg, Instruction *I, Module &M) { - if (!be_quiet) { - errs() << "[AFL++ SplitComparesTransform] ERROR: " << msg << "\n"; - if (debug) { - if (I) { - errs() << "Instruction = " << *I << "\n"; - if (auto BB = I->getParent()) { - if (auto F = BB->getParent()) { - if (F->hasName()) { - errs() << "|-> in function " << F->getName() << " "; - } - } - } - } - auto n = M.getName(); - if (n.size() > 0) { errs() << "in module " << n << "\n"; } - } - } - } - - bool isSupportedBitWidth(unsigned bitw) { - // IDK whether the icmp code works on other bitwidths. I guess not? So we - // try to avoid dealing with other weird icmp's that llvm might use (looking - // at you `icmp i0`). - switch (bitw) { - case 8: - case 16: - case 32: - case 64: - case 128: - case 256: - return true; - default: - return false; - } - } }; } // namespace char SplitComparesTransform::ID = 0; -/// This function splits FCMP instructions with xGE or xLE predicates into two -/// FCMP instructions with predicate xGT or xLT and EQ +/* This function splits FCMP instructions with xGE or xLE predicates into two + * FCMP instructions with predicate xGT or xLT and EQ */ bool SplitComparesTransform::simplifyFPCompares(Module &M) { + LLVMContext & C = M.getContext(); std::vector fcomps; IntegerType * Int1Ty = IntegerType::getInt1Ty(C); @@ -147,18 +98,23 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) { /* iterate over all functions, bbs and instruction and add * all integer comparisons with >= and <= predicates to the icomps vector */ for (auto &F : M) { + if (!isInInstrumentList(&F)) continue; for (auto &BB : F) { + for (auto &IN : BB) { + CmpInst *selectcmpInst = nullptr; if ((selectcmpInst = dyn_cast(&IN))) { + if (enableFPSplit && (selectcmpInst->getPredicate() == CmpInst::FCMP_OGE || selectcmpInst->getPredicate() == CmpInst::FCMP_UGE || selectcmpInst->getPredicate() == CmpInst::FCMP_OLE || selectcmpInst->getPredicate() == CmpInst::FCMP_ULE)) { + auto op0 = selectcmpInst->getOperand(0); auto op1 = selectcmpInst->getOperand(1); @@ -171,16 +127,22 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) { if (TyOp0->isArrayTy() || TyOp0->isVectorTy()) { continue; } fcomps.push_back(selectcmpInst); + } + } + } + } + } if (!fcomps.size()) { return false; } /* transform for floating point */ for (auto &FcmpInst : fcomps) { + BasicBlock *bb = FcmpInst->getParent(); auto op0 = FcmpInst->getOperand(0); @@ -193,6 +155,7 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) { CmpInst::Predicate new_pred; switch (pred) { + case CmpInst::FCMP_UGE: new_pred = CmpInst::FCMP_UGT; break; @@ -207,6 +170,7 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) { break; default: // keep the compiler happy continue; + } /* split before the fcmp instruction */ @@ -250,428 +214,305 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) { /* replace the old FcmpInst with our new and shiny PHI inst */ BasicBlock::iterator 
ii(FcmpInst); ReplaceInstWithInst(FcmpInst->getParent()->getInstList(), ii, PN); + } return true; + } -/// This function splits ICMP instructions with xGE or xLE predicates into two -/// ICMP instructions with predicate xGT or xLT and EQ -bool SplitComparesTransform::simplifyOrEqualsCompare( - CmpInst *IcmpInst, Module &M, std::vector &worklist) { - LLVMContext &C = M.getContext(); - IntegerType *Int1Ty = IntegerType::getInt1Ty(C); +/* This function splits ICMP instructions with xGE or xLE predicates into two + * ICMP instructions with predicate xGT or xLT and EQ */ +bool SplitComparesTransform::simplifyCompares(Module &M) { - /* find out what the new predicate is going to be */ - auto cmp_inst = dyn_cast(IcmpInst); - if (!cmp_inst) { return false; } - - BasicBlock *bb = IcmpInst->getParent(); + LLVMContext & C = M.getContext(); + std::vector icomps; + IntegerType * Int1Ty = IntegerType::getInt1Ty(C); - auto op0 = IcmpInst->getOperand(0); - auto op1 = IcmpInst->getOperand(1); + /* iterate over all functions, bbs and instruction and add + * all integer comparisons with >= and <= predicates to the icomps vector */ + for (auto &F : M) { - CmpInst::Predicate pred = cmp_inst->getPredicate(); - CmpInst::Predicate new_pred; + if (!isInInstrumentList(&F)) continue; - switch (pred) { - case CmpInst::ICMP_UGE: - new_pred = CmpInst::ICMP_UGT; - break; - case CmpInst::ICMP_SGE: - new_pred = CmpInst::ICMP_SGT; - break; - case CmpInst::ICMP_ULE: - new_pred = CmpInst::ICMP_ULT; - break; - case CmpInst::ICMP_SLE: - new_pred = CmpInst::ICMP_SLT; - break; - default: // keep the compiler happy - return false; - } + for (auto &BB : F) { - /* split before the icmp instruction */ - BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); - - /* the old bb now contains a unconditional jump to the new one (end_bb) - * we need to delete it later */ - - /* create the ICMP instruction with new_pred and add it to the old basic - * block bb it is now at the position where the old IcmpInst was */ - CmpInst *icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), icmp_np); - - /* create a new basic block which holds the new EQ icmp */ - CmpInst *icmp_eq; - /* insert middle_bb before end_bb */ - BasicBlock *middle_bb = - BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); - icmp_eq = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, op0, op1); - middle_bb->getInstList().push_back(icmp_eq); - /* add an unconditional branch to the end of middle_bb with destination - * end_bb */ - BranchInst::Create(end_bb, middle_bb); - - /* replace the uncond branch with a conditional one, which depends on the - * new_pred icmp. 
True goes to end, false to the middle (injected) bb */ - auto term = bb->getTerminator(); - BranchInst::Create(end_bb, middle_bb, icmp_np, bb); - term->eraseFromParent(); - - /* replace the old IcmpInst (which is the first inst in end_bb) with a PHI - * inst to wire up the loose ends */ - PHINode *PN = PHINode::Create(Int1Ty, 2, ""); - /* the first result depends on the outcome of icmp_eq */ - PN->addIncoming(icmp_eq, middle_bb); - /* if the source was the original bb we know that the icmp_np yielded true - * hence we can hardcode this value */ - PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); - /* replace the old IcmpInst with our new and shiny PHI inst */ - BasicBlock::iterator ii(IcmpInst); - ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); - - worklist.push_back(icmp_np); - worklist.push_back(icmp_eq); + for (auto &IN : BB) { - return true; -} + CmpInst *selectcmpInst = nullptr; -/// Simplify a signed comparison operator by splitting it into a unsigned and -/// bit comparison. add all resulting comparisons to -/// the worklist passed as a reference. -bool SplitComparesTransform::simplifySignedCompare( - CmpInst *IcmpInst, Module &M, std::vector &worklist) { - LLVMContext &C = M.getContext(); - IntegerType *Int1Ty = IntegerType::getInt1Ty(C); + if ((selectcmpInst = dyn_cast(&IN))) { + + if (selectcmpInst->getPredicate() == CmpInst::ICMP_UGE || + selectcmpInst->getPredicate() == CmpInst::ICMP_SGE || + selectcmpInst->getPredicate() == CmpInst::ICMP_ULE || + selectcmpInst->getPredicate() == CmpInst::ICMP_SLE) { - BasicBlock *bb = IcmpInst->getParent(); + auto op0 = selectcmpInst->getOperand(0); + auto op1 = selectcmpInst->getOperand(1); - auto op0 = IcmpInst->getOperand(0); - auto op1 = IcmpInst->getOperand(1); + IntegerType *intTyOp0 = dyn_cast(op0->getType()); + IntegerType *intTyOp1 = dyn_cast(op1->getType()); - IntegerType *intTyOp0 = dyn_cast(op0->getType()); - if (!intTyOp0) { return false; } - unsigned bitw = intTyOp0->getBitWidth(); - IntegerType *IntType = IntegerType::get(C, bitw); + /* this is probably not needed but we do it anyway */ + if (!intTyOp0 || !intTyOp1) { continue; } - /* get the new predicate */ - auto cmp_inst = dyn_cast(IcmpInst); - if (!cmp_inst) { return false; } - auto pred = cmp_inst->getPredicate(); - CmpInst::Predicate new_pred; + icomps.push_back(selectcmpInst); - if (pred == CmpInst::ICMP_SGT) { - new_pred = CmpInst::ICMP_UGT; + } - } else { - new_pred = CmpInst::ICMP_ULT; - } + } - BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); - - /* create a 1 bit compare for the sign bit. to do this shift and trunc - * the original operands so only the first bit remains.*/ - Value *s_op0, *t_op0, *s_op1, *t_op1, *icmp_sign_bit; - - IRBuilder<> IRB(bb->getTerminator()); - s_op0 = IRB.CreateLShr(op0, ConstantInt::get(IntType, bitw - 1)); - t_op0 = IRB.CreateTruncOrBitCast(s_op0, Int1Ty); - s_op1 = IRB.CreateLShr(op1, ConstantInt::get(IntType, bitw - 1)); - t_op1 = IRB.CreateTruncOrBitCast(s_op1, Int1Ty); - /* compare of the sign bits */ - icmp_sign_bit = IRB.CreateCmp(CmpInst::ICMP_EQ, t_op0, t_op1); - - /* create a new basic block which is executed if the signedness bit is - * different */ - CmpInst * icmp_inv_sig_cmp; - BasicBlock *sign_bb = - BasicBlock::Create(C, "sign", end_bb->getParent(), end_bb); - if (pred == CmpInst::ICMP_SGT) { - /* if we check for > and the op0 positive and op1 negative then the final - * result is true. 
if op0 negative and op1 pos, the cmp must result - * in false - */ - icmp_inv_sig_cmp = - CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_op0, t_op1); + } + + } - } else { - /* just the inverse of the above statement */ - icmp_inv_sig_cmp = - CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_op0, t_op1); } - sign_bb->getInstList().push_back(icmp_inv_sig_cmp); - BranchInst::Create(end_bb, sign_bb); + if (!icomps.size()) { return false; } - /* create a new bb which is executed if signedness is equal */ - CmpInst * icmp_usign_cmp; - BasicBlock *middle_bb = - BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); - /* we can do a normal unsigned compare now */ - icmp_usign_cmp = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); + for (auto &IcmpInst : icomps) { - middle_bb->getInstList().push_back(icmp_usign_cmp); - BranchInst::Create(end_bb, middle_bb); + BasicBlock *bb = IcmpInst->getParent(); - auto term = bb->getTerminator(); - /* if the sign is eq do a normal unsigned cmp, else we have to check the - * signedness bit */ - BranchInst::Create(middle_bb, sign_bb, icmp_sign_bit, bb); - term->eraseFromParent(); + auto op0 = IcmpInst->getOperand(0); + auto op1 = IcmpInst->getOperand(1); - PHINode *PN = PHINode::Create(Int1Ty, 2, ""); + /* find out what the new predicate is going to be */ + auto cmp_inst = dyn_cast(IcmpInst); + if (!cmp_inst) { continue; } + auto pred = cmp_inst->getPredicate(); + CmpInst::Predicate new_pred; + + switch (pred) { + + case CmpInst::ICMP_UGE: + new_pred = CmpInst::ICMP_UGT; + break; + case CmpInst::ICMP_SGE: + new_pred = CmpInst::ICMP_SGT; + break; + case CmpInst::ICMP_ULE: + new_pred = CmpInst::ICMP_ULT; + break; + case CmpInst::ICMP_SLE: + new_pred = CmpInst::ICMP_SLT; + break; + default: // keep the compiler happy + continue; + + } + + /* split before the icmp instruction */ + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); + + /* the old bb now contains a unconditional jump to the new one (end_bb) + * we need to delete it later */ + + /* create the ICMP instruction with new_pred and add it to the old basic + * block bb it is now at the position where the old IcmpInst was */ + Instruction *icmp_np; + icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_np); - PN->addIncoming(icmp_usign_cmp, middle_bb); - PN->addIncoming(icmp_inv_sig_cmp, sign_bb); + /* create a new basic block which holds the new EQ icmp */ + Instruction *icmp_eq; + /* insert middle_bb before end_bb */ + BasicBlock *middle_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + icmp_eq = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, op0, op1); + middle_bb->getInstList().push_back(icmp_eq); + /* add an unconditional branch to the end of middle_bb with destination + * end_bb */ + BranchInst::Create(end_bb, middle_bb); - BasicBlock::iterator ii(IcmpInst); - ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); + /* replace the uncond branch with a conditional one, which depends on the + * new_pred icmp. 
True goes to end, false to the middle (injected) bb */ + auto term = bb->getTerminator(); + BranchInst::Create(end_bb, middle_bb, icmp_np, bb); + term->eraseFromParent(); - // save for later - worklist.push_back(icmp_usign_cmp); + /* replace the old IcmpInst (which is the first inst in end_bb) with a PHI + * inst to wire up the loose ends */ + PHINode *PN = PHINode::Create(Int1Ty, 2, ""); + /* the first result depends on the outcome of icmp_eq */ + PN->addIncoming(icmp_eq, middle_bb); + /* if the source was the original bb we know that the icmp_np yielded true + * hence we can hardcode this value */ + PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); + /* replace the old IcmpInst with our new and shiny PHI inst */ + BasicBlock::iterator ii(IcmpInst); + ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); - // signed comparisons are not supported by the splitting code, so we must not - // add it to the worklist. - // worklist.push_back(icmp_inv_sig_cmp); + } return true; + } -bool SplitComparesTransform::splitCompare(CmpInst *cmp_inst, Module &M) { - auto pred = cmp_inst->getPredicate(); - switch (pred) { - case CmpInst::ICMP_EQ: - case CmpInst::ICMP_NE: - case CmpInst::ICMP_UGT: - case CmpInst::ICMP_ULT: - break; - default: - // unsupported predicate! - return false; - } +/* this function transforms signed compares to equivalent unsigned compares */ +bool SplitComparesTransform::simplifyIntSignedness(Module &M) { - auto op0 = cmp_inst->getOperand(0); - auto op1 = cmp_inst->getOperand(1); + LLVMContext & C = M.getContext(); + std::vector icomps; + IntegerType * Int1Ty = IntegerType::getInt1Ty(C); - // get bitwidth by checking the bitwidth of the first operator - IntegerType *intTyOp0 = dyn_cast(op0->getType()); - if (!intTyOp0) { - // not an integer type - return false; - } + /* iterate over all functions, bbs and instructions and add + * all signed compares to icomps vector */ + for (auto &F : M) { - unsigned bitw = intTyOp0->getBitWidth(); - if (bitw == target_bitwidth) { - // already the target bitwidth so we have to do nothing here. 
- return true; - } + if (!isInInstrumentList(&F)) continue; - LLVMContext &C = M.getContext(); - IntegerType *Int1Ty = IntegerType::getInt1Ty(C); - BasicBlock *bb = cmp_inst->getParent(); - IntegerType *OldIntType = IntegerType::get(C, bitw); - IntegerType *NewIntType = IntegerType::get(C, bitw / 2); - BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(cmp_inst)); - CmpInst *icmp_high, *icmp_low; + for (auto &BB : F) { - /* create the comparison of the top halves of the original operands */ - Instruction *s_op0, *op0_high, *s_op1, *op1_high; + for (auto &IN : BB) { - s_op0 = BinaryOperator::Create(Instruction::LShr, op0, - ConstantInt::get(OldIntType, bitw / 2)); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); - op0_high = new TruncInst(s_op0, NewIntType); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), op0_high); + CmpInst *selectcmpInst = nullptr; - s_op1 = BinaryOperator::Create(Instruction::LShr, op1, - ConstantInt::get(OldIntType, bitw / 2)); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); - op1_high = new TruncInst(s_op1, NewIntType); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), op1_high); + if ((selectcmpInst = dyn_cast(&IN))) { - icmp_high = CmpInst::Create(Instruction::ICmp, pred, op0_high, op1_high); - bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), - icmp_high); + if (selectcmpInst->getPredicate() == CmpInst::ICMP_SGT || + selectcmpInst->getPredicate() == CmpInst::ICMP_SLT) { - PHINode *PN = nullptr; + auto op0 = selectcmpInst->getOperand(0); + auto op1 = selectcmpInst->getOperand(1); - /* now we have to destinguish between == != and > < */ - if (pred == CmpInst::ICMP_EQ || pred == CmpInst::ICMP_NE) { - /* transformation for == and != icmps */ + IntegerType *intTyOp0 = dyn_cast(op0->getType()); + IntegerType *intTyOp1 = dyn_cast(op1->getType()); - /* create a compare for the lower half of the original operands */ - BasicBlock *cmp_low_bb = - BasicBlock::Create(C, "" /*"injected"*/, end_bb->getParent(), end_bb); + /* see above */ + if (!intTyOp0 || !intTyOp1) { continue; } - Value *op0_low, *op1_low; + /* i think this is not possible but to lazy to look it up */ + if (intTyOp0->getBitWidth() != intTyOp1->getBitWidth()) { - IRBuilder<> Builder(cmp_low_bb); + continue; - op0_low = Builder.CreateTrunc(op0, NewIntType); - op1_low = Builder.CreateTrunc(op1, NewIntType); + } - icmp_low = dyn_cast(Builder.CreateICmp(pred, op0_low, op1_low)); - // icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low); - // cmp_low_bb->getInstList().push_back(icmp_low); + icomps.push_back(selectcmpInst); - BranchInst::Create(end_bb, cmp_low_bb); + } - /* dependent on the cmp of the high parts go to the end or go on with - * the comparison */ - auto term = bb->getTerminator(); - BranchInst *br = nullptr; - if (pred == CmpInst::ICMP_EQ) { - br = BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb); - } else { - /* CmpInst::ICMP_NE */ - br = BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb); - } - term->eraseFromParent(); + } + + } - /* create the PHI and connect the edges accordingly */ - PN = PHINode::Create(Int1Ty, 2, ""); - PN->addIncoming(icmp_low, cmp_low_bb); - Value *val = nullptr; - if (pred == CmpInst::ICMP_EQ) { - val = ConstantInt::get(Int1Ty, 0); - } else { - /* CmpInst::ICMP_NE */ - val = ConstantInt::get(Int1Ty, 1); } - PN->addIncoming(val, icmp_high->getParent()); - } else { - /* CmpInst::ICMP_UGT and CmpInst::ICMP_ULT */ - /* 
transformations for < and > */ - - /* create a basic block which checks for the inverse predicate. - * if this is true we can go to the end if not we have to go to the - * bb which checks the lower half of the operands */ - Instruction *icmp_inv_cmp, *op0_low, *op1_low; - BasicBlock * inv_cmp_bb = - BasicBlock::Create(C, "inv_cmp", end_bb->getParent(), end_bb); - if (pred == CmpInst::ICMP_UGT) { - icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, - op0_high, op1_high); + } - } else { - icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, - op0_high, op1_high); - } + if (!icomps.size()) { return false; } - inv_cmp_bb->getInstList().push_back(icmp_inv_cmp); + for (auto &IcmpInst : icomps) { - auto term = bb->getTerminator(); - term->eraseFromParent(); - BranchInst::Create(end_bb, inv_cmp_bb, icmp_high, bb); + BasicBlock *bb = IcmpInst->getParent(); + + auto op0 = IcmpInst->getOperand(0); + auto op1 = IcmpInst->getOperand(1); - /* create a bb which handles the cmp of the lower halves */ - BasicBlock *cmp_low_bb = - BasicBlock::Create(C, "" /*"injected"*/, end_bb->getParent(), end_bb); - op0_low = new TruncInst(op0, NewIntType); - cmp_low_bb->getInstList().push_back(op0_low); - op1_low = new TruncInst(op1, NewIntType); - cmp_low_bb->getInstList().push_back(op1_low); + IntegerType *intTyOp0 = dyn_cast(op0->getType()); + if (!intTyOp0) { continue; } + unsigned bitw = intTyOp0->getBitWidth(); + IntegerType *IntType = IntegerType::get(C, bitw); - icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low); - cmp_low_bb->getInstList().push_back(icmp_low); - BranchInst::Create(end_bb, cmp_low_bb); + /* get the new predicate */ + auto cmp_inst = dyn_cast(IcmpInst); + if (!cmp_inst) { continue; } + auto pred = cmp_inst->getPredicate(); + CmpInst::Predicate new_pred; - BranchInst::Create(end_bb, cmp_low_bb, icmp_inv_cmp, inv_cmp_bb); + if (pred == CmpInst::ICMP_SGT) { - PN = PHINode::Create(Int1Ty, 3); - PN->addIncoming(icmp_low, cmp_low_bb); - PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); - PN->addIncoming(ConstantInt::get(Int1Ty, 0), inv_cmp_bb); - } + new_pred = CmpInst::ICMP_UGT; - BasicBlock::iterator ii(cmp_inst); - ReplaceInstWithInst(cmp_inst->getParent()->getInstList(), ii, PN); + } else { + + new_pred = CmpInst::ICMP_ULT; - // We split the comparison into low and high. If this isn't our target - // bitwidth we recursivly split the low and high parts again until we have - // target bitwidth. - if ((bitw / 2) > target_bitwidth) { - if (!splitCompare(icmp_high, M)) { - reportError("Failed to split high comparison", icmp_high, M); - return false; - } - if (!splitCompare(icmp_low, M)) { - reportError("Failed to split low comparison", icmp_low, M); - return false; } - } - return true; -} + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); -bool SplitComparesTransform::simplifyAndSplit(CmpInst *I, Module &M) { - std::vector worklist; + /* create a 1 bit compare for the sign bit. 
to do this shift and trunc + * the original operands so only the first bit remains.*/ + Instruction *s_op0, *t_op0, *s_op1, *t_op1, *icmp_sign_bit; - auto op0 = I->getOperand(0); - auto op1 = I->getOperand(1); - if (!op0 || !op1) { return false; } - auto op0Ty = dyn_cast(op0->getType()); - if (!op0Ty || !isa(op1->getType())) { return true; } + s_op0 = BinaryOperator::Create(Instruction::LShr, op0, + ConstantInt::get(IntType, bitw - 1)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); + t_op0 = new TruncInst(s_op0, Int1Ty); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op0); - unsigned bitw = op0Ty->getBitWidth(); + s_op1 = BinaryOperator::Create(Instruction::LShr, op1, + ConstantInt::get(IntType, bitw - 1)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); + t_op1 = new TruncInst(s_op1, Int1Ty); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op1); -#ifdef VERIFY_TOO_MUCH - auto F = I->getParent()->getParent(); -#endif + /* compare of the sign bits */ + icmp_sign_bit = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_op0, t_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_sign_bit); - // we run the comparison simplification on all compares regardless of their - // bitwidth. - if (I->getPredicate() == CmpInst::ICMP_UGE || - I->getPredicate() == CmpInst::ICMP_SGE || - I->getPredicate() == CmpInst::ICMP_ULE || - I->getPredicate() == CmpInst::ICMP_SLE) { - if (!simplifyOrEqualsCompare(I, M, worklist)) { - reportError( - "Failed to simplify inequality or equals comparison " - "(UGE,SGE,ULE,SLE)", - I, M); - } - } else if (I->getPredicate() == CmpInst::ICMP_SGT || - I->getPredicate() == CmpInst::ICMP_SLT) { - if (!simplifySignedCompare(I, M, worklist)) { - reportError("Failed to simplify signed comparison (SGT,SLT)", I, M); - } - } + /* create a new basic block which is executed if the signedness bit is + * different */ + Instruction *icmp_inv_sig_cmp; + BasicBlock * sign_bb = + BasicBlock::Create(C, "sign", end_bb->getParent(), end_bb); + if (pred == CmpInst::ICMP_SGT) { -#ifdef VERIFY_TOO_MUCH - if (verifyFunction(*F, &errs())) { - reportError("simpliyfing compare lead to broken function", nullptr, M); - } -#endif + /* if we check for > and the op0 positive and op1 negative then the final + * result is true. if op0 negative and op1 pos, the cmp must result + * in false + */ + icmp_inv_sig_cmp = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_op0, t_op1); - // the simplification methods replace the original CmpInst and push the - // resulting new CmpInst into the worklist. If the worklist is empty then - // we only have to split the original CmpInst. - if (worklist.size() == 0) { worklist.push_back(I); } - - for (auto cmp : worklist) { - // we split the simplified compares into comparisons with smaller bitwidths - // if they are larger than our target_bitwidth. 
- if (bitw > target_bitwidth) { - if (!splitCompare(cmp, M)) { - reportError("Failed to split comparison", cmp, M); - } + } else { + + /* just the inverse of the above statement */ + icmp_inv_sig_cmp = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_op0, t_op1); -#ifdef VERIFY_TOO_MUCH - if (verifyFunction(*F, &errs())) { - reportError("splitting compare lead to broken function", nullptr, M); - } -#endif } + + sign_bb->getInstList().push_back(icmp_inv_sig_cmp); + BranchInst::Create(end_bb, sign_bb); + + /* create a new bb which is executed if signedness is equal */ + Instruction *icmp_usign_cmp; + BasicBlock * middle_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + /* we can do a normal unsigned compare now */ + icmp_usign_cmp = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); + middle_bb->getInstList().push_back(icmp_usign_cmp); + BranchInst::Create(end_bb, middle_bb); + + auto term = bb->getTerminator(); + /* if the sign is eq do a normal unsigned cmp, else we have to check the + * signedness bit */ + BranchInst::Create(middle_bb, sign_bb, icmp_sign_bit, bb); + term->eraseFromParent(); + + PHINode *PN = PHINode::Create(Int1Ty, 2, ""); + + PN->addIncoming(icmp_usign_cmp, middle_bb); + PN->addIncoming(icmp_inv_sig_cmp, sign_bb); + + BasicBlock::iterator ii(IcmpInst); + ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); + } - count++; return true; + } size_t SplitComparesTransform::nextPowerOfTwo(size_t in) { + --in; in |= in >> 1; in |= in >> 2; @@ -679,10 +520,12 @@ size_t SplitComparesTransform::nextPowerOfTwo(size_t in) { // in |= in >> 8; // in |= in >> 16; return in + 1; + } /* splits fcmps into two nested fcmps with sign compare and the rest */ size_t SplitComparesTransform::splitFPCompares(Module &M) { + size_t count = 0; LLVMContext &C = M.getContext(); @@ -694,9 +537,13 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { /* define unions with floating point and (sign, exponent, mantissa) triples */ if (dl.isLittleEndian()) { + } else if (dl.isBigEndian()) { + } else { + return count; + } #endif @@ -706,13 +553,17 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { /* get all EQ, NE, GT, and LT fcmps. 
if the other two * functions were executed only these four predicates should exist */ for (auto &F : M) { + if (!isInInstrumentList(&F)) continue; for (auto &BB : F) { + for (auto &IN : BB) { + CmpInst *selectcmpInst = nullptr; if ((selectcmpInst = dyn_cast(&IN))) { + if (selectcmpInst->getPredicate() == CmpInst::FCMP_OEQ || selectcmpInst->getPredicate() == CmpInst::FCMP_UEQ || selectcmpInst->getPredicate() == CmpInst::FCMP_ONE || @@ -721,6 +572,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { selectcmpInst->getPredicate() == CmpInst::FCMP_OGT || selectcmpInst->getPredicate() == CmpInst::FCMP_ULT || selectcmpInst->getPredicate() == CmpInst::FCMP_OLT) { + auto op0 = selectcmpInst->getOperand(0); auto op1 = selectcmpInst->getOperand(1); @@ -732,10 +584,15 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { if (TyOp0->isArrayTy() || TyOp0->isVectorTy()) { continue; } fcomps.push_back(selectcmpInst); + } + } + } + } + } if (!fcomps.size()) { return count; } @@ -743,6 +600,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { IntegerType *Int1Ty = IntegerType::getInt1Ty(C); for (auto &FcmpInst : fcomps) { + BasicBlock *bb = FcmpInst->getParent(); auto op0 = FcmpInst->getOperand(0); @@ -867,6 +725,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock::iterator(signequal_bb->getTerminator()), t_e1); if (sizeInBits - precision < exTySizeBytes * 8) { + m_e0 = BinaryOperator::Create( Instruction::And, t_e0, ConstantInt::get(t_e0->getType(), mask_exponent)); @@ -879,8 +738,10 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock::iterator(signequal_bb->getTerminator()), m_e1); } else { + m_e0 = t_e0; m_e1 = t_e1; + } /* compare the exponents of the operands */ @@ -888,6 +749,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction *icmp_exponent_result; BasicBlock * signequal2_bb = signequal_bb; switch (FcmpInst->getPredicate()) { + case CmpInst::FCMP_UEQ: case CmpInst::FCMP_OEQ: icmp_exponent_result = @@ -957,6 +819,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { break; default: continue; + } signequal2_bb->getInstList().insert( @@ -964,9 +827,11 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent_result); { + term = signequal2_bb->getTerminator(); switch (FcmpInst->getPredicate()) { + case CmpInst::FCMP_UEQ: case CmpInst::FCMP_OEQ: /* if the exponents are satifying the compare do a fraction cmp in @@ -989,9 +854,11 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { break; default: continue; + } term->eraseFromParent(); + } /* isolate the mantissa aka fraction */ @@ -999,6 +866,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { bool needTrunc = IntFractionTy->getPrimitiveSizeInBits() < op_size; if (precision - 1 < frTySizeBytes * 8) { + Instruction *m_f0, *m_f1; m_f0 = BinaryOperator::Create( Instruction::And, b_op0, @@ -1012,6 +880,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock::iterator(middle_bb->getTerminator()), m_f1); if (needTrunc) { + t_f0 = new TruncInst(m_f0, IntFractionTy); t_f1 = new TruncInst(m_f1, IntFractionTy); middle_bb->getInstList().insert( @@ -1020,12 +889,16 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock::iterator(middle_bb->getTerminator()), t_f1); } else { + t_f0 = m_f0; t_f1 = m_f1; + } } else { + if (needTrunc) { + t_f0 = new TruncInst(b_op0, IntFractionTy); t_f1 = new TruncInst(b_op1, IntFractionTy); middle_bb->getInstList().insert( @@ -1034,9 +907,12 @@ 
size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock::iterator(middle_bb->getTerminator()), t_f1); } else { + t_f0 = b_op0; t_f1 = b_op1; + } + } /* compare the fractions of the operands */ @@ -1044,6 +920,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { BasicBlock * middle2_bb = middle_bb; PHINode * PN2 = nullptr; switch (FcmpInst->getPredicate()) { + case CmpInst::FCMP_UEQ: case CmpInst::FCMP_OEQ: icmp_fraction_result = @@ -1066,6 +943,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { case CmpInst::FCMP_UGT: case CmpInst::FCMP_OLT: case CmpInst::FCMP_ULT: { + Instruction *icmp_fraction_result2; middle2_bb = middle_bb->splitBasicBlock( @@ -1078,6 +956,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { if (FcmpInst->getPredicate() == CmpInst::FCMP_OGT || FcmpInst->getPredicate() == CmpInst::FCMP_UGT) { + negative_bb->getInstList().push_back( icmp_fraction_result = CmpInst::Create( Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1)); @@ -1086,12 +965,14 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1)); } else { + negative_bb->getInstList().push_back( icmp_fraction_result = CmpInst::Create( Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1)); positive_bb->getInstList().push_back( icmp_fraction_result2 = CmpInst::Create( Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1)); + } BranchInst::Create(middle2_bb, negative_bb); @@ -1111,11 +992,13 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { default: continue; + } PHINode *PN = PHINode::Create(Int1Ty, 3, ""); switch (FcmpInst->getPredicate()) { + case CmpInst::FCMP_UEQ: case CmpInst::FCMP_OEQ: /* unequal signs cannot be equal values */ @@ -1154,94 +1037,328 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { break; default: continue; + } BasicBlock::iterator ii(FcmpInst); ReplaceInstWithInst(FcmpInst->getParent()->getInstList(), ii, PN); ++count; + + } + + return count; + +} + +/* splits icmps of size bitw into two nested icmps with bitw/2 size each */ +size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned bitw) { + + size_t count = 0; + + LLVMContext &C = M.getContext(); + + IntegerType *Int1Ty = IntegerType::getInt1Ty(C); + IntegerType *OldIntType = IntegerType::get(C, bitw); + IntegerType *NewIntType = IntegerType::get(C, bitw / 2); + + std::vector icomps; + + if (bitw % 2) { return 0; } + + /* not supported yet */ + if (bitw > 64) { return 0; } + + /* get all EQ, NE, UGT, and ULT icmps of width bitw. 
if the + * functions simplifyCompares() and simplifyIntSignedness() + * were executed only these four predicates should exist */ + for (auto &F : M) { + + if (!isInInstrumentList(&F)) continue; + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CmpInst *selectcmpInst = nullptr; + + if ((selectcmpInst = dyn_cast(&IN))) { + + if (selectcmpInst->getPredicate() == CmpInst::ICMP_EQ || + selectcmpInst->getPredicate() == CmpInst::ICMP_NE || + selectcmpInst->getPredicate() == CmpInst::ICMP_UGT || + selectcmpInst->getPredicate() == CmpInst::ICMP_ULT) { + + auto op0 = selectcmpInst->getOperand(0); + auto op1 = selectcmpInst->getOperand(1); + + IntegerType *intTyOp0 = dyn_cast(op0->getType()); + IntegerType *intTyOp1 = dyn_cast(op1->getType()); + + if (!intTyOp0 || !intTyOp1) { continue; } + + /* check if the bitwidths are the one we are looking for */ + if (intTyOp0->getBitWidth() != bitw || + intTyOp1->getBitWidth() != bitw) { + + continue; + + } + + icomps.push_back(selectcmpInst); + + } + + } + + } + + } + + } + + if (!icomps.size()) { return 0; } + + for (auto &IcmpInst : icomps) { + + BasicBlock *bb = IcmpInst->getParent(); + + auto op0 = IcmpInst->getOperand(0); + auto op1 = IcmpInst->getOperand(1); + + auto cmp_inst = dyn_cast(IcmpInst); + if (!cmp_inst) { continue; } + auto pred = cmp_inst->getPredicate(); + + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); + + /* create the comparison of the top halves of the original operands */ + Instruction *s_op0, *op0_high, *s_op1, *op1_high, *icmp_high; + + s_op0 = BinaryOperator::Create(Instruction::LShr, op0, + ConstantInt::get(OldIntType, bitw / 2)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); + op0_high = new TruncInst(s_op0, NewIntType); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + op0_high); + + s_op1 = BinaryOperator::Create(Instruction::LShr, op1, + ConstantInt::get(OldIntType, bitw / 2)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); + op1_high = new TruncInst(s_op1, NewIntType); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + op1_high); + + icmp_high = CmpInst::Create(Instruction::ICmp, pred, op0_high, op1_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_high); + + /* now we have to destinguish between == != and > < */ + if (pred == CmpInst::ICMP_EQ || pred == CmpInst::ICMP_NE) { + + /* transformation for == and != icmps */ + + /* create a compare for the lower half of the original operands */ + Instruction *op0_low, *op1_low, *icmp_low; + BasicBlock * cmp_low_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + + op0_low = new TruncInst(op0, NewIntType); + cmp_low_bb->getInstList().push_back(op0_low); + + op1_low = new TruncInst(op1, NewIntType); + cmp_low_bb->getInstList().push_back(op1_low); + + icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low); + cmp_low_bb->getInstList().push_back(icmp_low); + BranchInst::Create(end_bb, cmp_low_bb); + + /* dependent on the cmp of the high parts go to the end or go on with + * the comparison */ + auto term = bb->getTerminator(); + if (pred == CmpInst::ICMP_EQ) { + + BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb); + + } else { + + /* CmpInst::ICMP_NE */ + BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb); + + } + + term->eraseFromParent(); + + /* create the PHI and connect the edges accordingly */ + PHINode *PN = PHINode::Create(Int1Ty, 2, ""); + PN->addIncoming(icmp_low, 
cmp_low_bb); + if (pred == CmpInst::ICMP_EQ) { + + PN->addIncoming(ConstantInt::get(Int1Ty, 0), bb); + + } else { + + /* CmpInst::ICMP_NE */ + PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); + + } + + /* replace the old icmp with the new PHI */ + BasicBlock::iterator ii(IcmpInst); + ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); + + } else { + + /* CmpInst::ICMP_UGT and CmpInst::ICMP_ULT */ + /* transformations for < and > */ + + /* create a basic block which checks for the inverse predicate. + * if this is true we can go to the end if not we have to go to the + * bb which checks the lower half of the operands */ + Instruction *icmp_inv_cmp, *op0_low, *op1_low, *icmp_low; + BasicBlock * inv_cmp_bb = + BasicBlock::Create(C, "inv_cmp", end_bb->getParent(), end_bb); + if (pred == CmpInst::ICMP_UGT) { + + icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, + op0_high, op1_high); + + } else { + + icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, + op0_high, op1_high); + + } + + inv_cmp_bb->getInstList().push_back(icmp_inv_cmp); + + auto term = bb->getTerminator(); + term->eraseFromParent(); + BranchInst::Create(end_bb, inv_cmp_bb, icmp_high, bb); + + /* create a bb which handles the cmp of the lower halves */ + BasicBlock *cmp_low_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + op0_low = new TruncInst(op0, NewIntType); + cmp_low_bb->getInstList().push_back(op0_low); + op1_low = new TruncInst(op1, NewIntType); + cmp_low_bb->getInstList().push_back(op1_low); + + icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low); + cmp_low_bb->getInstList().push_back(icmp_low); + BranchInst::Create(end_bb, cmp_low_bb); + + BranchInst::Create(end_bb, cmp_low_bb, icmp_inv_cmp, inv_cmp_bb); + + PHINode *PN = PHINode::Create(Int1Ty, 3); + PN->addIncoming(icmp_low, cmp_low_bb); + PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); + PN->addIncoming(ConstantInt::get(Int1Ty, 0), inv_cmp_bb); + + BasicBlock::iterator ii(IcmpInst); + ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); + + } + + ++count; + } return count; + } bool SplitComparesTransform::runOnModule(Module &M) { + + int bitw = 64; + size_t count = 0; + char *bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW"); if (!bitw_env) bitw_env = getenv("LAF_SPLIT_COMPARES_BITW"); - if (bitw_env) { target_bitwidth = atoi(bitw_env); } + if (bitw_env) { bitw = atoi(bitw_env); } enableFPSplit = getenv("AFL_LLVM_LAF_SPLIT_FLOATS") != NULL; if ((isatty(2) && getenv("AFL_QUIET") == NULL) || getenv("AFL_DEBUG") != NULL) { - errs() << "Split-compare-pass by laf.intel@gmail.com, extended by " - "heiko@hexco.de (splitting icmp to " - << target_bitwidth << " bit)\n"; - if (getenv("AFL_DEBUG") != NULL && !debug) { debug = 1; } + printf( + "Split-compare-pass by laf.intel@gmail.com, extended by " + "heiko@hexco.de\n"); } else { + be_quiet = 1; + } if (enableFPSplit) { + count = splitFPCompares(M); /* if (!be_quiet) { + errs() << "Split-floatingpoint-compare-pass: " << count << " FP comparisons split\n"; + } + */ simplifyFPCompares(M); + } - std::vector worklist; - /* iterate over all functions, bbs and instruction search for all integer - * compare instructions. Save them into the worklist for later. 
*/ - for (auto &F : M) { - if (!isInInstrumentList(&F)) continue; + simplifyCompares(M); - for (auto &BB : F) { - for (auto &IN : BB) { - if (auto CI = dyn_cast(&IN)) { - auto op0 = CI->getOperand(0); - auto op1 = CI->getOperand(1); - if (!op0 || !op1) { return false; } - auto iTy1 = dyn_cast(op0->getType()); - if (iTy1 && isa(op1->getType())) { - unsigned bitw = iTy1->getBitWidth(); - if (isSupportedBitWidth(bitw)) { worklist.push_back(CI); } - } - } - } - } - } + simplifyIntSignedness(M); - // now that we have a list of all integer comparisons we can start replacing - // them with the splitted alternatives. - for (auto CI : worklist) { - simplifyAndSplit(CI, M); - } + switch (bitw) { - bool brokenDebug = false; - if (verifyModule(M, &errs(), &brokenDebug)) { - reportError( - "Module Verifier failed! Consider reporting a bug with the AFL++ " - "project.", - nullptr, M); - } + case 64: + count += splitIntCompares(M, bitw); + if (debug) + errs() << "Split-integer-compare-pass " << bitw << "bit: " << count + << " split\n"; + bitw >>= 1; +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) + [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ +#endif + case 32: + count += splitIntCompares(M, bitw); + if (debug) + errs() << "Split-integer-compare-pass " << bitw << "bit: " << count + << " split\n"; + bitw >>= 1; +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) + [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ +#endif + case 16: + count += splitIntCompares(M, bitw); + if (debug) + errs() << "Split-integer-compare-pass " << bitw << "bit: " << count + << " split\n"; + // bitw >>= 1; + break; + + default: + // if (!be_quiet) errs() << "NOT Running split-compare-pass \n"; + return false; + break; - if (brokenDebug) { - reportError("Module Verifier reported broken Debug Infos - Stripping!", - nullptr, M); - StripDebugInfo(M); } + + verifyModule(M); return true; + } static void registerSplitComparesPass(const PassManagerBuilder &, legacy::PassManagerBase &PM) { + PM.add(new SplitComparesTransform()); + } static RegisterStandardPasses RegisterSplitComparesPass( @@ -1256,7 +1373,3 @@ static RegisterStandardPasses RegisterSplitComparesTransPassLTO( registerSplitComparesPass); #endif -static RegisterPass X("splitcompares", - "AFL++ split compares", - true /* Only looks at CFG */, - true /* Analysis Pass */); diff --git a/test/test-int_cases.c b/test/test-int_cases.c deleted file mode 100644 index c76206c5..00000000 --- a/test/test-int_cases.c +++ /dev/null @@ -1,424 +0,0 @@ -/* test cases for integer comparison transformations - * compile with -DINT_TYPE="signed char" - * or -DINT_TYPE="short" - * or -DINT_TYPE="int" - * or -DINT_TYPE="long" - * or -DINT_TYPE="long long" - */ - -#include - -int main() { - - volatile INT_TYPE a, b; - /* different values */ - a = -21; - b = -2; /* signs equal */ - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - a = 1; - b = 8; /* signs equal */ - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 255) { /* short or bigger */ - volatile short a, b; - a = 2; - b = 256+1; /* signs equal */ - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - a = -1 - 256; - b = -8; /* signs equal */ - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - 
assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 65535) { /* int or bigger */ - volatile int a, b; - a = 2; - b = 65536+1; /* signs equal */ - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - a = -1 - 65536; - b = -8; /* signs equal */ - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 4294967295) { /* long or bigger */ - volatile long a, b; - a = 2; - b = 4294967296+1; /* signs equal */ - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - a = -1 - 4294967296; - b = -8; /* signs equal */ - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - } - } - } - - a = -1; - b = 1; /* signs differ */ - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - a = -1; - b = 0; /* signs differ */ - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - a = -2; - b = 8; /* signs differ */ - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - a = -1; - b = -2; /* signs equal */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = 8; - b = 1; /* signs equal */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 255) { - volatile short a, b; - a = 1 + 256; - b = 3; /* signs equal */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = -1; - b = -256; /* signs equal */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 65535) { - volatile int a, b; - a = 1 + 65536; - b = 3; /* signs equal */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = -1; - b = -65536; /* signs equal */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 4294967295) { - volatile long a, b; - a = 1 + 4294967296; - b = 3; /* signs equal */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = -1; - b = -4294967296; /* signs equal */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - } - } - } - - a = 1; - b = -1; /* signs differ */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = 0; - b = -1; /* signs differ */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = 8; - b = -2; /* signs differ */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = 1; - b = -2; /* signs differ */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != 
b)); - assert(!(a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 255) { - volatile short a, b; - a = 1 + 256; - b = -2; /* signs differ */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = -1; - b = -2 - 256; /* signs differ */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 65535) { - volatile int a, b; - a = 1 + 65536; - b = -2; /* signs differ */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = -1; - b = -2 - 65536; /* signs differ */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 4294967295) { - volatile long a, b; - a = 1 + 4294967296; - b = -2; /* signs differ */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = -1; - b = -2 - 4294967296; /* signs differ */ - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - } - } - } - - /* equal values */ - a = 0; - b = 0; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - a = -0; - b = 0; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - a = 1; - b = 1; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - a = 5; - b = 5; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - a = -1; - b = -1; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - a = -5; - b = -5; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 255) { - volatile short a, b; - a = 1 + 256; - b = 1 + 256; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - a = -2 - 256; - b = -2 - 256; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 65535) { - volatile int a, b; - a = 1 + 65536; - b = 1 + 65536; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - a = -2 - 65536; - b = -2 - 65536; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - if ((unsigned)(INT_TYPE)(~0) > 4294967295) { - volatile long a, b; - a = 1 + 4294967296; - b = 1 + 4294967296; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - a = -2 - 4294967296; - b = -2 - 4294967296; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - } - } - } -} - diff --git a/test/test-uint_cases.c b/test/test-uint_cases.c deleted file mode 100644 index 8496cffe..00000000 --- a/test/test-uint_cases.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * compile with -DUINT_TYPE="unsigned char" - * or -DUINT_TYPE="unsigned 
short" - * or -DUINT_TYPE="unsigned int" - * or -DUINT_TYPE="unsigned long" - * or -DUINT_TYPE="unsigned long long" - */ - -#include - -int main() { - - volatile UINT_TYPE a, b; - - a = 1; - b = 8; - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - if ((UINT_TYPE)(~0) > 255) { - volatile unsigned short a, b; - a = 256+2; - b = 256+21; - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - a = 21; - b = 256+1; - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - if ((UINT_TYPE)(~0) > 65535) { - volatile unsigned int a, b; - a = 65536+2; - b = 65536+21; - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - a = 21; - b = 65536+1; - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - } - - if ((UINT_TYPE)(~0) > 4294967295) { - volatile unsigned long a, b; - a = 4294967296+2; - b = 4294967296+21; - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - - a = 21; - b = 4294967296+1; - assert((a < b)); - assert((a <= b)); - assert(!(a > b)); - assert(!(a >= b)); - assert((a != b)); - assert(!(a == b)); - } - } - - a = 8; - b = 1; - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - if ((UINT_TYPE)(~0) > 255) { - volatile unsigned short a, b; - a = 256+2; - b = 256+1; - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = 256+2; - b = 6; - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - if ((UINT_TYPE)(~0) > 65535) { - volatile unsigned int a, b; - a = 65536+2; - b = 65536+1; - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = 65536+2; - b = 6; - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - if ((UINT_TYPE)(~0) > 4294967295) { - volatile unsigned long a, b; - a = 4294967296+2; - b = 4294967296+1; - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - a = 4294967296+2; - b = 6; - assert((a > b)); - assert((a >= b)); - assert(!(a < b)); - assert(!(a <= b)); - assert((a != b)); - assert(!(a == b)); - - } - } - } - - - a = 0; - b = 0; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - a = 1; - b = 1; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - if ((UINT_TYPE)(~0) > 255) { - volatile unsigned short a, b; - a = 256+5; - b = 256+5; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - if ((UINT_TYPE)(~0) > 65535) { - volatile unsigned int a, b; - a = 65536+5; - b = 65536+5; - assert(!(a < b)); - assert((a <= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - - if ((UINT_TYPE)(~0) > 4294967295) { - volatile unsigned long a, b; - a = 4294967296+5; - b = 4294967296+5; - assert(!(a < b)); - assert((a 
<= b)); - assert(!(a > b)); - assert((a >= b)); - assert(!(a != b)); - assert((a == b)); - } - } - - } - -} - diff --git a/utils/crash_triage/triage_crashes.sh b/utils/crash_triage/triage_crashes.sh index 9ca1d5fc..4d75430e 100755 --- a/utils/crash_triage/triage_crashes.sh +++ b/utils/crash_triage/triage_crashes.sh @@ -65,11 +65,7 @@ if [ ! -f "$BIN" -o ! -x "$BIN" ]; then fi if [ ! -d "$DIR/queue" ]; then -<<<<<<< Updated upstream echo "[-] Error: directory '$DIR' not found or not created by afl-fuzz." 1>&2 -======= - echo "[-] Error: directory '$DIR/queue' not found or not created by afl-fuzz." 1>&2 ->>>>>>> Stashed changes exit 1 fi -- cgit 1.4.1 From 0c3feba3f6b3a517b21f9e96fa5e0758e959dec1 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 14 Jun 2021 22:58:08 +0200 Subject: aflppdriver help output --- utils/aflpp_driver/aflpp_driver.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'utils') diff --git a/utils/aflpp_driver/aflpp_driver.c b/utils/aflpp_driver/aflpp_driver.c index c094c425..ff42f3b9 100644 --- a/utils/aflpp_driver/aflpp_driver.c +++ b/utils/aflpp_driver/aflpp_driver.c @@ -204,21 +204,23 @@ static int ExecuteFilesOnyByOne(int argc, char **argv) { int main(int argc, char **argv) { - printf( - "============================== INFO ================================\n" - "This binary is built for afl++.\n" - "To use with afl-cmin or afl-cmin.bash pass '-' as single command line " - "option\n" - "To run the target function on individual input(s) execute this:\n" - " %s INPUT_FILE1 [INPUT_FILE2 ... ]\n" - "To fuzz with afl-fuzz execute this:\n" - " afl-fuzz [afl-flags] -- %s [-N]\n" - "afl-fuzz will run N iterations before re-spawning the process (default: " - "INT_MAX)\n" - "For stdin input processing, pass '-' as single command line option.\n" - "For file input processing, pass '@@' as single command line option.\n" - "===================================================================\n", - argv[0], argv[0]); + if (argc < 2 || strncmp(argv[1], "-h", 2) == 0) + printf( + "============================== INFO ================================\n" + "This binary is built for afl++.\n" + "To use with afl-cmin or afl-cmin.bash pass '-' as single command line " + "option\n" + "To run the target function on individual input(s) execute this:\n" + " %s INPUT_FILE1 [INPUT_FILE2 ... ]\n" + "To fuzz with afl-fuzz execute this:\n" + " afl-fuzz [afl-flags] -- %s [-N]\n" + "afl-fuzz will run N iterations before re-spawning the process " + "(default: " + "INT_MAX)\n" + "For stdin input processing, pass '-' as single command line option.\n" + "For file input processing, pass '@@' as single command line option.\n" + "===================================================================\n", + argv[0], argv[0]); if (getenv("AFL_GDB")) { -- cgit 1.4.1 From 8dbe87bdf6c848088cd51923e445b92b9f839956 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 7 Jul 2021 16:22:57 +0200 Subject: print warning for libfuzzer qemu driver --- utils/aflpp_driver/aflpp_qemu_driver.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'utils') diff --git a/utils/aflpp_driver/aflpp_qemu_driver.c b/utils/aflpp_driver/aflpp_qemu_driver.c index 79de5af6..efa80bca 100644 --- a/utils/aflpp_driver/aflpp_qemu_driver.c +++ b/utils/aflpp_driver/aflpp_qemu_driver.c @@ -27,6 +27,9 @@ int main(int argc, char **argv) { } else { + fprintf(stderr + "Using shared-memory testcases. 
To read via stdin, set " + "AFL_QEMU_DRIVER_NO_HOOK=1.\n"); uint8_t dummy_input[1024000] = {0}; LLVMFuzzerTestOneInput(dummy_input, 1); -- cgit 1.4.1 From f4b975d6ad6f7e55c5c4e290ab85e18957cad0c3 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 7 Jul 2021 22:22:06 +0200 Subject: update doc --- utils/aflpp_driver/README.md | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'utils') diff --git a/utils/aflpp_driver/README.md b/utils/aflpp_driver/README.md index f03c2fe3..4ca59776 100644 --- a/utils/aflpp_driver/README.md +++ b/utils/aflpp_driver/README.md @@ -22,6 +22,8 @@ or `@@` as command line parameters. ## aflpp_qemu_driver +Note that you can use the driver too for frida_mode (`-O`). + aflpp_qemu_driver is used for libfuzzer `LLVMFuzzerTestOneInput()` targets that are to be fuzzed in qemu_mode. So we compile them with clang/clang++, without -fsantize=fuzzer or afl-clang-fast, and link in libAFLQemuDriver.a: @@ -34,3 +36,8 @@ Then just do (where the name of the binary is `fuzz`): AFL_QEMU_PERSISTENT_ADDR=0x$(nm fuzz | grep "T LLVMFuzzerTestOneInput" | awk '{print $1}') AFL_QEMU_PERSISTENT_HOOK=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- ./fuzz` ``` + +if you use afl-cmin or `afl-showmap -C` with the aflpp_qemu_driver you need to +set the set same AFL_QEMU_... (or AFL_FRIDA_...) environment variables. +If you want to use afl-showmap (without -C) or afl-cmin.bash then you may not +set these environment variables and rather set `AFL_QEMU_DRIVER_NO_HOOK=1`. -- cgit 1.4.1 From 6d878a375d91003fb57d7f82a79acfbaa226abb5 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 8 Jul 2021 12:29:05 +0200 Subject: fix qemu driver --- utils/aflpp_driver/aflpp_qemu_driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/aflpp_driver/aflpp_qemu_driver.c b/utils/aflpp_driver/aflpp_qemu_driver.c index efa80bca..99a4c9a8 100644 --- a/utils/aflpp_driver/aflpp_qemu_driver.c +++ b/utils/aflpp_driver/aflpp_qemu_driver.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -27,7 +28,7 @@ int main(int argc, char **argv) { } else { - fprintf(stderr + fprintf(stderr, "Using shared-memory testcases. To read via stdin, set " "AFL_QEMU_DRIVER_NO_HOOK=1.\n"); uint8_t dummy_input[1024000] = {0}; -- cgit 1.4.1 From 4fe572b80f76ff0b0e916b639d1e04d5af48b157 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 14 Jul 2021 12:24:29 +0200 Subject: always build aflpp driver --- GNUmakefile | 3 +-- docs/Changelog.md | 1 + utils/aflpp_driver/GNUmakefile | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'utils') diff --git a/GNUmakefile b/GNUmakefile index 53cc0537..7a1ba88a 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -306,6 +306,7 @@ endif .PHONY: all all: test_x86 test_shm test_python ready $(PROGS) afl-as llvm gcc_plugin test_build all_done + -$(MAKE) -C utils/aflpp_driver .PHONY: llvm llvm: @@ -597,7 +598,6 @@ distrib: all -$(MAKE) -f GNUmakefile.gcc_plugin $(MAKE) -C utils/libdislocator $(MAKE) -C utils/libtokencap - -$(MAKE) -C utils/aflpp_driver $(MAKE) -C utils/afl_network_proxy $(MAKE) -C utils/socket_fuzzing $(MAKE) -C utils/argv_fuzzing @@ -622,7 +622,6 @@ source-only: all -$(MAKE) -f GNUmakefile.gcc_plugin $(MAKE) -C utils/libdislocator $(MAKE) -C utils/libtokencap - -$(MAKE) -C utils/aflpp_driver %.8: % @echo .TH $* 8 $(BUILD_DATE) "afl++" > $@ diff --git a/docs/Changelog.md b/docs/Changelog.md index 705daa40..29af44ab 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -36,6 +36,7 @@ sending a mail to . 
- fix timeout handling - add forkserver support for better performance - ensure afl-compiler-rt is built for gcc_module + - always build aflpp_driver for libfuzzer harnesses - added `AFL_NO_FORKSRV` env variable support to afl-cmin, afl-tmin, and afl-showmap, by @jhertz - removed outdated documents, improved existing documentation diff --git a/utils/aflpp_driver/GNUmakefile b/utils/aflpp_driver/GNUmakefile index ad99b893..c282a9f3 100644 --- a/utils/aflpp_driver/GNUmakefile +++ b/utils/aflpp_driver/GNUmakefile @@ -15,28 +15,28 @@ aflpp_driver.o: aflpp_driver.c -$(LLVM_BINDIR)clang -I. -I../../include $(CFLAGS) -c aflpp_driver.c libAFLDriver.a: aflpp_driver.o - ar ru libAFLDriver.a aflpp_driver.o - cp -vf libAFLDriver.a ../../ + @ar rc libAFLDriver.a aflpp_driver.o + @cp -vf libAFLDriver.a ../../ debug: $(LLVM_BINDIR)clang -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.o ../../src/afl-performance.c $(LLVM_BINDIR)clang -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c #$(LLVM_BINDIR)clang -S -emit-llvm -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.ll ../../src/afl-performance.c #$(LLVM_BINDIR)clang -S -emit-llvm -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c - ar ru libAFLDriver.a afl-performance.o aflpp_driver.o + ar rc libAFLDriver.a afl-performance.o aflpp_driver.o aflpp_qemu_driver.o: aflpp_qemu_driver.c -$(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c libAFLQemuDriver.a: aflpp_qemu_driver.o - -ar ru libAFLQemuDriver.a aflpp_qemu_driver.o - -cp -vf libAFLQemuDriver.a ../../ + @-ar rc libAFLQemuDriver.a aflpp_qemu_driver.o + @-cp -vf libAFLQemuDriver.a ../../ aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o - -test -e aflpp_qemu_driver_hook.o && $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so || echo "Note: Optional aflpp_qemu_driver_hook.so not built." + @-test -e aflpp_qemu_driver_hook.o && $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so || echo "Note: Optional aflpp_qemu_driver_hook.so not built." aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c - -test -e ../../qemu_mode/qemuafl/qemuafl/api.h && $(LLVM_BINDIR)clang $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c || echo "Note: Optional aflpp_qemu_driver_hook.o not built." + @-test -e ../../qemu_mode/qemuafl/qemuafl/api.h && $(LLVM_BINDIR)clang $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c || echo "Note: Optional aflpp_qemu_driver_hook.o not built." 
test: debug #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c -- cgit 1.4.1 From 18fd97fc5ffc5ad94e735cfbfa0d500463dcb585 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 19 Jul 2021 09:12:24 +0200 Subject: v3.14c release --- README.md | 9 +++++++-- docs/Changelog.md | 2 +- utils/qbdi_mode/README.md | 4 ++++ 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'utils') diff --git a/README.md b/README.md index 37fd90e3..94a38ab1 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ AFL++ Logo - Release Version: [3.13c](https://github.com/AFLplusplus/AFLplusplus/releases) + Release Version: [3.14c](https://github.com/AFLplusplus/AFLplusplus/releases) - Github Version: 3.14a + Github Version: 3.15a Repository: [https://github.com/AFLplusplus/AFLplusplus](https://github.com/AFLplusplus/AFLplusplus) @@ -31,6 +31,11 @@ With afl++ 3.13-3.20 we introduce frida_mode (-O) to have an alternative for binary-only fuzzing. It is slower than Qemu mode but works on MacOS, Android, iOS etc. +With afl++ 3.15 we introduced the following changes from previous behaviours: + * Also -M main mode does not due deterministic fuzzing by default anymore + * afl-cmin and afl-showmap -Ci now descent into subdirectories like + afl-fuzz -i does (but note that afl-cmin.bash does not) + With afl++ 3.14 we introduced the following changes from previous behaviours: * afl-fuzz: deterministic fuzzing it not a default for -M main anymore * afl-cmin/afl-showmap -i now descends into subdirectories (afl-cmin.bash diff --git a/docs/Changelog.md b/docs/Changelog.md index 7131360a..fcfd2ce8 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -8,7 +8,7 @@ Want to stay in the loop on major new features? Join our mailing list by sending a mail to . -### Version ++3.14a (release) +### Version ++3.14c (release) - afl-fuzz: - fix -F when a '/' was part of the parameter - fixed a crash for cmplog for very slow inputs diff --git a/utils/qbdi_mode/README.md b/utils/qbdi_mode/README.md index 641a6e85..cf5d3359 100755 --- a/utils/qbdi_mode/README.md +++ b/utils/qbdi_mode/README.md @@ -1,5 +1,9 @@ # qbdi-based binary-only instrumentation for afl-fuzz +NOTE: this code is outdated and first would need to be adapted to the current +afl++ versions first. +Try afl_frida or fpicker [https://github.com/ttdennis/fpicker/](https://github.com/ttdennis/fpicker/) first, maybe they suite your need. + ## 1) Introduction The code in ./qbdi_mode allows you to build a standalone feature that -- cgit 1.4.1
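
The driver patches above (libAFLDriver.a, libAFLQemuDriver.a, the help output and the README update) all target libFuzzer-style harnesses that export LLVMFuzzerTestOneInput(). For reference, a minimal sketch of such a harness follows; the file name harness.c and the build/run lines in the comment are illustrative assumptions, not taken from these patches.

/* harness.c - hypothetical minimal target for libAFLDriver.a.
 * Build sketch (instrumented, for the plain driver):
 *   afl-clang-fast -c harness.c
 *   afl-clang-fast harness.o libAFLDriver.a -o harness
 * Fuzz it:        afl-fuzz -i in -o out -- ./harness
 * Single inputs:  ./harness INPUT_FILE1 [INPUT_FILE2 ...]
 * For qemu_mode/frida_mode, compile uninstrumented with plain clang and link
 * libAFLQemuDriver.a instead, as described in the README patch above.
 */
#include <stdint.h>
#include <stddef.h>

int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {

  /* the driver calls this once per testcase (stdin, '@@' file or argv list) */
  if (size >= 4 && data[0] == 'F' && data[1] == 'U' && data[2] == 'Z' &&
      data[3] == 'Z') {

    __builtin_trap();  /* deliberate crash path so there is something to find */

  }

  return 0;

}
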