From 70403f7e1b586bf23eebb131d5db2397d708abf0 Mon Sep 17 00:00:00 2001 From: realmadsci <71108352+realmadsci@users.noreply.github.com> Date: Mon, 15 Mar 2021 14:09:54 -0400 Subject: triage_crashes.sh: Allow @@ to be part of an arg --- utils/crash_triage/triage_crashes.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/utils/crash_triage/triage_crashes.sh b/utils/crash_triage/triage_crashes.sh index bf763cba..42cf8158 100755 --- a/utils/crash_triage/triage_crashes.sh +++ b/utils/crash_triage/triage_crashes.sh @@ -90,8 +90,9 @@ for crash in $DIR/crashes/id:*; do for a in $@; do - if [ "$a" = "@@" ] ; then - use_args="$use_args $crash" + if echo "$a" | grep -qF '@@'; then + escaped_fname=`echo $crash | sed 's:/:\\\\/:g'` + use_args="$use_args `echo $a | sed "s/@@/$escaped_fname/g"`" unset use_stdio else use_args="$use_args $a" -- cgit 1.4.1 From b289e7ad073d4affae76de3da5d1faeba0f8a07e Mon Sep 17 00:00:00 2001 From: realmadsci <71108352+realmadsci@users.noreply.github.com> Date: Mon, 15 Mar 2021 14:09:03 -0400 Subject: triage_crashes.sh: Fix error reporting --- utils/crash_triage/triage_crashes.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/utils/crash_triage/triage_crashes.sh b/utils/crash_triage/triage_crashes.sh index 42cf8158..c9ca1f79 100755 --- a/utils/crash_triage/triage_crashes.sh +++ b/utils/crash_triage/triage_crashes.sh @@ -60,12 +60,12 @@ if fi if [ ! -f "$BIN" -o ! -x "$BIN" ]; then - echo "[-] Error: binary '$2' not found or is not executable." 1>&2 + echo "[-] Error: binary '$BIN' not found or is not executable." 1>&2 exit 1 fi if [ ! -d "$DIR/queue" ]; then - echo "[-] Error: directory '$1' not found or not created by afl-fuzz." 1>&2 + echo "[-] Error: directory '$DIR' not found or not created by afl-fuzz." 
1>&2 exit 1 fi -- cgit 1.4.1 From fa349b4f4ceaa3e8309e7b01ddee6b6f895175f6 Mon Sep 17 00:00:00 2001 From: hexcoder Date: Fri, 26 Mar 2021 17:49:20 +0100 Subject: simpler argument processing --- utils/crash_triage/triage_crashes.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'utils') diff --git a/utils/crash_triage/triage_crashes.sh b/utils/crash_triage/triage_crashes.sh index c9ca1f79..4d75430e 100755 --- a/utils/crash_triage/triage_crashes.sh +++ b/utils/crash_triage/triage_crashes.sh @@ -90,13 +90,15 @@ for crash in $DIR/crashes/id:*; do for a in $@; do - if echo "$a" | grep -qF '@@'; then - escaped_fname=`echo $crash | sed 's:/:\\\\/:g'` - use_args="$use_args `echo $a | sed "s/@@/$escaped_fname/g"`" + case "$a" in + *@@*) unset use_stdio - else + use_args="$use_args `printf %s "$a" | sed -e 's<@@<'$crash' Date: Wed, 31 Mar 2021 15:44:27 +0800 Subject: fix #if A == B always evalutes to true --- Android.bp | 1 - utils/afl_frida/afl-frida.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'utils') diff --git a/Android.bp b/Android.bp index 64794e19..bf37757d 100644 --- a/Android.bp +++ b/Android.bp @@ -32,7 +32,6 @@ cc_defaults { target: { android_arm64: { cflags: [ - "-D__aarch64__", "-D__ANDROID__", ], }, diff --git a/utils/afl_frida/afl-frida.c b/utils/afl_frida/afl-frida.c index 711d8f33..e49d6f42 100644 --- a/utils/afl_frida/afl-frida.c +++ b/utils/afl_frida/afl-frida.c @@ -111,7 +111,7 @@ inline static void afl_maybe_log(guint64 current_pc) { } -#if GUM_NATIVE_CPU == GUM_CPU_AMD64 +#ifdef __x86_64__ static const guint8 afl_maybe_log_code[] = { @@ -177,7 +177,7 @@ void instr_basic_block(GumStalkerIterator *iterator, GumStalkerOutput *output, if (instr->address >= range->code_start && instr->address <= range->code_end) { -#if GUM_NATIVE_CPU == GUM_CPU_AMD64 +#ifdef __x86_64__ GumX86Writer *cw = output->writer.x86; if (range->current_log_impl == 0 || !gum_x86_writer_can_branch_directly_between( -- cgit 1.4.1 
From 565f61a6abc30dfb4df0269384466589690fbae5 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Fri, 2 Apr 2021 20:09:16 +0400 Subject: Initialalize the autodict-ql Initialalize the autodict-ql add codeql scripts --- utils/autodict_ql/litool.ql | 10 ++++++++++ utils/autodict_ql/memcmp-str.ql | 8 ++++++++ utils/autodict_ql/strcmp-str.ql | 8 ++++++++ utils/autodict_ql/strncmp-str.ql | 8 ++++++++ utils/autodict_ql/strtool.ql | 24 ++++++++++++++++++++++++ 5 files changed, 58 insertions(+) create mode 100644 utils/autodict_ql/litool.ql create mode 100644 utils/autodict_ql/memcmp-str.ql create mode 100644 utils/autodict_ql/strcmp-str.ql create mode 100644 utils/autodict_ql/strncmp-str.ql create mode 100644 utils/autodict_ql/strtool.ql (limited to 'utils') diff --git a/utils/autodict_ql/litool.ql b/utils/autodict_ql/litool.ql new file mode 100644 index 00000000..b7f4bf33 --- /dev/null +++ b/utils/autodict_ql/litool.ql @@ -0,0 +1,10 @@ +import cpp + +class HexOrOctLiteral extends Literal{ + HexOrOctLiteral(){ + (this instanceof HexLiteral) or (this instanceof OctalLiteral) + } +} + +from HexOrOctLiteral lit +select lit.getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/memcmp-str.ql b/utils/autodict_ql/memcmp-str.ql new file mode 100644 index 00000000..830c9cac --- /dev/null +++ b/utils/autodict_ql/memcmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : memcmp trace + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("memcmp") +select fucall.getArgument(_).getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/strcmp-str.ql b/utils/autodict_ql/strcmp-str.ql new file mode 100644 index 00000000..83ffadaf --- /dev/null +++ b/utils/autodict_ql/strcmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : strcmp + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("strcmp") +select fucall.getArgument(_).getValueText() \ No newline at end of file 
diff --git a/utils/autodict_ql/strncmp-str.ql b/utils/autodict_ql/strncmp-str.ql new file mode 100644 index 00000000..dbb952e5 --- /dev/null +++ b/utils/autodict_ql/strncmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : strncmp + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("strncmp") +select fucall.getArgument(_).getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/strtool.ql b/utils/autodict_ql/strtool.ql new file mode 100644 index 00000000..f78aabbb --- /dev/null +++ b/utils/autodict_ql/strtool.ql @@ -0,0 +1,24 @@ +import cpp +import semmle.code.cpp.dataflow.DataFlow +class StringLiteralNode extends DataFlow::Node { + StringLiteralNode() { this.asExpr() instanceof StringLiteral } +} +class MemcmpArgNode extends DataFlow::Node { + MemcmpArgNode() { + exists(FunctionCall fc | + fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and + fc.getArgument(0) = this.asExpr() + ) + or + exists(FunctionCall fc | + fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and + fc.getArgument(1) = this.asExpr() + ) + } +} + +from StringLiteralNode src, MemcmpArgNode arg +where + DataFlow::localFlow(src, arg) + +select src.asExpr().(StringLiteral).toString() \ No newline at end of file -- cgit 1.4.1 From a26ed3b7580e31b6f6f174169528fc0bebe20ad6 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:02:45 +0400 Subject: update the codes, readme - add readme - add required qlpack.yml --- utils/autodict_ql/autodict_ql.py | 188 ++++++++++++++++++++++++++++++++++++++ utils/autodict_ql/build-codeql.sh | 17 ++++ utils/autodict_ql/litan.py | 86 +++++++++++++++++ utils/autodict_ql/qlpack.yml | 3 + utils/autodict_ql/readme.md | 81 ++++++++++++++++ utils/autodict_ql/strtool.ql | 6 +- 6 files changed, 378 insertions(+), 3 deletions(-) create mode 100644 utils/autodict_ql/autodict_ql.py create mode 100644 utils/autodict_ql/build-codeql.sh create 
mode 100644 utils/autodict_ql/litan.py create mode 100644 utils/autodict_ql/qlpack.yml create mode 100644 utils/autodict_ql/readme.md (limited to 'utils') diff --git a/utils/autodict_ql/autodict_ql.py b/utils/autodict_ql/autodict_ql.py new file mode 100644 index 00000000..69d11f48 --- /dev/null +++ b/utils/autodict_ql/autodict_ql.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import shutil +import subprocess + +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + + #parser.add_argument("tokenpath", + #help="Destination directory for tokens") + parser.add_argument("cur", + help = "Current Path") + parser.add_argument("db", + help = "CodeQL database Path") + parser.add_argument("tokenpath", + help="Destination directory for tokens") + + return parser.parse_args() + +def static_analysis(file,file2,cur,db) : + with open(cur+"/"+file, "w") as f: + print(cur+"/"+file) + stream = os.popen("codeql query run " + cur +"/"+ file2 + " -d " + db ) + output = stream.read() + f.write(output) + f.close() + +def copy_tokens(cur, tokenpath) : + subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strstr-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur 
+ "/" + "local-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "global-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "arrays-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + #strtool-strs + + +def codeql_analysis(cur, db) : + static_analysis("litout.out","litool.ql", cur, db) + static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) + static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) + static_analysis("strstr-strings.out","strstr-str.ql", cur, db) + static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) + static_analysis("global-values-strings.out","globals-values.ql", cur, db) + static_analysis("local-strings.out","locals-strs.ql", cur, db) + static_analysis("strtool-strings.out","strtool.ql", cur, db) + static_analysis("arrays.out","array-literals.ql", cur, db) + start_aflql(0,cur) + #command1 = [ + # 'codeql','query', 'run', + # cur + '/litool.ql', + # '-d', + # db, '>','fff.txt' + # ] + #with open("litool2.log", "w") as f: + # stream = os.popen("codeql query run litool.ql -d " + db ) + # output = stream.read() + # f.write(output) + # f.close() + #worker1 = subprocess.Popen(command1) + #print(worker1.communicate()) + + +def start_aflql(tokenpath, cur): + command = [ + 'python3', + cur + '/litan.py', + cur+'/lits/', + cur+'/litout.out' + ] + worker1 = subprocess.Popen(command) + print(worker1.communicate()) + + command1 = [ + 'python3', + cur + '/strcmp-strings.py', + cur + '/strcmp-strs/', + cur + 
'/strcmp-strings.out' + ] + worker2 = subprocess.Popen(command1) + print(worker2.communicate()) + + command2 = [ + 'python3', + cur + '/strncmp-strings.py', + cur + '/strncmp-strs/', + cur + '/strncmp-strings.out' + ] + worker3 = subprocess.Popen(command2) + print(worker3.communicate()) + + command3 = [ + 'python3', + cur + '/array-lits.py', + cur + '/arrays-lits/', + cur + '/arrays.out' + ] + worker4 = subprocess.Popen(command3) + print(worker4.communicate()) + + command4 = [ + 'python3', + cur + '/array-strings.py', + cur + '/arrays-strs/', + cur + '/arrays.out' + ] + worker5 = subprocess.Popen(command4) + print(worker5.communicate()) + + + command5 = [ + 'python3', + cur + '/memcmp-strings.py', + cur + '/memcmp-strs/', + cur + '/memcmp-strings.out' + ] + worker6 = subprocess.Popen(command5) + print(worker6.communicate()) + + command6 = [ + 'python3', + cur + '/globals-strings.py', + cur + '/global-strs/', + cur + '/global-values-strings.out' + ] + worker7 = subprocess.Popen(command6) + print(worker7.communicate()) + + command7 = [ + 'python3', + cur + '/strstr-strings.py', + cur + '/strstr-strs/', + cur + '/strstr-strings.out' + ] + worker8 = subprocess.Popen(command7) + print(worker8.communicate()) + + + #strtool-strings.out + + command8 = [ + 'python3', + cur + '/stan-strings.py', + cur + '/strtool-strs/', + cur + '/strtool-strings.out' + ] + worker9 = subprocess.Popen(command8) + print(worker9.communicate()) + + command9 = [ + 'python3', + cur + '/local-strings.py', + cur + '/local-strs/', + cur + '/local-strings.out' + ] + worker10 = subprocess.Popen(command9) + print(worker10.communicate()) + +def main(): + args = parse_args() + ensure_dir(args.tokenpath) + #copy_tokens(args.cur, args.tokenpath) + codeql_analysis(args.cur, args.db) + copy_tokens(args.cur, args.tokenpath) + #start_aflql(args.tokenpath, args.cur) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/build-codeql.sh b/utils/autodict_ql/build-codeql.sh 
new file mode 100644 index 00000000..ccff932e --- /dev/null +++ b/utils/autodict_ql/build-codeql.sh @@ -0,0 +1,17 @@ +cd ~ +if [ -d "codeql-home" ]; then + echo "Exist !" + exit 1 +fi +sudo apt install build-essential libtool-bin python3-dev automake git vim wget -y +mkdir codeql-home +cd codeql-home +git clone https://github.com/github/codeql.git codeql-repo +git clone https://github.com/github/codeql-go.git +wget https://github.com/github/codeql-cli-binaries/releases/download/v2.4.6/codeql-linux64.zip +unzip codeql-linux64.zip +mv codeql codeql-cli +export "PATH=~/codeql-home/codeql-cli/:$PATH" +codeql resolve languages +codeql resolve qlpacks +echo "export PATH=~/codeql-home/codeql-cli/:$PATH" >> ~/.bashrc \ No newline at end of file diff --git a/utils/autodict_ql/litan.py b/utils/autodict_ql/litan.py new file mode 100644 index 00000000..18c04c34 --- /dev/null +++ b/utils/autodict_ql/litan.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com +import string +import os +import binascii +import codecs +import struct +import errno +import argparse +import re +import base64 +from binascii import unhexlify +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file to analysis and output folder to save corpdirus for constants in the overall project ------- Example usage : python2 thisfile.py outdir o.txt")) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate files.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-hex.txt, analysis take place on this file, example : python2 thisfile.py outdir out.txt") + return parser.parse_args() +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno == errno.EEXIST: + #print "[-] Directory exists, specify another directory" + exit(1) +def do_analysis1(corpdir, infile): + with open(infile, "rb") as f: + lines = f.readlines()[1:] + f.close() + new_lst = [] + n = 1 + for i, num in enumerate(lines): + if i != 0: + new_lst.append(num) + str1 = str(num) + print ("num is " + str1) + str1 = str1.rstrip('\n\n') + #str1 = str1.replace("0x",""); + str1 = str1.replace("|","") + str1 = str1.rstrip('\r\n') + str1 = str1.rstrip('\n') + str1 = str1.replace(" ","") + #str1 = str1.translate(None, string.punctuation) + translator=str.maketrans('','',string.punctuation) + str1=str1.translate(translator) + str1 = str1[1:] + str1 = str1[:-1] + print("After cleanup : " + str1) + if (str1 != '0') and (str1 != 'ffffffff') and (str1 != 'fffffffe') or (len(str1) == 4) or (len(str1) == 8): + print ("first : "+str1) + if len(str1) > 8 : + str1 = str1[:-1] + elif (len(str1) == 5) : + str1 = str1 = "0" + try: + #str1 = str1.decode("hex") + with open(corpdir+'/lit-seed{0}'.format(n), 'w') as file: + str1 = str1.replace("0x",""); + print (str1) + str1 = int(str1,base=16) + str1 = str1.to_bytes(4, byteorder='little') + file.write(str(str1)) + file.close() + with open (corpdir+'/lit-seed{0}'.format(n), 'r') as q : + a = q.readline() + a = a[1:] + print ("AFL++ Autodict-QL by Microsvuln : Writing Token :" + str(a)) + q.close() + with open (corpdir+'/lit-seed{0}'.format(n), 'w') as w1 : + w1.write(str(a)) + print ("Done!") + w1.close() + except: + print("Error!") + n = n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_analysis1(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/qlpack.yml b/utils/autodict_ql/qlpack.yml new file mode 100644 index 
00000000..c037a344 --- /dev/null +++ b/utils/autodict_ql/qlpack.yml @@ -0,0 +1,3 @@ +name: automate +version: 0.0.0 +libraryPathDependencies: codeql-cpp diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md new file mode 100644 index 00000000..77a15f8e --- /dev/null +++ b/utils/autodict_ql/readme.md @@ -0,0 +1,81 @@ +# Autodict-QL - Optimal Token Generation for Fuzzing + +## What is this? + +Autodict-QL is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (Unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses the CodeQL (A powerful semantic code analysis engine) to fetch information from a code base. + +Tokens are useful when you perform fuzzing on different parsers. AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campagin. if you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . + + +## Why CodeQL ? +We basically developed this plugin on top of CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. + + +## CodeQL scripts +Currently, we pushed some scripts as defaults for Token generation. In addition, we provide every CodeQL script as an standalone script because it's easier to modify or test. + +Currently we provided the following CodeQL scripts : + +`strcmp-str.ql` is used to extract strings that are related to `strcmp` function. + +`strncmp-str.ql` is used to extract the strings from the `strncmp` function. + +`memcmp-str.ql` is used to extract the strings from the `memcmp` function. + +`litool.ql` extracts Magic numbers as Hexadecimal format. 
+ +`strtool.ql` extracts strings with uses of a regex and dataflow concept to capture the string comparison functions. if strcmp is rewritten in a project as Mystrcmp or something like strmycmp, then this script can catch the arguments and these are valuable tokens. + +You can write other CodeQL scripts to extract possible effective tokens if you think they can be useful. + + +## Usage +The usage of Autodict-QL is pretty easy. But let's describe it as : + +1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system, so : + +` # chmod +x codeql-build.sh` + +` # codeql ` + +Then you should get : + +` Usage: codeql ... +Create and query CodeQL databases, or work with the QL language. + +GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is +not itself free software. Type codeql --license to see the license terms. + + --license Show the license terms for the CodeQL toolchain. +Common options: + -h, --help Show this help text. + -v, --verbose Incrementally increase the number of progress messages printed. + -q, --quiet Incrementally decrease the number of progress messages printed. +Some advanced options have been hidden; try --help -v for a fuller view. +Commands: + query Compile and execute QL code. + bqrs Get information from .bqrs files. + database Create, analyze and process CodeQL databases. + dataset [Plumbing] Work with raw QL datasets. + test Execute QL unit tests. + resolve [Deep plumbing] Helper commands to resolve disk locations etc. + execute [Deep plumbing] Low-level commands that need special JVM options. + version Show the version of the CodeQL toolchain. + generate Generate formatted QL documentation. + github Commands useful for interacting with the GitHub API through CodeQL. +` + +2. 
Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . + - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: + - `./configure --disable-shared` + - `codeql create database libxml-db --language=cpp --command=make + - Now you have the CodeQL database of the project :-) +3. To run the Autodict-QL, the final step is to just create a folder named `automate` in the project you want to fuzz. + - `mkdir automate` (inside the libxml directory) +4. The final step is to update the CodeQL database you created in the step 2 inside the automate dir you created at step 3 : + - `codeql database upgrade ../libxml-db` +5. Everything is set! :-), now you should issue the following to get the tokens : + - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` + - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` + - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. +6. Done! 
\ No newline at end of file diff --git a/utils/autodict_ql/strtool.ql b/utils/autodict_ql/strtool.ql index f78aabbb..253d1555 100644 --- a/utils/autodict_ql/strtool.ql +++ b/utils/autodict_ql/strtool.ql @@ -3,8 +3,8 @@ import semmle.code.cpp.dataflow.DataFlow class StringLiteralNode extends DataFlow::Node { StringLiteralNode() { this.asExpr() instanceof StringLiteral } } -class MemcmpArgNode extends DataFlow::Node { - MemcmpArgNode() { +class CmpArgNode extends DataFlow::Node { + CmpArgNode() { exists(FunctionCall fc | fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and fc.getArgument(0) = this.asExpr() @@ -17,7 +17,7 @@ class MemcmpArgNode extends DataFlow::Node { } } -from StringLiteralNode src, MemcmpArgNode arg +from StringLiteralNode src, CmpArgNode arg where DataFlow::localFlow(src, arg) -- cgit 1.4.1 From 6088a0d4c2aeada7d952ce05bc1e683b858b1ade Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:04:17 +0400 Subject: update readme update readme --- utils/autodict_ql/readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 77a15f8e..a610afb7 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -40,7 +40,7 @@ The usage of Autodict-QL is pretty easy. But let's describe it as : Then you should get : -` Usage: codeql ... +' Usage: codeql ... Create and query CodeQL databases, or work with the QL language. GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is @@ -63,7 +63,7 @@ Commands: version Show the version of the CodeQL toolchain. generate Generate formatted QL documentation. github Commands useful for interacting with the GitHub API through CodeQL. -` +' 2. 
Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: -- cgit 1.4.1 From cabde32140d6b781fea3c81e535b717bd01b1ec7 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:06:18 +0400 Subject: Update readme Update readme --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index a610afb7..82aa0a23 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -68,7 +68,7 @@ Commands: 2. Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: - `./configure --disable-shared` - - `codeql create database libxml-db --language=cpp --command=make + - `codeql create database libxml-db --language=cpp --command=make` - Now you have the CodeQL database of the project :-) 3. To run the Autodict-QL, the final step is to just create a folder named `automate` in the project you want to fuzz. 
- `mkdir automate` (inside the libxml directory) -- cgit 1.4.1 From 01658fb2e8d923c2d3df225249d9e8e0931511e8 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:07:24 +0400 Subject: Update readme Update readme --- utils/autodict_ql/readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 82aa0a23..e8d3c761 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -70,8 +70,8 @@ Commands: - `./configure --disable-shared` - `codeql create database libxml-db --language=cpp --command=make` - Now you have the CodeQL database of the project :-) -3. To run the Autodict-QL, the final step is to just create a folder named `automate` in the project you want to fuzz. - - `mkdir automate` (inside the libxml directory) +3. To run the Autodict-QL, the final step is to just create a folder named `automate` in the project you want to fuzz. (inside the libxml directory) + - `mkdir automate` 4. The final step is to update the CodeQL database you created in the step 2 inside the automate dir you created at step 3 : - `codeql database upgrade ../libxml-db` 5. Everything is set! 
:-), now you should issue the following to get the tokens : -- cgit 1.4.1 From 7a383342de6687a09b46151c1f3cf0d44810995a Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:07:52 +0400 Subject: rename python file rename python file --- utils/autodict_ql/autodict-ql.py | 188 +++++++++++++++++++++++++++++++++++++++ utils/autodict_ql/autodict_ql.py | 188 --------------------------------------- 2 files changed, 188 insertions(+), 188 deletions(-) create mode 100644 utils/autodict_ql/autodict-ql.py delete mode 100644 utils/autodict_ql/autodict_ql.py (limited to 'utils') diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py new file mode 100644 index 00000000..69d11f48 --- /dev/null +++ b/utils/autodict_ql/autodict-ql.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import shutil +import subprocess + +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + + #parser.add_argument("tokenpath", + #help="Destination directory for tokens") + parser.add_argument("cur", + help = "Current Path") + parser.add_argument("db", + help = "CodeQL database Path") + parser.add_argument("tokenpath", + help="Destination directory for tokens") + + return parser.parse_args() + +def static_analysis(file,file2,cur,db) : + with open(cur+"/"+file, "w") as f: + print(cur+"/"+file) + stream = os.popen("codeql query run " + cur +"/"+ file2 + " -d " + db ) + output = stream.read() + f.write(output) + f.close() + +def 
copy_tokens(cur, tokenpath) : + subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strstr-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "local-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "global-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "arrays-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + #strtool-strs + + +def codeql_analysis(cur, db) : + static_analysis("litout.out","litool.ql", cur, db) + static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) + static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) + static_analysis("strstr-strings.out","strstr-str.ql", cur, db) + static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) + static_analysis("global-values-strings.out","globals-values.ql", cur, db) + static_analysis("local-strings.out","locals-strs.ql", cur, db) + static_analysis("strtool-strings.out","strtool.ql", cur, db) + static_analysis("arrays.out","array-literals.ql", cur, db) + start_aflql(0,cur) + #command1 = [ + # 'codeql','query', 'run', + # cur + '/litool.ql', + # '-d', + # db, '>','fff.txt' + # ] + #with open("litool2.log", "w") as f: + # stream 
= os.popen("codeql query run litool.ql -d " + db ) + # output = stream.read() + # f.write(output) + # f.close() + #worker1 = subprocess.Popen(command1) + #print(worker1.communicate()) + + +def start_aflql(tokenpath, cur): + command = [ + 'python3', + cur + '/litan.py', + cur+'/lits/', + cur+'/litout.out' + ] + worker1 = subprocess.Popen(command) + print(worker1.communicate()) + + command1 = [ + 'python3', + cur + '/strcmp-strings.py', + cur + '/strcmp-strs/', + cur + '/strcmp-strings.out' + ] + worker2 = subprocess.Popen(command1) + print(worker2.communicate()) + + command2 = [ + 'python3', + cur + '/strncmp-strings.py', + cur + '/strncmp-strs/', + cur + '/strncmp-strings.out' + ] + worker3 = subprocess.Popen(command2) + print(worker3.communicate()) + + command3 = [ + 'python3', + cur + '/array-lits.py', + cur + '/arrays-lits/', + cur + '/arrays.out' + ] + worker4 = subprocess.Popen(command3) + print(worker4.communicate()) + + command4 = [ + 'python3', + cur + '/array-strings.py', + cur + '/arrays-strs/', + cur + '/arrays.out' + ] + worker5 = subprocess.Popen(command4) + print(worker5.communicate()) + + + command5 = [ + 'python3', + cur + '/memcmp-strings.py', + cur + '/memcmp-strs/', + cur + '/memcmp-strings.out' + ] + worker6 = subprocess.Popen(command5) + print(worker6.communicate()) + + command6 = [ + 'python3', + cur + '/globals-strings.py', + cur + '/global-strs/', + cur + '/global-values-strings.out' + ] + worker7 = subprocess.Popen(command6) + print(worker7.communicate()) + + command7 = [ + 'python3', + cur + '/strstr-strings.py', + cur + '/strstr-strs/', + cur + '/strstr-strings.out' + ] + worker8 = subprocess.Popen(command7) + print(worker8.communicate()) + + + #strtool-strings.out + + command8 = [ + 'python3', + cur + '/stan-strings.py', + cur + '/strtool-strs/', + cur + '/strtool-strings.out' + ] + worker9 = subprocess.Popen(command8) + print(worker9.communicate()) + + command9 = [ + 'python3', + cur + '/local-strings.py', + cur + '/local-strs/', + cur 
+ '/local-strings.out' + ] + worker10 = subprocess.Popen(command9) + print(worker10.communicate()) + +def main(): + args = parse_args() + ensure_dir(args.tokenpath) + #copy_tokens(args.cur, args.tokenpath) + codeql_analysis(args.cur, args.db) + copy_tokens(args.cur, args.tokenpath) + #start_aflql(args.tokenpath, args.cur) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/autodict_ql.py b/utils/autodict_ql/autodict_ql.py deleted file mode 100644 index 69d11f48..00000000 --- a/utils/autodict_ql/autodict_ql.py +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/env python3 -import os -import string -import binascii -import codecs -import errno -import struct -import argparse -import shutil -import subprocess - -from binascii import unhexlify - -def ensure_dir(dir): - try: - os.makedirs(dir) - except OSError as e: - if e.errno != errno.EEXIST: - raise - -def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - - #parser.add_argument("tokenpath", - #help="Destination directory for tokens") - parser.add_argument("cur", - help = "Current Path") - parser.add_argument("db", - help = "CodeQL database Path") - parser.add_argument("tokenpath", - help="Destination directory for tokens") - - return parser.parse_args() - -def static_analysis(file,file2,cur,db) : - with open(cur+"/"+file, "w") as f: - print(cur+"/"+file) - stream = os.popen("codeql query run " + cur +"/"+ file2 + " -d " + db ) - output = stream.read() - f.write(output) - f.close() - -def copy_tokens(cur, tokenpath) : - subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strstr-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - 
subprocess.call(["cp " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "local-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "global-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "arrays-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - #strtool-strs - - -def codeql_analysis(cur, db) : - static_analysis("litout.out","litool.ql", cur, db) - static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) - static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) - static_analysis("strstr-strings.out","strstr-str.ql", cur, db) - static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) - static_analysis("global-values-strings.out","globals-values.ql", cur, db) - static_analysis("local-strings.out","locals-strs.ql", cur, db) - static_analysis("strtool-strings.out","strtool.ql", cur, db) - static_analysis("arrays.out","array-literals.ql", cur, db) - start_aflql(0,cur) - #command1 = [ - # 'codeql','query', 'run', - # cur + '/litool.ql', - # '-d', - # db, '>','fff.txt' - # ] - #with open("litool2.log", "w") as f: - # stream = os.popen("codeql query run litool.ql -d " + db ) - # output = stream.read() - # f.write(output) - # f.close() - #worker1 = subprocess.Popen(command1) - #print(worker1.communicate()) - - -def start_aflql(tokenpath, cur): - command = [ - 
'python3', - cur + '/litan.py', - cur+'/lits/', - cur+'/litout.out' - ] - worker1 = subprocess.Popen(command) - print(worker1.communicate()) - - command1 = [ - 'python3', - cur + '/strcmp-strings.py', - cur + '/strcmp-strs/', - cur + '/strcmp-strings.out' - ] - worker2 = subprocess.Popen(command1) - print(worker2.communicate()) - - command2 = [ - 'python3', - cur + '/strncmp-strings.py', - cur + '/strncmp-strs/', - cur + '/strncmp-strings.out' - ] - worker3 = subprocess.Popen(command2) - print(worker3.communicate()) - - command3 = [ - 'python3', - cur + '/array-lits.py', - cur + '/arrays-lits/', - cur + '/arrays.out' - ] - worker4 = subprocess.Popen(command3) - print(worker4.communicate()) - - command4 = [ - 'python3', - cur + '/array-strings.py', - cur + '/arrays-strs/', - cur + '/arrays.out' - ] - worker5 = subprocess.Popen(command4) - print(worker5.communicate()) - - - command5 = [ - 'python3', - cur + '/memcmp-strings.py', - cur + '/memcmp-strs/', - cur + '/memcmp-strings.out' - ] - worker6 = subprocess.Popen(command5) - print(worker6.communicate()) - - command6 = [ - 'python3', - cur + '/globals-strings.py', - cur + '/global-strs/', - cur + '/global-values-strings.out' - ] - worker7 = subprocess.Popen(command6) - print(worker7.communicate()) - - command7 = [ - 'python3', - cur + '/strstr-strings.py', - cur + '/strstr-strs/', - cur + '/strstr-strings.out' - ] - worker8 = subprocess.Popen(command7) - print(worker8.communicate()) - - - #strtool-strings.out - - command8 = [ - 'python3', - cur + '/stan-strings.py', - cur + '/strtool-strs/', - cur + '/strtool-strings.out' - ] - worker9 = subprocess.Popen(command8) - print(worker9.communicate()) - - command9 = [ - 'python3', - cur + '/local-strings.py', - cur + '/local-strs/', - cur + '/local-strings.out' - ] - worker10 = subprocess.Popen(command9) - print(worker10.communicate()) - -def main(): - args = parse_args() - ensure_dir(args.tokenpath) - #copy_tokens(args.cur, args.tokenpath) - codeql_analysis(args.cur, 
args.db) - copy_tokens(args.cur, args.tokenpath) - #start_aflql(args.tokenpath, args.cur) -if __name__ == '__main__': - main() \ No newline at end of file -- cgit 1.4.1 From 67989e9f2acb5e39e9ef422c27f0fe9db3f7da95 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:09:38 +0400 Subject: update update --- utils/autodict_ql/readme.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index e8d3c761..b368002c 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -40,7 +40,8 @@ The usage of Autodict-QL is pretty easy. But let's describe it as : Then you should get : -' Usage: codeql ... +" +Usage: codeql ... Create and query CodeQL databases, or work with the QL language. GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is @@ -63,7 +64,7 @@ Commands: version Show the version of the CodeQL toolchain. generate Generate formatted QL documentation. github Commands useful for interacting with the GitHub API through CodeQL. -' +" 2. Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: @@ -74,7 +75,7 @@ Commands: - `mkdir automate` 4. The final step is to update the CodeQL database you created in the step 2 inside the automate dir you created at step 3 : - `codeql database upgrade ../libxml-db` -5. Everything is set! :-), now you should issue the following to get the tokens : +5. Everything is set! 
Now you should issue the following to get the tokens : - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. -- cgit 1.4.1 From c4f418c3b27bad3cc61eb61e7b9cf6fb7bbe6868 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:12:24 +0400 Subject: Add shell command Add shell command --- utils/autodict_ql/readme.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index b368002c..c9c0d2d0 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -40,7 +40,7 @@ The usage of Autodict-QL is pretty easy. But let's describe it as : Then you should get : -" +```shell Usage: codeql ... Create and query CodeQL databases, or work with the QL language. @@ -64,7 +64,7 @@ Commands: version Show the version of the CodeQL toolchain. generate Generate formatted QL documentation. github Commands useful for interacting with the GitHub API through CodeQL. -" +``` 2. Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: @@ -76,7 +76,7 @@ Commands: 4. The final step is to update the CodeQL database you created in the step 2 inside the automate dir you created at step 3 : - `codeql database upgrade ../libxml-db` 5. Everything is set! 
Now you should issue the following to get the tokens : - - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` - - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. + - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` + - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` + - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. 6. Done! \ No newline at end of file -- cgit 1.4.1 From bc99b5ba03815e2cfd2a6314a2fa9da78baa6fb6 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:13:18 +0400 Subject: update readme update readme --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index c9c0d2d0..c8e5556f 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -2,7 +2,7 @@ ## What is this? -Autodict-QL is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (Unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses the CodeQL (A powerful semantic code analysis engine) to fetch information from a code base. +`Autodict-QL` is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (Unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses the CodeQL (A powerful semantic code analysis engine) to fetch information from a code base. Tokens are useful when you perform fuzzing on different parsers. 
AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campagin. if you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . -- cgit 1.4.1 From b418c31479f5d5e1d10c75eafec9ead9351453cb Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:39:09 +0400 Subject: Some updates on readme Some updates on readme --- utils/autodict_ql/readme.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index c8e5556f..45f685c6 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -79,4 +79,19 @@ Commands: - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. -6. Done! \ No newline at end of file +6. Done! + + +## More on dictionaries and tokens +Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. +On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. 
+ +I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` is working based on the target. +In overall, fuzzing with dictionaries and well-generated tokens will give better results. + +There are 2 important points to remember : + +- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chance to discover new bugs. +- Do not remember to set the `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries, if you forget to set this environment variable, then AFL++ use just 200 tokens and use the rest of them probablistically. So this will guarantees that your tokens will be used by AFL++. + + \ No newline at end of file -- cgit 1.4.1 From b7d12c8532da0a094aa2504e90f84a1530d569a9 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 02:45:46 +0400 Subject: Update readme Update readme --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 45f685c6..39857f69 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -83,7 +83,7 @@ Commands: ## More on dictionaries and tokens -Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. +Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. 
`Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` is working based on the target. -- cgit 1.4.1 From 8f9d1fd7b05f916d8c43d5872be54d9074bdf8db Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 03:16:13 +0400 Subject: Updates update --- utils/autodict_ql/readme.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 39857f69..3e4655c8 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -34,10 +34,11 @@ The usage of Autodict-QL is pretty easy. But let's describe it as : 1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system, so : -` # chmod +x codeql-build.sh` - -` # codeql ` - +```shell +# chmod +x codeql-build.sh` +# sudo ./codeql-build.sh +# codeql ` +``` Then you should get : ```shell @@ -93,5 +94,3 @@ There are 2 important points to remember : - If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chance to discover new bugs. 
- Do not remember to set the `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries, if you forget to set this environment variable, then AFL++ use just 200 tokens and use the rest of them probablistically. So this will guarantees that your tokens will be used by AFL++. - - \ No newline at end of file -- cgit 1.4.1 From 4291c3db5dca5082aed123f3a353f8af4a0f4785 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 14:04:06 +0400 Subject: finalize 1 commit final things --- utils/autodict_ql/autodict-ql.py | 91 ++++++++-------------------------------- utils/autodict_ql/readme.md | 6 ++- 2 files changed, 22 insertions(+), 75 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py index 69d11f48..ddc95435 100644 --- a/utils/autodict_ql/autodict-ql.py +++ b/utils/autodict_ql/autodict-ql.py @@ -1,4 +1,14 @@ #!/usr/bin/env python3 +# AutoDict-QL - Optimal Token Generation for Fuzzing +# Part of AFL++ Project +# Developed and Maintained by Arash Ale Ebrahim (@Microsvuln) +# Usage : python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH] +# CURRENT_DIR = full of your current Dir +# CODEQL_DATABASE_PATH = Full path to your CodeQL database +# TOKEN_PATH = Folder name of the newly generated tokens +# Example : python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens +# Just pass the tokens folder to the -x flag of your fuzzer + import os import string import binascii @@ -42,47 +52,25 @@ def static_analysis(file,file2,cur,db) : f.close() def copy_tokens(cur, tokenpath) : - subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strstr-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" 
+ "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "local-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "global-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "arrays-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - #strtool-strs + def codeql_analysis(cur, db) : static_analysis("litout.out","litool.ql", cur, db) static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) - static_analysis("strstr-strings.out","strstr-str.ql", cur, db) static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) - static_analysis("global-values-strings.out","globals-values.ql", cur, db) - static_analysis("local-strings.out","locals-strs.ql", cur, db) static_analysis("strtool-strings.out","strtool.ql", cur, db) - static_analysis("arrays.out","array-literals.ql", cur, db) - start_aflql(0,cur) - #command1 = [ - # 'codeql','query', 'run', - # cur + '/litool.ql', - # '-d', - # db, '>','fff.txt' - # ] - #with open("litool2.log", "w") as f: - # stream = os.popen("codeql query run litool.ql -d " + db ) - # output = stream.read() - # f.write(output) - # f.close() - #worker1 = subprocess.Popen(command1) - #print(worker1.communicate()) - - -def start_aflql(tokenpath, cur): + start_autodict(0,cur) + + + +def start_autodict(tokenpath, cur): command = [ 'python3', cur + '/litan.py', @@ -110,23 +98,6 @@ def start_aflql(tokenpath, cur): worker3 = 
subprocess.Popen(command2) print(worker3.communicate()) - command3 = [ - 'python3', - cur + '/array-lits.py', - cur + '/arrays-lits/', - cur + '/arrays.out' - ] - worker4 = subprocess.Popen(command3) - print(worker4.communicate()) - - command4 = [ - 'python3', - cur + '/array-strings.py', - cur + '/arrays-strs/', - cur + '/arrays.out' - ] - worker5 = subprocess.Popen(command4) - print(worker5.communicate()) command5 = [ @@ -138,27 +109,8 @@ def start_aflql(tokenpath, cur): worker6 = subprocess.Popen(command5) print(worker6.communicate()) - command6 = [ - 'python3', - cur + '/globals-strings.py', - cur + '/global-strs/', - cur + '/global-values-strings.out' - ] - worker7 = subprocess.Popen(command6) - print(worker7.communicate()) - - command7 = [ - 'python3', - cur + '/strstr-strings.py', - cur + '/strstr-strs/', - cur + '/strstr-strings.out' - ] - worker8 = subprocess.Popen(command7) - print(worker8.communicate()) - #strtool-strings.out - command8 = [ 'python3', cur + '/stan-strings.py', @@ -168,14 +120,7 @@ def start_aflql(tokenpath, cur): worker9 = subprocess.Popen(command8) print(worker9.communicate()) - command9 = [ - 'python3', - cur + '/local-strings.py', - cur + '/local-strs/', - cur + '/local-strings.out' - ] - worker10 = subprocess.Popen(command9) - print(worker10.communicate()) + def main(): args = parse_args() @@ -183,6 +128,6 @@ def main(): #copy_tokens(args.cur, args.tokenpath) codeql_analysis(args.cur, args.db) copy_tokens(args.cur, args.tokenpath) - #start_aflql(args.tokenpath, args.cur) + #start_autodict(args.tokenpath, args.cur) if __name__ == '__main__': main() \ No newline at end of file diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 3e4655c8..f8d23098 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -67,7 +67,7 @@ Commands: github Commands useful for interacting with the GitHub API through CodeQL. ``` -2. 
Compiler your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . +2. Compile your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: - `./configure --disable-shared` - `codeql create database libxml-db --language=cpp --command=make` @@ -87,10 +87,12 @@ Commands: Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. -I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` is working based on the target. +I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` and `dict2file` is working based on the target. 
In overall, fuzzing with dictionaries and well-generated tokens will give better results. There are 2 important points to remember : - If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chance to discover new bugs. - Do not remember to set the `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries, if you forget to set this environment variable, then AFL++ use just 200 tokens and use the rest of them probablistically. So this will guarantees that your tokens will be used by AFL++. + +Thanks are going to Marc Heuse, the AFL++ main developer, Antonio Morales and Stefan Nagy \ No newline at end of file -- cgit 1.4.1 From 7f6d256014ae6728bc938e33b7038105a5714c9b Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 14:06:12 +0400 Subject: space space --- utils/autodict_ql/readme.md | 1 + 1 file changed, 1 insertion(+) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index f8d23098..0449233b 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -30,6 +30,7 @@ You can write other CodeQL scripts to extract possible effective tokens if you t ## Usage + The usage of Autodict-QL is pretty easy. But let's describe it as : 1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. 
This script will install CodeQL completety and will set the required environment variables for your system, so : -- cgit 1.4.1 From 7a3dfbce71d45742a6b571e41f07ae6b104e3a6b Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 14:07:13 +0400 Subject: remove things remove things --- utils/autodict_ql/readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 0449233b..9e6a7292 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -36,9 +36,9 @@ The usage of Autodict-QL is pretty easy. But let's describe it as : 1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system, so : ```shell -# chmod +x codeql-build.sh` +# chmod +x codeql-build.sh # sudo ./codeql-build.sh -# codeql ` +# codeql ``` Then you should get : -- cgit 1.4.1 From 8d894eec90ef738702d42274cda0d6a4e5494627 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 14:11:33 +0400 Subject: Add python scripts Add python scripts --- utils/autodict_ql/memcmp-strings.py | 64 ++++++++++++++++++++++++++++++++++++ utils/autodict_ql/stan-strings.py | 60 +++++++++++++++++++++++++++++++++ utils/autodict_ql/strcmp-strings.py | 60 +++++++++++++++++++++++++++++++++ utils/autodict_ql/strncmp-strings.py | 64 ++++++++++++++++++++++++++++++++++++ 4 files changed, 248 insertions(+) create mode 100644 utils/autodict_ql/memcmp-strings.py create mode 100644 utils/autodict_ql/stan-strings.py create mode 100644 utils/autodict_ql/strcmp-strings.py create mode 100644 utils/autodict_ql/strncmp-strings.py (limited to 'utils') diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py new file mode 
100644 index 00000000..e948fba4 --- /dev/null +++ b/utils/autodict_ql/memcmp-strings.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + print("all strings : %s" % str11) + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/memcmp-str{0}'.format(n), 'w') as file: + file.write(str11) + print("Hahaha : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py new file mode 100644 index 00000000..c35d8a65 --- /dev/null +++ b/utils/autodict_ql/stan-strings.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + print("all strings : %s" % str11) + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11)) or ("<" in str11) or (">" in str11) : + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/seed-str{0}'.format(n), 'w') as file: + file.write(str11) + print("Hahaha : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py new file mode 100644 index 00000000..412b70ae --- /dev/null +++ b/utils/autodict_ql/strcmp-strings.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + print("all strings : %s" % str11) + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/strcmp-str{0}'.format(n), 'w') as file: + file.write(str11) + print("Hahaha : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py new file mode 100644 index 00000000..2c07718e --- /dev/null +++ b/utils/autodict_ql/strncmp-strings.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate 
strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + print("all strings : %s" % str11) + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/strncmp-str{0}'.format(n), 'w') as file: + file.write(str11) + print("Hahaha : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file -- cgit 1.4.1 From 6c88b6b362ddc06effd8d99c32375ab34028665c Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 14:13:55 +0400 Subject: Update python scripts Update python scripts --- utils/autodict_ql/memcmp-strings.py | 2 +- utils/autodict_ql/stan-strings.py | 6 +++++- utils/autodict_ql/strcmp-strings.py | 6 +++++- utils/autodict_ql/strncmp-strings.py | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py index e948fba4..fb892aff 100644 --- a/utils/autodict_ql/memcmp-strings.py +++ b/utils/autodict_ql/memcmp-strings.py @@ -53,7 +53,7 @@ def do_string_analysis(corpdir, infile1): else : with open(corpdir+'/memcmp-str{0}'.format(n), 'w') as file: file.write(str11) - print("Hahaha : %s" % str11) + print("AFL++ Autodict-QL by Microsvuln : 
Writing Token : %s" % str11) n=n+1 def main(): diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py index c35d8a65..e9f6f0d0 100644 --- a/utils/autodict_ql/stan-strings.py +++ b/utils/autodict_ql/stan-strings.py @@ -1,4 +1,8 @@ #!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + import os import string import binascii @@ -49,7 +53,7 @@ def do_string_analysis(corpdir, infile1): else : with open(corpdir+'/seed-str{0}'.format(n), 'w') as file: file.write(str11) - print("Hahaha : %s" % str11) + print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) n=n+1 def main(): diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py index 412b70ae..a1b7e27c 100644 --- a/utils/autodict_ql/strcmp-strings.py +++ b/utils/autodict_ql/strcmp-strings.py @@ -1,4 +1,8 @@ #!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + import os import string import binascii @@ -49,7 +53,7 @@ def do_string_analysis(corpdir, infile1): else : with open(corpdir+'/strcmp-str{0}'.format(n), 'w') as file: file.write(str11) - print("Hahaha : %s" % str11) + print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) n=n+1 def main(): diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py index 2c07718e..2652f66e 100644 --- a/utils/autodict_ql/strncmp-strings.py +++ b/utils/autodict_ql/strncmp-strings.py @@ -53,7 +53,7 @@ def do_string_analysis(corpdir, infile1): else : with open(corpdir+'/strncmp-str{0}'.format(n), 'w') as file: file.write(str11) - print("Hahaha : %s" % str11) + print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) n=n+1 def main(): -- cgit 1.4.1 From 70e975704465672f49273da9f4a8f7e56f745e20 Mon Sep 17 00:00:00 2001 From: microsvuln 
<55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 15:47:22 +0400 Subject: new commit - change strings new commit - change strings --- utils/autodict_ql/autodict-ql.py | 1 + utils/autodict_ql/build-codeql.sh | 4 ++-- utils/autodict_ql/memcmp-strings.py | 3 +-- utils/autodict_ql/readme.md | 11 ++++++++--- utils/autodict_ql/stan-strings.py | 3 +-- utils/autodict_ql/strcmp-strings.py | 3 +-- utils/autodict_ql/strncmp-strings.py | 3 +-- 7 files changed, 15 insertions(+), 13 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py index ddc95435..7bba57fc 100644 --- a/utils/autodict_ql/autodict-ql.py +++ b/utils/autodict_ql/autodict-ql.py @@ -57,6 +57,7 @@ def copy_tokens(cur, tokenpath) : subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["find "+tokenpath+" -size 0 -delete"],shell=True) diff --git a/utils/autodict_ql/build-codeql.sh b/utils/autodict_ql/build-codeql.sh index ccff932e..450207f6 100644 --- a/utils/autodict_ql/build-codeql.sh +++ b/utils/autodict_ql/build-codeql.sh @@ -3,7 +3,6 @@ if [ -d "codeql-home" ]; then echo "Exist !" 
exit 1 fi -sudo apt install build-essential libtool-bin python3-dev automake git vim wget -y mkdir codeql-home cd codeql-home git clone https://github.com/github/codeql.git codeql-repo @@ -12,6 +11,7 @@ wget https://github.com/github/codeql-cli-binaries/releases/download/v2.4.6/code unzip codeql-linux64.zip mv codeql codeql-cli export "PATH=~/codeql-home/codeql-cli/:$PATH" +echo "export PATH=~/codeql-home/codeql-cli/:$PATH" >> ~/.bashrc codeql resolve languages codeql resolve qlpacks -echo "export PATH=~/codeql-home/codeql-cli/:$PATH" >> ~/.bashrc \ No newline at end of file +codeql \ No newline at end of file diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py index fb892aff..2814da5b 100644 --- a/utils/autodict_ql/memcmp-strings.py +++ b/utils/autodict_ql/memcmp-strings.py @@ -46,9 +46,8 @@ def do_string_analysis(corpdir, infile1): str11 = str11.replace("\n","") str11 = str11.lstrip() str11 = str11.rstrip() - print("all strings : %s" % str11) str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : print("Space / Paranthesis String : %s" % str11) else : with open(corpdir+'/memcmp-str{0}'.format(n), 'w') as file: diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 9e6a7292..ccc9b0e3 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -31,13 +31,18 @@ You can write other CodeQL scripts to extract possible effective tokens if you t ## Usage +Before proceed to installation make sure that you have the following packages by installing them : +```shell +sudo apt install build-essential libtool-bin python3-dev python3 automake git vim wget -y +``` The usage of Autodict-QL is pretty easy. But let's describe it as : -1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. 
This script will install CodeQL completety and will set the required environment variables for your system, so : - +1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system. +Do the following : ```shell # chmod +x codeql-build.sh -# sudo ./codeql-build.sh +# ./codeql-build.sh +# source ~/.bashrc # codeql ``` Then you should get : diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py index e9f6f0d0..5a863f80 100644 --- a/utils/autodict_ql/stan-strings.py +++ b/utils/autodict_ql/stan-strings.py @@ -46,9 +46,8 @@ def do_string_analysis(corpdir, infile1): str11 = str11.replace("\n","") str11 = str11.lstrip() str11 = str11.rstrip() - print("all strings : %s" % str11) str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11)) or ("<" in str11) or (">" in str11) : + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : print("Space / Paranthesis String : %s" % str11) else : with open(corpdir+'/seed-str{0}'.format(n), 'w') as file: diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py index a1b7e27c..1852b947 100644 --- a/utils/autodict_ql/strcmp-strings.py +++ b/utils/autodict_ql/strcmp-strings.py @@ -46,9 +46,8 @@ def do_string_analysis(corpdir, infile1): str11 = str11.replace("\n","") str11 = str11.lstrip() str11 = str11.rstrip() - print("all strings : %s" % str11) str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : print("Space / Paranthesis String : %s" % str11) else : with open(corpdir+'/strcmp-str{0}'.format(n), 'w') as file: diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py index 2652f66e..f00fa3da 100644 --- 
a/utils/autodict_ql/strncmp-strings.py +++ b/utils/autodict_ql/strncmp-strings.py @@ -46,9 +46,8 @@ def do_string_analysis(corpdir, infile1): str11 = str11.replace("\n","") str11 = str11.lstrip() str11 = str11.rstrip() - print("all strings : %s" % str11) str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11)): + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : print("Space / Paranthesis String : %s" % str11) else : with open(corpdir+'/strncmp-str{0}'.format(n), 'w') as file: -- cgit 1.4.1 From d5fc03b71819ed75bd7134584e8f00a7f1010149 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 15:49:17 +0400 Subject: update qlpack name update qlpack name --- utils/autodict_ql/qlpack.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/qlpack.yml b/utils/autodict_ql/qlpack.yml index c037a344..28892f24 100644 --- a/utils/autodict_ql/qlpack.yml +++ b/utils/autodict_ql/qlpack.yml @@ -1,3 +1,3 @@ -name: automate +name: autodict version: 0.0.0 libraryPathDependencies: codeql-cpp -- cgit 1.4.1 From 050f331c54a7af2fdb2eb1ca33e9dacd9257dbb0 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:04:14 +0400 Subject: remove unessential things remove unessential things from scripts --- utils/autodict_ql/autodict-ql.py | 10 +++++----- utils/autodict_ql/memcmp-strings.py | 2 +- utils/autodict_ql/stan-strings.py | 2 +- utils/autodict_ql/strcmp-strings.py | 2 +- utils/autodict_ql/strncmp-strings.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py index 7bba57fc..b51fbb90 100644 --- a/utils/autodict_ql/autodict-ql.py +++ b/utils/autodict_ql/autodict-ql.py @@ -52,11 +52,11 @@ def static_analysis(file,file2,cur,db) : f.close() def copy_tokens(cur, 
tokenpath) : - subprocess.call(["cp " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["find "+tokenpath+" -size 0 -delete"],shell=True) diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py index 2814da5b..d1047caa 100644 --- a/utils/autodict_ql/memcmp-strings.py +++ b/utils/autodict_ql/memcmp-strings.py @@ -40,7 +40,7 @@ def do_string_analysis(corpdir, infile1): for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - print("num : %s" % num1) + #print("num : %s" % num1) str11 = str(num1) str11 = str11.replace("|","") str11 = str11.replace("\n","") diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py index 5a863f80..65d08c97 100644 --- a/utils/autodict_ql/stan-strings.py +++ b/utils/autodict_ql/stan-strings.py @@ -40,7 +40,7 @@ def do_string_analysis(corpdir, infile1): for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - print("num : %s" % num1) + #print("num : %s" % num1) str11 = str(num1) str11 = 
str11.replace("|","") str11 = str11.replace("\n","") diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py index 1852b947..88128dbb 100644 --- a/utils/autodict_ql/strcmp-strings.py +++ b/utils/autodict_ql/strcmp-strings.py @@ -40,7 +40,7 @@ def do_string_analysis(corpdir, infile1): for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - print("num : %s" % num1) + #print("num : %s" % num1) str11 = str(num1) str11 = str11.replace("|","") str11 = str11.replace("\n","") diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py index f00fa3da..0ad0e697 100644 --- a/utils/autodict_ql/strncmp-strings.py +++ b/utils/autodict_ql/strncmp-strings.py @@ -40,7 +40,7 @@ def do_string_analysis(corpdir, infile1): for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - print("num : %s" % num1) + #print("num : %s" % num1) str11 = str(num1) str11 = str11.replace("|","") str11 = str11.replace("\n","") -- cgit 1.4.1 From 05c13588d7f6a0c8e34623eeed0b2920737ba377 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:22:51 +0400 Subject: remove dirs remove dirs --- utils/autodict_ql/autodict-ql.py | 2 ++ utils/autodict_ql/readme.md | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py index b51fbb90..0fe7eabf 100644 --- a/utils/autodict_ql/autodict-ql.py +++ b/utils/autodict_ql/autodict-ql.py @@ -57,6 +57,8 @@ def copy_tokens(cur, tokenpath) : subprocess.call(["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) subprocess.call(["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["rm -rf strcmp-strs memcmp-strs strncmp-strs lits strtool-strs"],shell=True) + 
subprocess.call(["rm *.out"],shell=True) subprocess.call(["find "+tokenpath+" -size 0 -delete"],shell=True) diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index ccc9b0e3..16a2a20b 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -93,7 +93,7 @@ Commands: Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. -I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` and `dict2file` is working based on the target. +I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. In overall, fuzzing with dictionaries and well-generated tokens will give better results. There are 2 important points to remember : @@ -101,4 +101,3 @@ There are 2 important points to remember : - If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chance to discover new bugs. 
- Do not remember to set the `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries, if you forget to set this environment variable, then AFL++ use just 200 tokens and use the rest of them probablistically. So this will guarantees that your tokens will be used by AFL++. -Thanks are going to Marc Heuse, the AFL++ main developer, Antonio Morales and Stefan Nagy \ No newline at end of file -- cgit 1.4.1 From c4ad4681cf0aa3ff66f98053345ed7856692f25d Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:28:42 +0400 Subject: Update readme Update readme --- utils/autodict_ql/readme.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 16a2a20b..d8a3b014 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -78,15 +78,13 @@ Commands: - `./configure --disable-shared` - `codeql create database libxml-db --language=cpp --command=make` - Now you have the CodeQL database of the project :-) -3. To run the Autodict-QL, the final step is to just create a folder named `automate` in the project you want to fuzz. (inside the libxml directory) - - `mkdir automate` -4. The final step is to update the CodeQL database you created in the step 2 inside the automate dir you created at step 3 : - - `codeql database upgrade ../libxml-db` -5. Everything is set! Now you should issue the following to get the tokens : +3. The final step is to update the CodeQL database you created in the step 2 : + - `codeql database upgrade /home/user/libxml/libxml-db` +4. Everything is set! 
Now you should issue the following to get the tokens : - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - - example : `python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens` + - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py `pwd` /home/user/libxml/libxml-db tokens` - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. -6. Done! +5. Done! ## More on dictionaries and tokens -- cgit 1.4.1 From 2b4e93faba3877aeb49ac873b77a930ebd6f0801 Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:30:58 +0400 Subject: Add note Add note --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index d8a3b014..a8d252e4 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -78,7 +78,7 @@ Commands: - `./configure --disable-shared` - `codeql create database libxml-db --language=cpp --command=make` - Now you have the CodeQL database of the project :-) -3. The final step is to update the CodeQL database you created in the step 2 : +3. The final step is to update the CodeQL database you created in the step 2 (Suppose we are in `aflplusplus/utils/autodict_ql/` directory) : - `codeql database upgrade /home/user/libxml/libxml-db` 4. Everything is set! 
Now you should issue the following to get the tokens : - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` -- cgit 1.4.1 From a7141b6a6ea045a20c0be7031bab0767064915ea Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:31:51 +0400 Subject: Add ` Add ` --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index a8d252e4..6beba871 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -82,7 +82,7 @@ Commands: - `codeql database upgrade /home/user/libxml/libxml-db` 4. Everything is set! Now you should issue the following to get the tokens : - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py `pwd` /home/user/libxml/libxml-db tokens` + - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py ``pwd`` /home/user/libxml/libxml-db tokens` - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. 5. Done! -- cgit 1.4.1 From d35a90101f1ae51fa022332828209139a7e070ad Mon Sep 17 00:00:00 2001 From: microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Sat, 3 Apr 2021 16:33:56 +0400 Subject: change cur change current dir --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 6beba871..8c24d65c 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -82,7 +82,7 @@ Commands: - `codeql database upgrade /home/user/libxml/libxml-db` 4. Everything is set! 
Now you should issue the following to get the tokens : - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py ``pwd`` /home/user/libxml/libxml-db tokens` + - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py $PWD /home/user/libxml/libxml-db tokens` - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. 5. Done! -- cgit 1.4.1 From 845c584b9cee7092772305912508b825155142fa Mon Sep 17 00:00:00 2001 From: begasus Date: Sun, 4 Apr 2021 17:41:43 +0000 Subject: Fix Haiku references, no and missing defines for USEMMAP --- instrumentation/afl-compiler-rt.o.c | 4 +++- utils/afl_network_proxy/afl-network-server.c | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'utils') diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index f241447a..fa53263c 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -34,8 +34,10 @@ #include #include -#include #ifndef __HAIKU__ + #include +#endif +#ifndef USEMMAP #include #endif #include diff --git a/utils/afl_network_proxy/afl-network-server.c b/utils/afl_network_proxy/afl-network-server.c index 0dfae658..60f174ee 100644 --- a/utils/afl_network_proxy/afl-network-server.c +++ b/utils/afl_network_proxy/afl-network-server.c @@ -45,7 +45,6 @@ #include #include -#include #include #include #include @@ -53,7 +52,9 @@ #include #include #include -#include +#ifndef USEMMAP + #include +#endif #include #include -- cgit 1.4.1 From 6069cac313f4f8f4e696e815d4fe2f8bcaccccf4 Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Wed, 14 Apr 2021 18:24:55 +0200 Subject: qemu driver new api --- utils/aflpp_driver/aflpp_qemu_driver_hook.c | 21 +++++++++++++++------ utils/qemu_persistent_hook/read_into_rdi.c | 2 +- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'utils') diff 
--git a/utils/aflpp_driver/aflpp_qemu_driver_hook.c b/utils/aflpp_driver/aflpp_qemu_driver_hook.c index 823cc42d..d3dd98b0 100644 --- a/utils/aflpp_driver/aflpp_qemu_driver_hook.c +++ b/utils/aflpp_driver/aflpp_qemu_driver_hook.c @@ -1,21 +1,30 @@ +#include "../../qemu_mode/qemuafl/qemuafl/api.h" + #include #include +void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { + #define g2h(x) ((void *)((unsigned long)(x) + guest_base)) +#define h2g(x) ((uint64_t)(x)-guest_base) -#define REGS_RDI 7 -#define REGS_RSI 6 + // In this example the register RDI is pointing to the memory location + // of the target buffer, and the length of the input is in RSI. + // This can be seen with a debugger, e.g. gdb (and "disass main") -void afl_persistent_hook(uint64_t *regs, uint64_t guest_base, - uint8_t *input_buf, uint32_t input_len) { + memcpy(g2h(regs->rdi), input_buf, input_buf_len); + regs->rsi = input_buf_len; - memcpy(g2h(regs[REGS_RDI]), input_buf, input_len); - regs[REGS_RSI] = input_len; +#undef g2h +#undef h2g } int afl_persistent_hook_init(void) { + // 1 for shared memory input (faster), 0 for normal input (you have to use + // read(), input_buf will be NULL) return 1; } diff --git a/utils/qemu_persistent_hook/read_into_rdi.c b/utils/qemu_persistent_hook/read_into_rdi.c index f4a8ae59..c1c6642f 100644 --- a/utils/qemu_persistent_hook/read_into_rdi.c +++ b/utils/qemu_persistent_hook/read_into_rdi.c @@ -5,7 +5,7 @@ void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, uint8_t *input_buf, uint32_t input_buf_len) { -\ + #define g2h(x) ((void *)((unsigned long)(x) + guest_base)) #define h2g(x) ((uint64_t)(x)-guest_base) -- cgit 1.4.1 From f0d300b32a8a5b3adccc8209c151382244135082 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 14 Apr 2021 18:36:22 +0200 Subject: add readme --- utils/aflpp_driver/README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 
utils/aflpp_driver/README.md (limited to 'utils') diff --git a/utils/aflpp_driver/README.md b/utils/aflpp_driver/README.md new file mode 100644 index 00000000..2c339d12 --- /dev/null +++ b/utils/aflpp_driver/README.md @@ -0,0 +1,25 @@ +# afl++ drivers + +## aflpp_driver + +aflpp_driver is used to compile directly libfuzzer `LLVMFuzzerTestOneInput()` +targets. + +Just do `afl-clang-fast++ -o fuzz fuzzer_harness.cc libAFLDriver.a [plus required linking]`. + +You can also sneakily do this little trick: +If this is the clang compile command to build for libfuzzer: + `clang++ -o fuzz -fsanitize=fuzzer fuzzer_harness.cc -lfoo` +then just switch `clang++` with `afl-clang-fast++` and our compiler will +magically insert libAFLDriver.a :) + + +## aflpp_qemu_driver + +aflpp_qemu_driver is used for libfuzzer `LLVMFuzzerTestOneInput()` targets that +are to be fuzzed in qemu_mode. So we compile them with clang/clang++, without +-fsantize=fuzzer or afl-clang-fast, and link in libAFLQemuDriver.a: + +`clang++ -o fuzz fuzzer_harness.cc libAFLQemuDriver.a [plus required linking]`. + +Then just do `AFL_PRELOAD=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- ./fuzz` -- cgit 1.4.1 From fd8dc1455278bca16e852eb08ddac9a3e466b5c7 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 14 Apr 2021 18:49:02 +0200 Subject: update readme --- utils/aflpp_driver/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/aflpp_driver/README.md b/utils/aflpp_driver/README.md index 2c339d12..01bd10c0 100644 --- a/utils/aflpp_driver/README.md +++ b/utils/aflpp_driver/README.md @@ -22,4 +22,9 @@ are to be fuzzed in qemu_mode. So we compile them with clang/clang++, without `clang++ -o fuzz fuzzer_harness.cc libAFLQemuDriver.a [plus required linking]`. -Then just do `AFL_PRELOAD=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... 
-- ./fuzz` + +Then just do (where the name of the binary is `fuzz`): +``` +AFL_QEMU_PERSISTENT_ADDR=0x$(nm fuzz | grep "T LLVMFuzzerTestOneInput" | awk '{print $1}') +AFL_QEMU_PERSISTENT_HOOK=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- ./fuzz` +``` -- cgit 1.4.1 From 4a0e0270adafbc583d491dfad74d9378a4c06bf7 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 14 Apr 2021 22:23:16 +0200 Subject: allow aflpp_qemu_driver_hook.o to fail --- utils/aflpp_driver/GNUmakefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'utils') diff --git a/utils/aflpp_driver/GNUmakefile b/utils/aflpp_driver/GNUmakefile index c1a087d7..8ac054a6 100644 --- a/utils/aflpp_driver/GNUmakefile +++ b/utils/aflpp_driver/GNUmakefile @@ -26,17 +26,17 @@ debug: ar ru libAFLDriver.a afl-performance.o aflpp_driver.o aflpp_qemu_driver.o: aflpp_qemu_driver.c - $(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c + -$(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c libAFLQemuDriver.a: aflpp_qemu_driver.o - ar ru libAFLQemuDriver.a aflpp_qemu_driver.o - cp -vf libAFLQemuDriver.a ../../ + -ar ru libAFLQemuDriver.a aflpp_qemu_driver.o + -cp -vf libAFLQemuDriver.a ../../ aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o - $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so + -$(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c - $(LLVM_BINDIR)clang -fPIC $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c + -$(LLVM_BINDIR)clang -fPIC $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c test: debug #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c -- cgit 1.4.1 From c8e96e52536d47ee41967657202574d8e61562ee Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Thu, 15 Apr 2021 23:56:58 +0200 Subject: autoformat with black --- 
frida_mode/test/testinstr.py | 49 ++++--- unicorn_mode/helper_scripts/ida_context_loader.py | 84 +++++++----- utils/autodict_ql/autodict-ql.py | 154 ++++++++++++---------- utils/autodict_ql/litan.py | 126 +++++++++++------- utils/autodict_ql/memcmp-strings.py | 64 +++++---- utils/autodict_ql/stan-strings.py | 64 +++++---- utils/autodict_ql/strcmp-strings.py | 64 +++++---- utils/autodict_ql/strncmp-strings.py | 64 +++++---- 8 files changed, 409 insertions(+), 260 deletions(-) (limited to 'utils') diff --git a/frida_mode/test/testinstr.py b/frida_mode/test/testinstr.py index 8f5fe886..f648808b 100755 --- a/frida_mode/test/testinstr.py +++ b/frida_mode/test/testinstr.py @@ -1,32 +1,49 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import argparse from elftools.elf.elffile import ELFFile + def process_file(file, section, base): - with open(file, 'rb') as f: + with open(file, "rb") as f: for sect in ELFFile(f).iter_sections(): - if (sect.name == section): - start = base + sect.header['sh_offset'] - end = start + sect.header['sh_size'] - print ("0x%016x-0x%016x" % (start, end)) + if sect.name == section: + start = base + sect.header["sh_offset"] + end = start + sect.header["sh_size"] + print("0x%016x-0x%016x" % (start, end)) return - print ("Section '%s' not found in '%s'" % (section, file)) + print("Section '%s' not found in '%s'" % (section, file)) + def hex_value(x): return int(x, 16) + def main(): - parser = argparse.ArgumentParser(description='Process some integers.') - parser.add_argument('-f', '--file', dest='file', type=str, - help='elf file name', required=True) - parser.add_argument('-s', '--section', dest='section', type=str, - help='elf section name', required=True) - parser.add_argument('-b', '--base', dest='base', type=hex_value, - help='elf base address', required=True) + parser = argparse.ArgumentParser(description="Process some integers.") + parser.add_argument( + "-f", "--file", dest="file", type=str, help="elf file name", required=True + ) + 
parser.add_argument( + "-s", + "--section", + dest="section", + type=str, + help="elf section name", + required=True, + ) + parser.add_argument( + "-b", + "--base", + dest="base", + type=hex_value, + help="elf base address", + required=True, + ) args = parser.parse_args() - process_file (args.file, args.section, args.base) + process_file(args.file, args.section, args.base) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/unicorn_mode/helper_scripts/ida_context_loader.py b/unicorn_mode/helper_scripts/ida_context_loader.py index 31d47a90..d7984c77 100644 --- a/unicorn_mode/helper_scripts/ida_context_loader.py +++ b/unicorn_mode/helper_scripts/ida_context_loader.py @@ -34,13 +34,11 @@ import ida_segment class ContextLoaderError(Exception): - """Base "catch all" exception for this script - """ + """Base "catch all" exception for this script""" class ArchNotSupportedError(ContextLoaderError): - """Exception raised if the input file CPU architecture isn't supported fully - """ + """Exception raised if the input file CPU architecture isn't supported fully""" def parse_mapping_index(filepath: str): @@ -51,13 +49,16 @@ def parse_mapping_index(filepath: str): """ if filepath is None: - raise ContextLoaderError('_index.json file was not selected') + raise ContextLoaderError("_index.json file was not selected") try: - with open(filepath, 'rb') as _file: + with open(filepath, "rb") as _file: return json.load(_file) except Exception as ex: - raise ContextLoaderError('Failed to parse json file {}'.format(filepath)) from ex + raise ContextLoaderError( + "Failed to parse json file {}".format(filepath) + ) from ex + def get_input_name(): """Get the name of the input file @@ -68,19 +69,21 @@ def get_input_name(): input_filepath = ida_nalt.get_input_file_path() return Path(input_filepath).name + def write_segment_bytes(start: int, filepath: str): - """"Read data from context file and write it to the IDA segment + """ "Read data from context file 
and write it to the IDA segment :param start: Start address :param filepath: Path to context file """ - with open(filepath, 'rb') as _file: + with open(filepath, "rb") as _file: data = _file.read() decompressed_data = zlib.decompress(data) ida_bytes.put_bytes(start, decompressed_data) + def create_segment(context_dir: str, segment: dict, is_be: bool): """Create segment in IDA and map in the data from the file @@ -90,23 +93,30 @@ def create_segment(context_dir: str, segment: dict, is_be: bool): """ input_name = get_input_name() - if Path(segment['name']).name != input_name: + if Path(segment["name"]).name != input_name: ida_seg = idaapi.segment_t() - ida_seg.start_ea = segment['start'] - ida_seg.end_ea = segment['end'] + ida_seg.start_ea = segment["start"] + ida_seg.end_ea = segment["end"] ida_seg.bitness = 1 if is_be else 0 - if segment['permissions']['r']: + if segment["permissions"]["r"]: ida_seg.perm |= ida_segment.SEGPERM_READ - if segment['permissions']['w']: + if segment["permissions"]["w"]: ida_seg.perm |= ida_segment.SEGPERM_WRITE - if segment['permissions']['x']: + if segment["permissions"]["x"]: ida_seg.perm |= ida_segment.SEGPERM_EXEC - idaapi.add_segm_ex(ida_seg, Path(segment['name']).name, 'CODE', idaapi.ADDSEG_OR_DIE) + idaapi.add_segm_ex( + ida_seg, Path(segment["name"]).name, "CODE", idaapi.ADDSEG_OR_DIE + ) else: - idaapi.add_segm_ex(ida_seg, Path(segment['name']).name, 'DATA', idaapi.ADDSEG_OR_DIE) + idaapi.add_segm_ex( + ida_seg, Path(segment["name"]).name, "DATA", idaapi.ADDSEG_OR_DIE + ) + + if segment["content_file"]: + write_segment_bytes( + segment["start"], PurePath(context_dir, segment["content_file"]) + ) - if segment['content_file']: - write_segment_bytes(segment['start'], PurePath(context_dir, segment['content_file'])) def create_segments(index: dict, context_dir: str): """Iterate segments in index JSON, create the segment in IDA, and map in the data from the file @@ -117,9 +127,10 @@ def create_segments(index: dict, context_dir: str): 
info = idaapi.get_inf_structure() is_be = info.is_be() - for segment in index['segments']: + for segment in index["segments"]: create_segment(context_dir, segment, is_be) + def rebase_program(index: dict): """Rebase the program to the offset specified in the context _index.json @@ -128,20 +139,21 @@ def rebase_program(index: dict): input_name = get_input_name() new_base = None - for segment in index['segments']: - if not segment['name']: + for segment in index["segments"]: + if not segment["name"]: continue - segment_name = Path(segment['name']).name + segment_name = Path(segment["name"]).name if input_name == segment_name: - new_base = segment['start'] + new_base = segment["start"] break if not new_base: - raise ContextLoaderError('Input file is not in _index.json') + raise ContextLoaderError("Input file is not in _index.json") current_base = idaapi.get_imagebase() - ida_segment.rebase_program(new_base-current_base, 8) + ida_segment.rebase_program(new_base - current_base, 8) + def get_pc_by_arch(index: dict) -> int: """Queries the input file CPU architecture and attempts to lookup the address of the program @@ -153,13 +165,14 @@ def get_pc_by_arch(index: dict) -> int: progctr = None info = idaapi.get_inf_structure() - if info.procname == 'metapc': + if info.procname == "metapc": if info.is_64bit(): - progctr = index['regs']['rax'] + progctr = index["regs"]["rax"] elif info.is_32bit(): - progctr = index['regs']['eax'] + progctr = index["regs"]["eax"] return progctr + def write_reg_info(index: dict): """Write register info as line comment at instruction pointed to by the program counter and change focus to that location @@ -167,17 +180,19 @@ def write_reg_info(index: dict): :param index: _index.json JSON data """ - cmt = '' - for reg, val in index['regs'].items(): + cmt = "" + for reg, val in index["regs"].items(): cmt += f"{reg.ljust(6)} : {hex(val)}\n" progctr = get_pc_by_arch(index) if progctr is None: raise ArchNotSupportedError( - 'Architecture not fully 
supported, skipping register status comment') + "Architecture not fully supported, skipping register status comment" + ) ida_bytes.set_cmt(progctr, cmt, 0) ida_kernwin.jumpto(progctr) + def main(filepath): """Main - parse _index.json input and map context files into the database @@ -193,5 +208,6 @@ def main(filepath): except ContextLoaderError as ex: print(ex) -if __name__ == '__main__': - main(ida_kernwin.ask_file(1, '*.json', 'Import file name')) + +if __name__ == "__main__": + main(ida_kernwin.ask_file(1, "*.json", "Import file name")) diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py index 0fe7eabf..f64e3fae 100644 --- a/utils/autodict_ql/autodict-ql.py +++ b/utils/autodict_ql/autodict-ql.py @@ -11,7 +11,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -21,6 +21,7 @@ import subprocess from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -28,109 +29,118 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - - #parser.add_argument("tokenpath", - #help="Destination directory for tokens") - parser.add_argument("cur", - help = "Current Path") - parser.add_argument("db", - help = "CodeQL database Path") - parser.add_argument("tokenpath", - help="Destination directory for tokens") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + + # parser.add_argument("tokenpath", + # help="Destination directory for 
tokens") + parser.add_argument("cur", help="Current Path") + parser.add_argument("db", help="CodeQL database Path") + parser.add_argument("tokenpath", help="Destination directory for tokens") return parser.parse_args() -def static_analysis(file,file2,cur,db) : - with open(cur+"/"+file, "w") as f: - print(cur+"/"+file) - stream = os.popen("codeql query run " + cur +"/"+ file2 + " -d " + db ) + +def static_analysis(file, file2, cur, db): + with open(cur + "/" + file, "w") as f: + print(cur + "/" + file) + stream = os.popen("codeql query run " + cur + "/" + file2 + " -d " + db) output = stream.read() f.write(output) f.close() -def copy_tokens(cur, tokenpath) : - subprocess.call(["mv " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) - subprocess.call(["rm -rf strcmp-strs memcmp-strs strncmp-strs lits strtool-strs"],shell=True) - subprocess.call(["rm *.out"],shell=True) - subprocess.call(["find "+tokenpath+" -size 0 -delete"],shell=True) - - - -def codeql_analysis(cur, db) : - static_analysis("litout.out","litool.ql", cur, db) - static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) - static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) - static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) - static_analysis("strtool-strings.out","strtool.ql", cur, db) - start_autodict(0,cur) +def copy_tokens(cur, tokenpath): + subprocess.call( + ["mv " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" 
+ tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."], shell=True + ) + subprocess.call( + ["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."], + shell=True, + ) + subprocess.call( + ["rm -rf strcmp-strs memcmp-strs strncmp-strs lits strtool-strs"], shell=True + ) + subprocess.call(["rm *.out"], shell=True) + subprocess.call(["find " + tokenpath + " -size 0 -delete"], shell=True) + + +def codeql_analysis(cur, db): + static_analysis("litout.out", "litool.ql", cur, db) + static_analysis("strcmp-strings.out", "strcmp-str.ql", cur, db) + static_analysis("strncmp-strings.out", "strncmp-str.ql", cur, db) + static_analysis("memcmp-strings.out", "memcmp-str.ql", cur, db) + static_analysis("strtool-strings.out", "strtool.ql", cur, db) + start_autodict(0, cur) def start_autodict(tokenpath, cur): - command = [ - 'python3', - cur + '/litan.py', - cur+'/lits/', - cur+'/litout.out' - ] + command = ["python3", cur + "/litan.py", cur + "/lits/", cur + "/litout.out"] worker1 = subprocess.Popen(command) print(worker1.communicate()) - + command1 = [ - 'python3', - cur + '/strcmp-strings.py', - cur + '/strcmp-strs/', - cur + '/strcmp-strings.out' - ] + "python3", + cur + "/strcmp-strings.py", + cur + "/strcmp-strs/", + cur + "/strcmp-strings.out", + ] worker2 = subprocess.Popen(command1) print(worker2.communicate()) command2 = [ - 'python3', - cur + '/strncmp-strings.py', - cur + '/strncmp-strs/', - cur + '/strncmp-strings.out' - ] + "python3", + cur + "/strncmp-strings.py", + cur + "/strncmp-strs/", + cur + "/strncmp-strings.out", + ] worker3 = subprocess.Popen(command2) print(worker3.communicate()) - - command5 = [ - 'python3', - cur + '/memcmp-strings.py', - cur + '/memcmp-strs/', - cur + '/memcmp-strings.out' - ] + "python3", + cur + "/memcmp-strings.py", + cur + 
"/memcmp-strs/", + cur + "/memcmp-strings.out", + ] worker6 = subprocess.Popen(command5) print(worker6.communicate()) - - command8 = [ - 'python3', - cur + '/stan-strings.py', - cur + '/strtool-strs/', - cur + '/strtool-strings.out' - ] + "python3", + cur + "/stan-strings.py", + cur + "/strtool-strs/", + cur + "/strtool-strings.out", + ] worker9 = subprocess.Popen(command8) print(worker9.communicate()) - def main(): - args = parse_args() + args = parse_args() ensure_dir(args.tokenpath) - #copy_tokens(args.cur, args.tokenpath) + # copy_tokens(args.cur, args.tokenpath) codeql_analysis(args.cur, args.db) copy_tokens(args.cur, args.tokenpath) - #start_autodict(args.tokenpath, args.cur) -if __name__ == '__main__': - main() \ No newline at end of file + # start_autodict(args.tokenpath, args.cur) + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/litan.py b/utils/autodict_ql/litan.py index 18c04c34..7033d363 100644 --- a/utils/autodict_ql/litan.py +++ b/utils/autodict_ql/litan.py @@ -4,7 +4,7 @@ # Author : Microsvuln - Arash.vre@gmail.com import string import os -import binascii +import binascii import codecs import struct import errno @@ -12,75 +12,101 @@ import argparse import re import base64 from binascii import unhexlify + + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file to analysis and output folder to save corpdirus for constants in the overall project ------- Example usage : python2 thisfile.py outdir o.txt")) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate files.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. 
ooo-hex.txt, analysis take place on this file, example : python2 thisfile.py outdir out.txt") - return parser.parse_args() + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file to analysis and output folder to save corpdirus for constants in the overall project ------- Example usage : python2 thisfile.py outdir o.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate files." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. ooo-hex.txt, analysis take place on this file, example : python2 thisfile.py outdir out.txt", + ) + return parser.parse_args() + + def ensure_dir(dir): try: os.makedirs(dir) except OSError as e: if e.errno == errno.EEXIST: - #print "[-] Directory exists, specify another directory" + # print "[-] Directory exists, specify another directory" exit(1) + + def do_analysis1(corpdir, infile): - with open(infile, "rb") as f: - lines = f.readlines()[1:] - f.close() + with open(infile, "rb") as f: + lines = f.readlines()[1:] + f.close() new_lst = [] n = 1 for i, num in enumerate(lines): if i != 0: - new_lst.append(num) + new_lst.append(num) str1 = str(num) - print ("num is " + str1) - str1 = str1.rstrip('\n\n') - #str1 = str1.replace("0x",""); - str1 = str1.replace("|","") - str1 = str1.rstrip('\r\n') - str1 = str1.rstrip('\n') - str1 = str1.replace(" ","") - #str1 = str1.translate(None, string.punctuation) - translator=str.maketrans('','',string.punctuation) - str1=str1.translate(translator) + print("num is " + str1) + str1 = str1.rstrip("\n\n") + # str1 = str1.replace("0x",""); + str1 = str1.replace("|", "") + str1 = str1.rstrip("\r\n") + str1 = str1.rstrip("\n") + str1 = str1.replace(" ", "") + # str1 = str1.translate(None, string.punctuation) + translator = str.maketrans("", "", string.punctuation) + str1 = str1.translate(translator) str1 = str1[1:] str1 = str1[:-1] print("After cleanup : " + str1) - if (str1 != '0') and (str1 != 
'ffffffff') and (str1 != 'fffffffe') or (len(str1) == 4) or (len(str1) == 8): - print ("first : "+str1) - if len(str1) > 8 : + if ( + (str1 != "0") + and (str1 != "ffffffff") + and (str1 != "fffffffe") + or (len(str1) == 4) + or (len(str1) == 8) + ): + print("first : " + str1) + if len(str1) > 8: str1 = str1[:-1] - elif (len(str1) == 5) : + elif len(str1) == 5: str1 = str1 = "0" try: - #str1 = str1.decode("hex") - with open(corpdir+'/lit-seed{0}'.format(n), 'w') as file: - str1 = str1.replace("0x",""); - print (str1) - str1 = int(str1,base=16) - str1 = str1.to_bytes(4, byteorder='little') - file.write(str(str1)) - file.close() - with open (corpdir+'/lit-seed{0}'.format(n), 'r') as q : - a = q.readline() - a = a[1:] - print ("AFL++ Autodict-QL by Microsvuln : Writing Token :" + str(a)) - q.close() - with open (corpdir+'/lit-seed{0}'.format(n), 'w') as w1 : - w1.write(str(a)) - print ("Done!") - w1.close() - except: - print("Error!") - n = n+1 + # str1 = str1.decode("hex") + with open(corpdir + "/lit-seed{0}".format(n), "w") as file: + str1 = str1.replace("0x", "") + print(str1) + str1 = int(str1, base=16) + str1 = str1.to_bytes(4, byteorder="little") + file.write(str(str1)) + file.close() + with open(corpdir + "/lit-seed{0}".format(n), "r") as q: + a = q.readline() + a = a[1:] + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token :" + + str(a) + ) + q.close() + with open( + corpdir + "/lit-seed{0}".format(n), "w" + ) as w1: + w1.write(str(a)) + print("Done!") + w1.close() + except: + print("Error!") + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_analysis1(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py index d1047caa..270a697c 100644 --- a/utils/autodict_ql/memcmp-strings.py +++ b/utils/autodict_ql/memcmp-strings.py @@ -5,7 +5,7 @@ import os 
import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/memcmp-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/memcmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py index 65d08c97..81cb0b97 100644 --- a/utils/autodict_ql/stan-strings.py +++ b/utils/autodict_ql/stan-strings.py @@ -5,7 +5,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def ensure_dir(dir): if 
e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/seed-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/seed-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py index 88128dbb..9c2520c9 100644 --- a/utils/autodict_ql/strcmp-strings.py +++ b/utils/autodict_ql/strcmp-strings.py @@ -5,7 +5,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def ensure_dir(dir): 
if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/strcmp-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/strcmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() diff --git a/utils/autodict_ql/strncmp-strings.py b/utils/autodict_ql/strncmp-strings.py index 0ad0e697..6206b4c4 100644 --- a/utils/autodict_ql/strncmp-strings.py +++ b/utils/autodict_ql/strncmp-strings.py @@ -5,7 +5,7 @@ import os import string -import binascii +import binascii import codecs import errno import struct @@ -13,6 +13,7 @@ import argparse import re from binascii import unhexlify + def ensure_dir(dir): try: os.makedirs(dir) @@ -20,44 +21,63 @@ def 
ensure_dir(dir): if e.errno != errno.EEXIST: raise + def parse_args(): - parser = argparse.ArgumentParser(description=( - "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) - parser.add_argument("corpdir", - help="The path to the corpus directory to generate strings.") - parser.add_argument("infile", - help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + parser = argparse.ArgumentParser( + description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" + ) + ) + parser.add_argument( + "corpdir", help="The path to the corpus directory to generate strings." + ) + parser.add_argument( + "infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt", + ) return parser.parse_args() def do_string_analysis(corpdir, infile1): - with open(infile1, "r") as f1: - lines = f1.readlines()[1:] - f1.close() + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() new_lst1 = [] n = 1 for i, num1 in enumerate(lines): if i != 0: new_lst1.append(num1) - #print("num : %s" % num1) + # print("num : %s" % num1) str11 = str(num1) - str11 = str11.replace("|","") - str11 = str11.replace("\n","") + str11 = str11.replace("|", "") + str11 = str11.replace("\n", "") str11 = str11.lstrip() str11 = str11.rstrip() str11 = str(str11) - if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + if ( + (" " in str11) + or (")" in str11) + or ("(" in str11) + or ("<" in str11) + or (">" in str11) + ): print("Space / Paranthesis String : %s" % str11) - else : - with open(corpdir+'/strncmp-str{0}'.format(n), 'w') as file: - file.write(str11) - print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) - n=n+1 + else: + with open(corpdir + "/strncmp-str{0}".format(n), "w") as file: + file.write(str11) + print( + "AFL++ Autodict-QL by Microsvuln : Writing Token : %s" + % str11 + ) + n = n + 1 + def main(): - args = parse_args() + args = parse_args() ensure_dir(args.corpdir) do_string_analysis(args.corpdir, args.infile) -if __name__ == '__main__': - main() \ No newline at end of file + + +if __name__ == "__main__": + main() -- cgit 1.4.1 From 846a46e06052c13e3036fbee05866d165adb19cc Mon Sep 17 00:00:00 2001 From: hexcoder Date: Fri, 16 Apr 2021 12:12:52 +0200 Subject: review --- utils/autodict_ql/readme.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 8c24d65c..31a20352 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ 
-2,13 +2,13 @@ ## What is this? -`Autodict-QL` is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (Unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses the CodeQL (A powerful semantic code analysis engine) to fetch information from a code base. +`Autodict-QL` is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses CodeQL (a powerful semantic code analysis engine) to fetch information from a code base. -Tokens are useful when you perform fuzzing on different parsers. AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campagin. if you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . +Tokens are useful when you perform fuzzing on different parsers. The AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campaign. If you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . ## Why CodeQL ? -We basically developed this plugin on top of CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. +We basically developed this plugin on top of the CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. 
This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. ## CodeQL scripts @@ -16,7 +16,7 @@ Currently, we pushed some scripts as defaults for Token generation. In addition, Currently we provided the following CodeQL scripts : -`strcmp-str.ql` is used to extract strings that are related to `strcmp` function. +`strcmp-str.ql` is used to extract strings that are related to the `strcmp` function. `strncmp-str.ql` is used to extract the strings from the `strncmp` function. @@ -24,18 +24,18 @@ Currently we provided the following CodeQL scripts : `litool.ql` extracts Magic numbers as Hexadecimal format. -`strtool.ql` extracts strings with uses of a regex and dataflow concept to capture the string comparison functions. if strcmp is rewritten in a project as Mystrcmp or something like strmycmp, then this script can catch the arguments and these are valuable tokens. +`strtool.ql` extracts strings with uses of a regex and dataflow concept to capture the string comparison functions. If `strcmp` is rewritten in a project as Mystrcmp or something like strmycmp, then this script can catch the arguments and these are valuable tokens. You can write other CodeQL scripts to extract possible effective tokens if you think they can be useful. ## Usage -Before proceed to installation make sure that you have the following packages by installing them : +Before you proceed to installation make sure that you have the following packages by installing them : ```shell sudo apt install build-essential libtool-bin python3-dev python3 automake git vim wget -y ``` -The usage of Autodict-QL is pretty easy. But let's describe it as : +The usage of Autodict-QL is pretty easy. But let's describe it as: 1. First of all, you need to have CodeQL installed on the system. we make this possible with `build-codeql.sh` bash script. 
This script will install CodeQL completety and will set the required environment variables for your system. Do the following : @@ -45,7 +45,7 @@ Do the following : # source ~/.bashrc # codeql ``` -Then you should get : +Then you should get: ```shell Usage: codeql ... @@ -73,29 +73,29 @@ Commands: github Commands useful for interacting with the GitHub API through CodeQL. ``` -2. Compile your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard . - - First you need to create a CodeQL database of the project codebase, suppose we want to compile the libxml with codeql. go to libxml and issue the following commands: +2. Compile your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard. + - First you need to create a CodeQL database of the project codebase, suppose we want to compile `libxml` with codeql. Go to libxml and issue the following commands: - `./configure --disable-shared` - `codeql create database libxml-db --language=cpp --command=make` - Now you have the CodeQL database of the project :-) -3. The final step is to update the CodeQL database you created in the step 2 (Suppose we are in `aflplusplus/utils/autodict_ql/` directory) : +3. The final step is to update the CodeQL database you created in step 2 (Suppose we are in `aflplusplus/utils/autodict_ql/` directory): - `codeql database upgrade /home/user/libxml/libxml-db` -4. Everything is set! Now you should issue the following to get the tokens : +4. Everything is set! 
Now you should issue the following to get the tokens: - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py $PWD /home/user/libxml/libxml-db tokens` - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag. + - This will create the final `tokens` dir for you and you are done, then pass the tokens path to AFL++'s `-x` flag. 5. Done! ## More on dictionaries and tokens -Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. -On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. +Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is an LLVM pass that can automatically extract useful tokens. In addition, with LTO instrumentation mode, this token extraction is performed automatically. The `Autodict-QL` plugin gives you scripting capability and you can extract whatever you want from the codebase; it's up to you. In addition, it's independent of the LLVM system.
+On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file formats and specifications. For example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionaries, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generate dictionaries based on the target. -I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. +I personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manually generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. In overall, fuzzing with dictionaries and well-generated tokens will give better results. There are 2 important points to remember : -- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chance to discover new bugs. -- Do not remember to set the `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries, if you forget to set this environment variable, then AFL++ use just 200 tokens and use the rest of them probablistically. So this will guarantees that your tokens will be used by AFL++. +- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chances to discover new bugs. +- Do not forget to set `AFL_MAX_DET_EXTRAS` at least to the number of generated dictionaries. If you forget to set this environment variable, then AFL++ uses just 200 tokens and uses the rest of them only probabilistically. So this will guarantee that your tokens will be used by AFL++.
-- cgit 1.4.1 From 523aaaebefb34737cbc0964a284b0ca67f477ad3 Mon Sep 17 00:00:00 2001 From: Microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Fri, 16 Apr 2021 15:39:45 +0430 Subject: Add newline Add newline --- utils/autodict_ql/build-codeql.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/build-codeql.sh b/utils/autodict_ql/build-codeql.sh index 450207f6..6ae4b362 100644 --- a/utils/autodict_ql/build-codeql.sh +++ b/utils/autodict_ql/build-codeql.sh @@ -14,4 +14,4 @@ export "PATH=~/codeql-home/codeql-cli/:$PATH" echo "export PATH=~/codeql-home/codeql-cli/:$PATH" >> ~/.bashrc codeql resolve languages codeql resolve qlpacks -codeql \ No newline at end of file +codeql -- cgit 1.4.1 From 2019b42ceda386ce63e36312ea0606b216019bac Mon Sep 17 00:00:00 2001 From: Microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Fri, 16 Apr 2021 15:41:12 +0430 Subject: Update readme fix typo in readme --- utils/autodict_ql/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 8c24d65c..3402a210 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -88,7 +88,7 @@ Commands: ## More on dictionaries and tokens -Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automtically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. 
+Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extracts useful tokens, in addition with LTO instrumentation mode, this dict2file is automatically generating tokens. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. in addition it's independent from LLVM system. On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file format and speicifications. for example, for testing binutils and ELF file format or AVI in FFMPEG, there are no prebuilt dictionary, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generating dictionaries based on the target. I've personally prefer to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manully generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. -- cgit 1.4.1 From 98989f1088d04dd4c0d21834c38b7683f1cfb42d Mon Sep 17 00:00:00 2001 From: Microsvuln <55649192+Microsvuln@users.noreply.github.com> Date: Fri, 16 Apr 2021 15:45:22 +0430 Subject: Add new line Add new line --- utils/autodict_ql/litool.ql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'utils') diff --git a/utils/autodict_ql/litool.ql b/utils/autodict_ql/litool.ql index b7f4bf33..76f429c1 100644 --- a/utils/autodict_ql/litool.ql +++ b/utils/autodict_ql/litool.ql @@ -7,4 +7,4 @@ class HexOrOctLiteral extends Literal{ } from HexOrOctLiteral lit -select lit.getValueText() \ No newline at end of file +select lit.getValueText() -- cgit 1.4.1