diff options
-rw-r--r-- | utils/autodict_ql/autodict-ql.py | 136 | ||||
-rw-r--r-- | utils/autodict_ql/build-codeql.sh | 17 | ||||
-rw-r--r-- | utils/autodict_ql/litan.py | 86 | ||||
-rw-r--r-- | utils/autodict_ql/litool.ql | 10 | ||||
-rw-r--r-- | utils/autodict_ql/memcmp-str.ql | 8 | ||||
-rw-r--r-- | utils/autodict_ql/memcmp-strings.py | 63 | ||||
-rw-r--r-- | utils/autodict_ql/qlpack.yml | 3 | ||||
-rw-r--r-- | utils/autodict_ql/readme.md | 101 | ||||
-rw-r--r-- | utils/autodict_ql/stan-strings.py | 63 | ||||
-rw-r--r-- | utils/autodict_ql/strcmp-str.ql | 8 | ||||
-rw-r--r-- | utils/autodict_ql/strcmp-strings.py | 63 | ||||
-rw-r--r-- | utils/autodict_ql/strncmp-str.ql | 8 | ||||
-rw-r--r-- | utils/autodict_ql/strncmp-strings.py | 63 | ||||
-rw-r--r-- | utils/autodict_ql/strtool.ql | 24 |
14 files changed, 653 insertions, 0 deletions
diff --git a/utils/autodict_ql/autodict-ql.py b/utils/autodict_ql/autodict-ql.py new file mode 100644 index 00000000..0fe7eabf --- /dev/null +++ b/utils/autodict_ql/autodict-ql.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +# AutoDict-QL - Optimal Token Generation for Fuzzing +# Part of AFL++ Project +# Developed and Maintained by Arash Ale Ebrahim (@Microsvuln) +# Usage : python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH] +# CURRENT_DIR = full of your current Dir +# CODEQL_DATABASE_PATH = Full path to your CodeQL database +# TOKEN_PATH = Folder name of the newly generated tokens +# Example : python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens +# Just pass the tokens folder to the -x flag of your fuzzer + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import shutil +import subprocess + +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + + #parser.add_argument("tokenpath", + #help="Destination directory for tokens") + parser.add_argument("cur", + help = "Current Path") + parser.add_argument("db", + help = "CodeQL database Path") + parser.add_argument("tokenpath", + help="Destination directory for tokens") + + return parser.parse_args() + +def static_analysis(file,file2,cur,db) : + with open(cur+"/"+file, "w") as f: + print(cur+"/"+file) + stream = os.popen("codeql query run " + cur +"/"+ file2 + " -d " + db ) + output = stream.read() + f.write(output) + f.close() + +def copy_tokens(cur, tokenpath) : + subprocess.call(["mv " + cur + "/" + "strcmp-strs/*" + " " + 
cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["mv " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True) + subprocess.call(["rm -rf strcmp-strs memcmp-strs strncmp-strs lits strtool-strs"],shell=True) + subprocess.call(["rm *.out"],shell=True) + subprocess.call(["find "+tokenpath+" -size 0 -delete"],shell=True) + + + +def codeql_analysis(cur, db) : + static_analysis("litout.out","litool.ql", cur, db) + static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db) + static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db) + static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db) + static_analysis("strtool-strings.out","strtool.ql", cur, db) + start_autodict(0,cur) + + + +def start_autodict(tokenpath, cur): + command = [ + 'python3', + cur + '/litan.py', + cur+'/lits/', + cur+'/litout.out' + ] + worker1 = subprocess.Popen(command) + print(worker1.communicate()) + + command1 = [ + 'python3', + cur + '/strcmp-strings.py', + cur + '/strcmp-strs/', + cur + '/strcmp-strings.out' + ] + worker2 = subprocess.Popen(command1) + print(worker2.communicate()) + + command2 = [ + 'python3', + cur + '/strncmp-strings.py', + cur + '/strncmp-strs/', + cur + '/strncmp-strings.out' + ] + worker3 = subprocess.Popen(command2) + print(worker3.communicate()) + + + + command5 = [ + 'python3', + cur + '/memcmp-strings.py', + cur + '/memcmp-strs/', + cur + '/memcmp-strings.out' + ] + worker6 = subprocess.Popen(command5) + print(worker6.communicate()) + + + + command8 = [ + 'python3', + cur + '/stan-strings.py', + cur + '/strtool-strs/', + cur + '/strtool-strings.out' + ] + worker9 = subprocess.Popen(command8) + 
print(worker9.communicate()) + + + +def main(): + args = parse_args() + ensure_dir(args.tokenpath) + #copy_tokens(args.cur, args.tokenpath) + codeql_analysis(args.cur, args.db) + copy_tokens(args.cur, args.tokenpath) + #start_autodict(args.tokenpath, args.cur) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/build-codeql.sh b/utils/autodict_ql/build-codeql.sh new file mode 100644 index 00000000..450207f6 --- /dev/null +++ b/utils/autodict_ql/build-codeql.sh @@ -0,0 +1,17 @@ +cd ~ +if [ -d "codeql-home" ]; then + echo "Exist !" + exit 1 +fi +mkdir codeql-home +cd codeql-home +git clone https://github.com/github/codeql.git codeql-repo +git clone https://github.com/github/codeql-go.git +wget https://github.com/github/codeql-cli-binaries/releases/download/v2.4.6/codeql-linux64.zip +unzip codeql-linux64.zip +mv codeql codeql-cli +export "PATH=~/codeql-home/codeql-cli/:$PATH" +echo "export PATH=~/codeql-home/codeql-cli/:$PATH" >> ~/.bashrc +codeql resolve languages +codeql resolve qlpacks +codeql \ No newline at end of file diff --git a/utils/autodict_ql/litan.py b/utils/autodict_ql/litan.py new file mode 100644 index 00000000..18c04c34 --- /dev/null +++ b/utils/autodict_ql/litan.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com +import string +import os +import binascii +import codecs +import struct +import errno +import argparse +import re +import base64 +from binascii import unhexlify +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file to analysis and output folder to save corpdirus for constants in the overall project ------- Example usage : python2 thisfile.py outdir o.txt")) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate files.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-hex.txt, analysis take place on this file, example : python2 thisfile.py outdir out.txt") + return parser.parse_args() +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno == errno.EEXIST: + #print "[-] Directory exists, specify another directory" + exit(1) +def do_analysis1(corpdir, infile): + with open(infile, "rb") as f: + lines = f.readlines()[1:] + f.close() + new_lst = [] + n = 1 + for i, num in enumerate(lines): + if i != 0: + new_lst.append(num) + str1 = str(num) + print ("num is " + str1) + str1 = str1.rstrip('\n\n') + #str1 = str1.replace("0x",""); + str1 = str1.replace("|","") + str1 = str1.rstrip('\r\n') + str1 = str1.rstrip('\n') + str1 = str1.replace(" ","") + #str1 = str1.translate(None, string.punctuation) + translator=str.maketrans('','',string.punctuation) + str1=str1.translate(translator) + str1 = str1[1:] + str1 = str1[:-1] + print("After cleanup : " + str1) + if (str1 != '0') and (str1 != 'ffffffff') and (str1 != 'fffffffe') or (len(str1) == 4) or (len(str1) == 8): + print ("first : "+str1) + if len(str1) > 8 : + str1 = str1[:-1] + elif (len(str1) == 5) : + str1 = str1 = "0" + try: + #str1 = str1.decode("hex") + with open(corpdir+'/lit-seed{0}'.format(n), 'w') as file: + str1 = str1.replace("0x",""); + print (str1) + str1 = int(str1,base=16) + str1 = str1.to_bytes(4, byteorder='little') + file.write(str(str1)) + file.close() + with open (corpdir+'/lit-seed{0}'.format(n), 'r') as q : + a = q.readline() + a = a[1:] + print ("AFL++ Autodict-QL by Microsvuln : Writing Token :" + str(a)) + q.close() + with open (corpdir+'/lit-seed{0}'.format(n), 'w') as w1 : + w1.write(str(a)) + print ("Done!") + w1.close() + except: + print("Error!") + n = n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_analysis1(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/litool.ql b/utils/autodict_ql/litool.ql new file mode 100644 index 
00000000..b7f4bf33 --- /dev/null +++ b/utils/autodict_ql/litool.ql @@ -0,0 +1,10 @@ +import cpp + +class HexOrOctLiteral extends Literal{ + HexOrOctLiteral(){ + (this instanceof HexLiteral) or (this instanceof OctalLiteral) + } +} + +from HexOrOctLiteral lit +select lit.getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/memcmp-str.ql b/utils/autodict_ql/memcmp-str.ql new file mode 100644 index 00000000..830c9cac --- /dev/null +++ b/utils/autodict_ql/memcmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : memcmp trace + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("memcmp") +select fucall.getArgument(_).getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/memcmp-strings.py b/utils/autodict_ql/memcmp-strings.py new file mode 100644 index 00000000..d1047caa --- /dev/null +++ b/utils/autodict_ql/memcmp-strings.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + #print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/memcmp-str{0}'.format(n), 'w') as file: + file.write(str11) + print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/qlpack.yml b/utils/autodict_ql/qlpack.yml new file mode 100644 index 00000000..28892f24 --- /dev/null +++ b/utils/autodict_ql/qlpack.yml @@ -0,0 +1,3 @@ +name: autodict +version: 0.0.0 +libraryPathDependencies: codeql-cpp diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md new file mode 100644 index 00000000..8c24d65c --- /dev/null +++ b/utils/autodict_ql/readme.md @@ -0,0 +1,101 @@ +# Autodict-QL - Optimal Token Generation for Fuzzing + +## What is this? + +`Autodict-QL` is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (Unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses the CodeQL (A powerful semantic code analysis engine) to fetch information from a code base. + +Tokens are useful when you perform fuzzing on different parsers. 
The AFL++ `-x` switch enables the use of dictionaries throughout your fuzzing campaign. If you are not familiar with dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries). + + +## Why CodeQL? +We developed this plugin on top of the CodeQL engine because it gives the user scripting features, it's easier to use, and it's independent of the LLVM system. This means that users can write their own CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. + + +## CodeQL scripts +Currently, we ship some scripts as defaults for token generation. In addition, we provide every CodeQL script as a standalone script because it's easier to modify or test. + +Currently we provide the following CodeQL scripts: + +`strcmp-str.ql` is used to extract the strings that are related to the `strcmp` function. + +`strncmp-str.ql` is used to extract the strings from the `strncmp` function. + +`memcmp-str.ql` is used to extract the strings from the `memcmp` function. + +`litool.ql` extracts magic numbers in hexadecimal format. + +`strtool.ql` uses a regex and dataflow analysis to extract strings passed to string comparison functions. If strcmp is reimplemented in a project as Mystrcmp or something like strmycmp, this script can still catch the arguments, and these are valuable tokens. + +You can write other CodeQL scripts to extract possibly effective tokens if you think they can be useful. + + +## Usage + +Before proceeding to the installation, make sure that you have the following packages by installing them: +```shell +sudo apt install build-essential libtool-bin python3-dev python3 automake git vim wget -y +``` +The usage of Autodict-QL is pretty easy. Let's describe it step by step: + +1. First of all, you need to have CodeQL installed on the system. We make this possible with the `build-codeql.sh` bash script. 
This script will install CodeQL completely and will set the required environment variables for your system. +Do the following: +```shell +# chmod +x build-codeql.sh +# ./build-codeql.sh +# source ~/.bashrc +# codeql +``` +Then you should get: + +```shell +Usage: codeql <command> <argument>... +Create and query CodeQL databases, or work with the QL language. + +GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is +not itself free software. Type codeql --license to see the license terms. + + --license Show the license terms for the CodeQL toolchain. +Common options: + -h, --help Show this help text. + -v, --verbose Incrementally increase the number of progress messages printed. + -q, --quiet Incrementally decrease the number of progress messages printed. +Some advanced options have been hidden; try --help -v for a fuller view. +Commands: + query Compile and execute QL code. + bqrs Get information from .bqrs files. + database Create, analyze and process CodeQL databases. + dataset [Plumbing] Work with raw QL datasets. + test Execute QL unit tests. + resolve [Deep plumbing] Helper commands to resolve disk locations etc. + execute [Deep plumbing] Low-level commands that need special JVM options. + version Show the version of the CodeQL toolchain. + generate Generate formatted QL documentation. + github Commands useful for interacting with the GitHub API through CodeQL. +``` + +2. Compile your project with CodeQL: To use the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not hard. + - First you need to create a CodeQL database of the project codebase. Suppose we want to compile libxml with CodeQL. Go to the libxml directory and issue the following commands: + - `./configure --disable-shared` + - `codeql database create libxml-db --language=cpp --command=make` + - Now you have the CodeQL database of the project :-) +3. 
The final step is to upgrade the CodeQL database you created in step 2 (suppose we are in the `aflplusplus/utils/autodict_ql/` directory): + - `codeql database upgrade /home/user/libxml/libxml-db` +4. Everything is set! Now you should issue the following to get the tokens: + - `python3 autodict-ql.py [CURRENT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` + - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py $PWD /home/user/libxml/libxml-db tokens` + - This will create the final `tokens` dir for you and you are done. Then pass the tokens path to the afl `-x` flag. +5. Done! + + +## More on dictionaries and tokens +Marc Heuse, a core developer of the AFL++ project, also developed a similar tool named `dict2file`, which is an LLVM pass that can automatically extract useful tokens. In addition, with the LTO instrumentation mode, token extraction is performed automatically. The `Autodict-QL` plugin gives you scripting capability, so what you extract from the codebase is up to you. In addition, it is independent of the LLVM system. +On the other hand, you can also use the Google dictionaries, which were made public in May 2020, but the problem with using the Google dictionaries is that they are limited to specific file formats and specifications. For example, for testing binutils and the ELF file format, or AVI in FFMPEG, there are no prebuilt dictionaries, so it is highly recommended to use the `Autodict-QL` or `dict2file` features to automatically generate dictionaries based on the target. + +I personally prefer to use `Autodict-QL` or `dict2file` rather than the Google dictionaries or any other manually generated dictionaries, as `Autodict-QL` and `dict2file` work based on the target. +Overall, fuzzing with dictionaries and well-generated tokens will give better results. 
+ +There are 2 important points to remember: + +- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence a better chance to discover new bugs. +- Do not forget to set `AFL_MAX_DET_EXTRAS` to the number of generated dictionaries. If you forget to set this environment variable, AFL++ will use just 200 tokens and use the rest of them probabilistically. Setting it guarantees that your tokens will be used by AFL++. + diff --git a/utils/autodict_ql/stan-strings.py b/utils/autodict_ql/stan-strings.py new file mode 100644 index 00000000..65d08c97 --- /dev/null +++ b/utils/autodict_ql/stan-strings.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + #print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/seed-str{0}'.format(n), 'w') as file: + file.write(str11) + print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/strcmp-str.ql b/utils/autodict_ql/strcmp-str.ql new file mode 100644 index 00000000..83ffadaf --- /dev/null +++ b/utils/autodict_ql/strcmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : strcmp + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("strcmp") +select fucall.getArgument(_).getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/strcmp-strings.py b/utils/autodict_ql/strcmp-strings.py new file mode 100644 index 00000000..88128dbb --- /dev/null +++ b/utils/autodict_ql/strcmp-strings.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if 
e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + #print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/strcmp-str{0}'.format(n), 'w') as file: + file.write(str11) + print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/strncmp-str.ql b/utils/autodict_ql/strncmp-str.ql new file mode 100644 index 00000000..dbb952e5 --- /dev/null +++ b/utils/autodict_ql/strncmp-str.ql @@ -0,0 +1,8 @@ +import cpp + +/// function : strncmp + +from FunctionCall fucall, Expr size +where + fucall.getTarget().hasName("strncmp") +select fucall.getArgument(_).getValueText() \ No newline at end of file diff --git a/utils/autodict_ql/strncmp-strings.py 
b/utils/autodict_ql/strncmp-strings.py new file mode 100644 index 00000000..0ad0e697 --- /dev/null +++ b/utils/autodict_ql/strncmp-strings.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# Autodict-QL - Optimal token generation for fuzzing +# Part of AFL++ Project +# Author : Microsvuln - Arash.vre@gmail.com + +import os +import string +import binascii +import codecs +import errno +import struct +import argparse +import re +from binascii import unhexlify + +def ensure_dir(dir): + try: + os.makedirs(dir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + +def parse_args(): + parser = argparse.ArgumentParser(description=( + "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" )) + parser.add_argument("corpdir", + help="The path to the corpus directory to generate strings.") + parser.add_argument("infile", + help="Specify file output of codeql analysis - ex. 
ooo-atr.txt, analysis take place on this file, example : python2 thisfile.py outdir strings.txt") + + return parser.parse_args() + + +def do_string_analysis(corpdir, infile1): + with open(infile1, "r") as f1: + lines = f1.readlines()[1:] + f1.close() + new_lst1 = [] + n = 1 + for i, num1 in enumerate(lines): + if i != 0: + new_lst1.append(num1) + #print("num : %s" % num1) + str11 = str(num1) + str11 = str11.replace("|","") + str11 = str11.replace("\n","") + str11 = str11.lstrip() + str11 = str11.rstrip() + str11 = str(str11) + if ((" " in str11 ) or (")" in str11) or ("(" in str11) or ("<" in str11) or (">" in str11)) : + print("Space / Paranthesis String : %s" % str11) + else : + with open(corpdir+'/strncmp-str{0}'.format(n), 'w') as file: + file.write(str11) + print("AFL++ Autodict-QL by Microsvuln : Writing Token : %s" % str11) + n=n+1 + +def main(): + args = parse_args() + ensure_dir(args.corpdir) + do_string_analysis(args.corpdir, args.infile) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/utils/autodict_ql/strtool.ql b/utils/autodict_ql/strtool.ql new file mode 100644 index 00000000..253d1555 --- /dev/null +++ b/utils/autodict_ql/strtool.ql @@ -0,0 +1,24 @@ +import cpp +import semmle.code.cpp.dataflow.DataFlow +class StringLiteralNode extends DataFlow::Node { + StringLiteralNode() { this.asExpr() instanceof StringLiteral } +} +class CmpArgNode extends DataFlow::Node { + CmpArgNode() { + exists(FunctionCall fc | + fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and + fc.getArgument(0) = this.asExpr() + ) + or + exists(FunctionCall fc | + fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and + fc.getArgument(1) = this.asExpr() + ) + } +} + +from StringLiteralNode src, CmpArgNode arg +where + DataFlow::localFlow(src, arg) + +select src.asExpr().(StringLiteral).toString() \ No newline at end of file |