aboutsummaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
Diffstat (limited to 'utils')
-rw-r--r--utils/autodict_ql/autodict_ql.py188
-rw-r--r--utils/autodict_ql/build-codeql.sh17
-rw-r--r--utils/autodict_ql/litan.py86
-rw-r--r--utils/autodict_ql/qlpack.yml3
-rw-r--r--utils/autodict_ql/readme.md81
-rw-r--r--utils/autodict_ql/strtool.ql6
6 files changed, 378 insertions, 3 deletions
diff --git a/utils/autodict_ql/autodict_ql.py b/utils/autodict_ql/autodict_ql.py
new file mode 100644
index 00000000..69d11f48
--- /dev/null
+++ b/utils/autodict_ql/autodict_ql.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+import os
+import string
+import binascii
+import codecs
+import errno
+import struct
+import argparse
+import shutil
+import subprocess
+
+from binascii import unhexlify
+
+def ensure_dir(dir):
+ try:
+ os.makedirs(dir)
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
+
+def parse_args():
+ parser = argparse.ArgumentParser(description=(
+ "Helper - Specify input file analysis and output folder to save corpus for strings in the overall project --------------------------------------------------------------------------- Example usage : python2 thisfile.py outdir str.txt" ))
+
+ #parser.add_argument("tokenpath",
+ #help="Destination directory for tokens")
+ parser.add_argument("cur",
+ help = "Current Path")
+ parser.add_argument("db",
+ help = "CodeQL database Path")
+ parser.add_argument("tokenpath",
+ help="Destination directory for tokens")
+
+ return parser.parse_args()
+
+def static_analysis(file,file2,cur,db) :
+ with open(cur+"/"+file, "w") as f:
+ print(cur+"/"+file)
+ stream = os.popen("codeql query run " + cur +"/"+ file2 + " -d " + db )
+ output = stream.read()
+ f.write(output)
+ f.close()
+
+def copy_tokens(cur, tokenpath) :
+ subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True)
+ subprocess.call(["cp " + cur + "/" + "strstr-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True)
+ subprocess.call(["cp " + cur + "/" + "strcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True)
+ subprocess.call(["cp " + cur + "/" + "strncmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True)
+ subprocess.call(["cp " + cur + "/" + "local-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True)
+ subprocess.call(["cp " + cur + "/" + "memcmp-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True)
+ subprocess.call(["cp " + cur + "/" + "global-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True)
+ subprocess.call(["cp " + cur + "/" + "lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True)
+ subprocess.call(["cp " + cur + "/" + "arrays-lits/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True)
+ subprocess.call(["cp " + cur + "/" + "arrays-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True)
+ subprocess.call(["cp " + cur + "/" + "strtool-strs/*" + " " + cur + "/" + tokenpath + "/."] ,shell=True)
+ #strtool-strs
+
+
+def codeql_analysis(cur, db) :
+ static_analysis("litout.out","litool.ql", cur, db)
+ static_analysis("strcmp-strings.out","strcmp-str.ql", cur, db)
+ static_analysis("strncmp-strings.out","strncmp-str.ql", cur, db)
+ static_analysis("strstr-strings.out","strstr-str.ql", cur, db)
+ static_analysis("memcmp-strings.out","memcmp-str.ql", cur, db)
+ static_analysis("global-values-strings.out","globals-values.ql", cur, db)
+ static_analysis("local-strings.out","locals-strs.ql", cur, db)
+ static_analysis("strtool-strings.out","strtool.ql", cur, db)
+ static_analysis("arrays.out","array-literals.ql", cur, db)
+ start_aflql(0,cur)
+ #command1 = [
+ # 'codeql','query', 'run',
+ # cur + '/litool.ql',
+ # '-d',
+ # db, '>','fff.txt'
+ # ]
+ #with open("litool2.log", "w") as f:
+ # stream = os.popen("codeql query run litool.ql -d " + db )
+ # output = stream.read()
+ # f.write(output)
+ # f.close()
+ #worker1 = subprocess.Popen(command1)
+ #print(worker1.communicate())
+
+
+def start_aflql(tokenpath, cur):
+ command = [
+ 'python3',
+ cur + '/litan.py',
+ cur+'/lits/',
+ cur+'/litout.out'
+ ]
+ worker1 = subprocess.Popen(command)
+ print(worker1.communicate())
+
+ command1 = [
+ 'python3',
+ cur + '/strcmp-strings.py',
+ cur + '/strcmp-strs/',
+ cur + '/strcmp-strings.out'
+ ]
+ worker2 = subprocess.Popen(command1)
+ print(worker2.communicate())
+
+ command2 = [
+ 'python3',
+ cur + '/strncmp-strings.py',
+ cur + '/strncmp-strs/',
+ cur + '/strncmp-strings.out'
+ ]
+ worker3 = subprocess.Popen(command2)
+ print(worker3.communicate())
+
+ command3 = [
+ 'python3',
+ cur + '/array-lits.py',
+ cur + '/arrays-lits/',
+ cur + '/arrays.out'
+ ]
+ worker4 = subprocess.Popen(command3)
+ print(worker4.communicate())
+
+ command4 = [
+ 'python3',
+ cur + '/array-strings.py',
+ cur + '/arrays-strs/',
+ cur + '/arrays.out'
+ ]
+ worker5 = subprocess.Popen(command4)
+ print(worker5.communicate())
+
+
+ command5 = [
+ 'python3',
+ cur + '/memcmp-strings.py',
+ cur + '/memcmp-strs/',
+ cur + '/memcmp-strings.out'
+ ]
+ worker6 = subprocess.Popen(command5)
+ print(worker6.communicate())
+
+ command6 = [
+ 'python3',
+ cur + '/globals-strings.py',
+ cur + '/global-strs/',
+ cur + '/global-values-strings.out'
+ ]
+ worker7 = subprocess.Popen(command6)
+ print(worker7.communicate())
+
+ command7 = [
+ 'python3',
+ cur + '/strstr-strings.py',
+ cur + '/strstr-strs/',
+ cur + '/strstr-strings.out'
+ ]
+ worker8 = subprocess.Popen(command7)
+ print(worker8.communicate())
+
+
+ #strtool-strings.out
+
+ command8 = [
+ 'python3',
+ cur + '/stan-strings.py',
+ cur + '/strtool-strs/',
+ cur + '/strtool-strings.out'
+ ]
+ worker9 = subprocess.Popen(command8)
+ print(worker9.communicate())
+
+ command9 = [
+ 'python3',
+ cur + '/local-strings.py',
+ cur + '/local-strs/',
+ cur + '/local-strings.out'
+ ]
+ worker10 = subprocess.Popen(command9)
+ print(worker10.communicate())
+
+def main():
+ args = parse_args()
+ ensure_dir(args.tokenpath)
+ #copy_tokens(args.cur, args.tokenpath)
+ codeql_analysis(args.cur, args.db)
+ copy_tokens(args.cur, args.tokenpath)
+ #start_aflql(args.tokenpath, args.cur)
+if __name__ == '__main__':
+ main() \ No newline at end of file
diff --git a/utils/autodict_ql/build-codeql.sh b/utils/autodict_ql/build-codeql.sh
new file mode 100644
index 00000000..ccff932e
--- /dev/null
+++ b/utils/autodict_ql/build-codeql.sh
@@ -0,0 +1,17 @@
+cd ~
+if [ -d "codeql-home" ]; then
+ echo "Exist !"
+ exit 1
+fi
+sudo apt install build-essential libtool-bin python3-dev automake git vim wget -y
+mkdir codeql-home
+cd codeql-home
+git clone https://github.com/github/codeql.git codeql-repo
+git clone https://github.com/github/codeql-go.git
+wget https://github.com/github/codeql-cli-binaries/releases/download/v2.4.6/codeql-linux64.zip
+unzip codeql-linux64.zip
+mv codeql codeql-cli
+export "PATH=~/codeql-home/codeql-cli/:$PATH"
+codeql resolve languages
+codeql resolve qlpacks
+echo "export PATH=~/codeql-home/codeql-cli/:$PATH" >> ~/.bashrc \ No newline at end of file
diff --git a/utils/autodict_ql/litan.py b/utils/autodict_ql/litan.py
new file mode 100644
index 00000000..18c04c34
--- /dev/null
+++ b/utils/autodict_ql/litan.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# Autodict-QL - Optimal token generation for fuzzing
+# Part of AFL++ Project
+# Author : Microsvuln - Arash.vre@gmail.com
+import string
+import os
+import binascii
+import codecs
+import struct
+import errno
+import argparse
+import re
+import base64
+from binascii import unhexlify
+def parse_args():
+ parser = argparse.ArgumentParser(description=(
+ "Helper - Specify input file to analysis and output folder to save corpdirus for constants in the overall project ------- Example usage : python2 thisfile.py outdir o.txt"))
+ parser.add_argument("corpdir",
+ help="The path to the corpus directory to generate files.")
+ parser.add_argument("infile",
+ help="Specify file output of codeql analysis - ex. ooo-hex.txt, analysis take place on this file, example : python2 thisfile.py outdir out.txt")
+ return parser.parse_args()
+def ensure_dir(dir):
+ try:
+ os.makedirs(dir)
+ except OSError as e:
+ if e.errno == errno.EEXIST:
+ #print "[-] Directory exists, specify another directory"
+ exit(1)
+def do_analysis1(corpdir, infile):
+ with open(infile, "rb") as f:
+ lines = f.readlines()[1:]
+ f.close()
+ new_lst = []
+ n = 1
+ for i, num in enumerate(lines):
+ if i != 0:
+ new_lst.append(num)
+ str1 = str(num)
+ print ("num is " + str1)
+ str1 = str1.rstrip('\n\n')
+ #str1 = str1.replace("0x","");
+ str1 = str1.replace("|","")
+ str1 = str1.rstrip('\r\n')
+ str1 = str1.rstrip('\n')
+ str1 = str1.replace(" ","")
+ #str1 = str1.translate(None, string.punctuation)
+ translator=str.maketrans('','',string.punctuation)
+ str1=str1.translate(translator)
+ str1 = str1[1:]
+ str1 = str1[:-1]
+ print("After cleanup : " + str1)
+ if (str1 != '0') and (str1 != 'ffffffff') and (str1 != 'fffffffe') or (len(str1) == 4) or (len(str1) == 8):
+ print ("first : "+str1)
+ if len(str1) > 8 :
+ str1 = str1[:-1]
+ elif (len(str1) == 5) :
+ str1 = str1 = "0"
+ try:
+ #str1 = str1.decode("hex")
+ with open(corpdir+'/lit-seed{0}'.format(n), 'w') as file:
+ str1 = str1.replace("0x","");
+ print (str1)
+ str1 = int(str1,base=16)
+ str1 = str1.to_bytes(4, byteorder='little')
+ file.write(str(str1))
+ file.close()
+ with open (corpdir+'/lit-seed{0}'.format(n), 'r') as q :
+ a = q.readline()
+ a = a[1:]
+ print ("AFL++ Autodict-QL by Microsvuln : Writing Token :" + str(a))
+ q.close()
+ with open (corpdir+'/lit-seed{0}'.format(n), 'w') as w1 :
+ w1.write(str(a))
+ print ("Done!")
+ w1.close()
+ except:
+ print("Error!")
+ n = n+1
+
+def main():
+ args = parse_args()
+ ensure_dir(args.corpdir)
+ do_analysis1(args.corpdir, args.infile)
+if __name__ == '__main__':
+ main() \ No newline at end of file
diff --git a/utils/autodict_ql/qlpack.yml b/utils/autodict_ql/qlpack.yml
new file mode 100644
index 00000000..c037a344
--- /dev/null
+++ b/utils/autodict_ql/qlpack.yml
@@ -0,0 +1,3 @@
+name: automate
+version: 0.0.0
+libraryPathDependencies: codeql-cpp
diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md
new file mode 100644
index 00000000..77a15f8e
--- /dev/null
+++ b/utils/autodict_ql/readme.md
@@ -0,0 +1,81 @@
+# Autodict-QL - Optimal Token Generation for Fuzzing
+
+## What is this?
+
+Autodict-QL is a plugin system that enables fast generation of tokens/dictionaries in a handy way that can be manipulated by the user (unlike the LLVM passes, which are hard to modify). This means that Autodict-QL is a scriptable feature which basically uses CodeQL (a powerful semantic code analysis engine) to fetch information from a code base.
+
+Tokens are useful when you perform fuzzing on different parsers. The AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campaign. If you are not familiar with dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries).
+
+
+## Why CodeQL ?
+We basically developed this plugin on top of the CodeQL engine because it gives the user scripting features, it's easier, and it's independent of the LLVM system. This means that a user can write their own CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts.
+
+
+## CodeQL scripts
+Currently, we have pushed some scripts as defaults for token generation. In addition, we provide every CodeQL script as a standalone script because it's easier to modify or test.
+
+Currently we provide the following CodeQL scripts:
+
+`strcmp-str.ql` is used to extract strings that are related to `strcmp` function.
+
+`strncmp-str.ql` is used to extract the strings from the `strncmp` function.
+
+`memcmp-str.ql` is used to extract the strings from the `memcmp` function.
+
+`litool.ql` extracts magic numbers in hexadecimal format.
+
+`strtool.ql` extracts strings using regex and dataflow concepts to capture the string comparison functions. If strcmp is rewritten in a project as Mystrcmp or something like strmycmp, then this script can catch the arguments and these are valuable tokens.
+
+You can write other CodeQL scripts to extract possible effective tokens if you think they can be useful.
+
+
+## Usage
+The usage of Autodict-QL is pretty easy, but let's describe it:
+
+1. First of all, you need to have CodeQL installed on the system. We make this possible with the `build-codeql.sh` bash script. This script will install CodeQL completely and will set the required environment variables for your system, so:
+
+` # chmod +x build-codeql.sh`
+
+` # codeql `
+
+Then you should get :
+
+` Usage: codeql <command> <argument>...
+Create and query CodeQL databases, or work with the QL language.
+
+GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is
+not itself free software. Type codeql --license to see the license terms.
+
+ --license Show the license terms for the CodeQL toolchain.
+Common options:
+ -h, --help Show this help text.
+ -v, --verbose Incrementally increase the number of progress messages printed.
+ -q, --quiet Incrementally decrease the number of progress messages printed.
+Some advanced options have been hidden; try --help -v for a fuller view.
+Commands:
+ query Compile and execute QL code.
+ bqrs Get information from .bqrs files.
+ database Create, analyze and process CodeQL databases.
+ dataset [Plumbing] Work with raw QL datasets.
+ test Execute QL unit tests.
+ resolve [Deep plumbing] Helper commands to resolve disk locations etc.
+ execute [Deep plumbing] Low-level commands that need special JVM options.
+ version Show the version of the CodeQL toolchain.
+ generate Generate formatted QL documentation.
+ github Commands useful for interacting with the GitHub API through CodeQL.
+`
+
+2. Compile your project with CodeQL: To use the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not hard.
+ - First you need to create a CodeQL database of the project codebase. Suppose we want to compile libxml with CodeQL. Go to the libxml directory and issue the following commands:
+ - `./configure --disable-shared`
+ - `codeql create database libxml-db --language=cpp --command=make`
+ - Now you have the CodeQL database of the project :-)
+3. To run Autodict-QL, you then need to create a folder named `automate` in the project you want to fuzz.
+ - `mkdir automate` (inside the libxml directory)
+4. Next, upgrade the CodeQL database you created in step 2 from inside the `automate` dir you created in step 3:
+ - `codeql database upgrade ../libxml-db`
+5. Everything is set! :-), now you should issue the following to get the tokens :
+ - `python3 autodict_ql.py [CURRENT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]`
+ - example : `python3 autodict_ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens`
+ - This will create the final `tokens` dir for you and you are done, then pass the tokens path to afl `-x` flag.
+6. Done! \ No newline at end of file
diff --git a/utils/autodict_ql/strtool.ql b/utils/autodict_ql/strtool.ql
index f78aabbb..253d1555 100644
--- a/utils/autodict_ql/strtool.ql
+++ b/utils/autodict_ql/strtool.ql
@@ -3,8 +3,8 @@ import semmle.code.cpp.dataflow.DataFlow
class StringLiteralNode extends DataFlow::Node {
StringLiteralNode() { this.asExpr() instanceof StringLiteral }
}
-class MemcmpArgNode extends DataFlow::Node {
- MemcmpArgNode() {
+class CmpArgNode extends DataFlow::Node {
+ CmpArgNode() {
exists(FunctionCall fc |
fc.getTarget().getName().regexpMatch(".*(str|mem|strn|b)*(cmp|str)*") and
fc.getArgument(0) = this.asExpr()
@@ -17,7 +17,7 @@ class MemcmpArgNode extends DataFlow::Node {
}
}
-from StringLiteralNode src, MemcmpArgNode arg
+from StringLiteralNode src, CmpArgNode arg
where
DataFlow::localFlow(src, arg)