utils/autodict_ql/autodict-ql.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146

#!/usr/bin/env python3
# AutoDict-QL - Optimal Token Generation for Fuzzing
# Part of AFL++ Project
# Developed and Maintained by Arash Ale Ebrahim (@Microsvuln)
# Usage : python3 autodict-ql.py [CURRENT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]
# CURRENT_DIR = Full path to your current directory
# CODEQL_DATABASE_PATH = Full path to your CodeQL database
# TOKEN_PATH = Folder name of the newly generated tokens
# Example : python3 autodict-ql.py /home/user/libxml/automate /home/user/libxml/libxml-db tokens
# Just pass the tokens folder to the -x flag of your fuzzer

import os
import string
import binascii
import codecs
import errno
import struct
import argparse
import shutil
import subprocess

from binascii import unhexlify


def ensure_dir(dir):
    """Create directory *dir* (and any missing parents) if it does not exist.

    An already existing directory is left untouched.

    Args:
        dir: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created, including the case
            where *dir* already exists but is not a directory.
    """
    # exist_ok=True tolerates an existing *directory* only; a regular file
    # sitting at the same path is reported instead of being silently accepted.
    os.makedirs(dir, exist_ok=True)


def parse_args(argv=None):
    """Parse the command-line arguments of the script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse falls back to ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``cur``, ``db`` and
        ``tokenpath``.
    """
    parser = argparse.ArgumentParser(
        description=(
            "AutoDict-QL - generate fuzzing dictionary tokens from a CodeQL "
            "database. Example usage: python3 autodict-ql.py "
            "/home/user/libxml/automate /home/user/libxml/libxml-db tokens"
        )
    )

    parser.add_argument(
        "cur",
        help="Full path of the directory holding the AutoDict-QL scripts and queries",
    )
    parser.add_argument("db", help="Full path of the CodeQL database")
    parser.add_argument("tokenpath", help="Destination directory for tokens")

    return parser.parse_args(argv)


def static_analysis(file, file2, cur, db):
    """Run one CodeQL query and save its standard output to a file.

    Executes ``codeql query run <cur>/<file2> -d <db>`` and writes whatever
    the command prints on standard output to ``<cur>/<file>``.

    Args:
        file: Name of the output file, created inside *cur*.
        file2: Name of the query file, looked up inside *cur*.
        cur: Directory containing the query and receiving the output file.
        db: Path of the CodeQL database handed to the ``-d`` option.

    Raises:
        OSError: If the output file cannot be opened or the ``codeql``
            executable cannot be started.
    """
    out_path = os.path.join(cur, file)
    query_path = os.path.join(cur, file2)
    with open(out_path, "w") as f:
        print(out_path)
        # The arguments are passed as a list (no shell involved) so paths
        # containing spaces or shell metacharacters reach codeql unchanged.
        # The exit status is not checked: a failing query simply leaves an
        # empty or partial output file.
        subprocess.run(["codeql", "query", "run", query_path, "-d", db], stdout=f)


def copy_tokens(cur, tokenpath):
    """Gather the generated tokens in the token directory and clean up.

    Every non-hidden entry of the intermediate directories ``strcmp-strs``,
    ``strncmp-strs``, ``memcmp-strs``, ``lits`` and ``strtool-strs`` below
    *cur* is moved into ``<cur>/<tokenpath>`` (replacing files of the same
    name). Afterwards the intermediate directories and the ``*.out`` files
    in *cur* are removed, and zero-byte files inside the token directory are
    deleted.

    All paths are resolved against *cur*, not against the working directory
    of the process, and no shell is involved, so paths containing spaces
    work.

    Args:
        cur: Directory that holds the intermediate results.
        tokenpath: Token directory, relative to *cur* (an absolute path is
            used as is).

    Raises:
        OSError: If a file cannot be moved or removed.
    """
    dest = os.path.join(cur, tokenpath)
    os.makedirs(dest, exist_ok=True)

    staging_dirs = (
        "strcmp-strs",
        "strncmp-strs",
        "memcmp-strs",
        "lits",
        "strtool-strs",
    )
    for dir_name in staging_dirs:
        src_dir = os.path.join(cur, dir_name)
        if not os.path.isdir(src_dir):
            # Nothing was produced for this category.
            continue
        for entry in sorted(os.listdir(src_dir)):
            # Like the shell pattern "*", leave dot-files alone.
            if entry.startswith("."):
                continue
            # Naming the full target path lets an existing token file of the
            # same name be replaced instead of raising shutil.Error.
            shutil.move(os.path.join(src_dir, entry), os.path.join(dest, entry))
        shutil.rmtree(src_dir, ignore_errors=True)

    # Remove the query result files ("*.out") left in cur.
    for entry in os.listdir(cur):
        path = os.path.join(cur, entry)
        if (
            entry.endswith(".out")
            and not entry.startswith(".")
            and os.path.isfile(path)
        ):
            os.remove(path)

    # Drop empty token files.
    for root, _dirs, files in os.walk(dest):
        for file_name in files:
            path = os.path.join(root, file_name)
            # lstat: a symlink is judged by its own size, not its target's.
            if os.lstat(path).st_size == 0:
                os.remove(path)


def codeql_analysis(cur, db):
    """Run all CodeQL queries, then start the token extraction scripts.

    Each query is executed through static_analysis(), which stores its
    output in the paired ``.out`` file inside *cur*. Once all queries have
    run, start_autodict() is invoked for *cur*.

    Args:
        cur: Directory containing the queries and receiving the outputs.
        db: Path of the CodeQL database.
    """
    # (output file, query file) pairs, executed in this order.
    query_plan = (
        ("litout.out", "litool.ql"),
        ("strcmp-strings.out", "strcmp-str.ql"),
        ("strncmp-strings.out", "strncmp-str.ql"),
        ("memcmp-strings.out", "memcmp-str.ql"),
        ("strtool-strings.out", "strtool.ql"),
    )
    for out_name, query_name in query_plan:
        static_analysis(out_name, query_name, cur, db)

    # start_autodict() does not use its first argument; 0 is a placeholder.
    start_autodict(0, cur)


def start_autodict(tokenpath, cur):
    """Run the helper scripts in *cur* one after another.

    Each helper is started as ``python3 <script> <directory> <out file>``
    and waited for before the next one starts. The result of
    ``communicate()`` is printed after every run.

    Args:
        tokenpath: Not used by this function.
        cur: Directory that contains the helper scripts and the ``.out``
            files. The directories passed to the helpers are below it too.
    """
    # (script, directory argument, query output argument), all below cur.
    # NOTE(review): the helpers presumably write tokens into the directory
    # argument -- confirm against the helper scripts.
    jobs = (
        ("/litan.py", "/lits/", "/litout.out"),
        ("/strcmp-strings.py", "/strcmp-strs/", "/strcmp-strings.out"),
        ("/strncmp-strings.py", "/strncmp-strs/", "/strncmp-strings.out"),
        ("/memcmp-strings.py", "/memcmp-strs/", "/memcmp-strings.out"),
        ("/stan-strings.py", "/strtool-strs/", "/strtool-strings.out"),
    )
    for script, target_dir, query_output in jobs:
        argv = ["python3", cur + script, cur + target_dir, cur + query_output]
        proc = subprocess.Popen(argv)
        print(proc.communicate())


def main():
    """Entry point: run the CodeQL queries and collect the tokens.

    Parses the command line, makes sure the token directory exists, runs the
    analysis for the given database and finally moves the generated tokens
    into the token directory.
    """
    args = parse_args()
    # copy_tokens() moves the tokens into <cur>/<tokenpath>, so the directory
    # is created there rather than relative to the working directory of the
    # process (an absolute tokenpath is kept as is by os.path.join).
    ensure_dir(os.path.join(args.cur, args.tokenpath))
    codeql_analysis(args.cur, args.db)
    copy_tokens(args.cur, args.tokenpath)


# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()