| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
 | #!/usr/bin/python
""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """
from __future__ import print_function
from copy import deepcopy
from lxml import etree as ET
import random, re, io
###########################
# The XmlMutatorMin class #
###########################
class XmlMutatorMin:
    """
        Randomly mutate an XML document (deletions and attribute fuzzing).

        Typical usage: init_from_string(), then mutate(), then save_to_string().
        Call reset() to go back to the document given to init_from_string().

        All random decisions use the module-level "random" generator, which is
        (re)seeded by the constructor.

        Optionals parameters:
            seed        Seed used by the PRNG (default: "RANDOM")
            verbose     Verbosity (default: False)
    """

    # Matches XPath-like function calls such as "name(arg1, 'arg2')" inside an
    # attribute value. Group "func" is the function name, group "args" the raw
    # text between the parentheses (non-greedy, so it stops at the first ")").
    _FUNC_CALL_RE = re.compile(r"(?P<func>[a-zA-Z:\-]+)\((?P<args>.*?)\)")

    def __init__(self, seed="RANDOM", verbose=False):
        """ Initialize seed, documents and mutators

            seed is converted with str(). The value "RANDOM" lets the random
            module choose its own seed; any other value is used as a static
            seed, which makes a run reproducible. """

        # Verbosity
        self.verbose = verbose

        # Initialize PRNG
        self.seed = str(seed)
        if self.seed == "RANDOM":
            random.seed()
        else:
            if self.verbose:
                print("Static seed '%s'" % self.seed)
            random.seed(self.seed)

        # Initialize input and output documents
        self.input_tree = None  # Pristine parsed document, never mutated
        self.tree = None        # Working copy, modified by the mutators

        # High-level mutators (no database needed)
        # Names are given without the "__" prefix, see __exec_among()
        hl_mutators_delete = [
            "del_node_and_children",
            "del_node_but_children",
            "del_attribute",
            "del_content",
        ]  # Delete items
        hl_mutators_fuzz = ["fuzz_attribute"]  # Randomly change attribute values

        # Exposed mutators
        self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete

    def __parse_xml(self, xml):
        """ Parse an XML string. Basic wrapper around lxml.parse()

            xml must be a bytes-like object (it is wrapped in io.BytesIO).
            Returns the parsed tree.
            Raises RuntimeError if parsing fails with ET.ParseError or
            LookupError. """

        try:
            # Function parse() takes care of comments / DTD / processing instructions / ...
            tree = ET.parse(io.BytesIO(xml))
        except ET.ParseError:
            raise RuntimeError("XML isn't well-formed!")
        except LookupError as e:
            # NOTE(review): presumably raised for an unknown document encoding
            raise RuntimeError(e)

        # Return a document wrapper
        return tree

    def __exec_among(self, module, functions, min_times, max_times):
        """ Randomly execute $functions between $min and $max times

            module      Object holding the mutators
            functions   Mutator names, without the mangled "__" prefix
            min_times   Lower bound of the number of calls (inclusive)
            max_times   Upper bound of the number of calls (inclusive) """

        # range() instead of xrange(): the latter doesn't exist in Python 3
        for _ in range(random.randint(min_times, max_times)):
            # Function names are mangled because they are "private"
            getattr(module, "_XmlMutatorMin__" + random.choice(functions))()

    def __serialize_xml(self, tree):
        """ Serialize a XML document. Basic wrapper around lxml.tostring()

            The XML declaration is emitted and the encoding of the parsed
            document (tree.docinfo.encoding) is reused. """

        return ET.tostring(tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding)

    def __ver(self, version):
        """ Helper for displaying lxml version numbers

            Turns a sequence like (4, 9, 1) into the string "4.9.1". """

        return ".".join(map(str, version))

    def reset(self):
        """ Reset the mutator: drop all mutations and restart from the input document """

        self.tree = deepcopy(self.input_tree)

    def init_from_string(self, input_string):
        """ Initialize the mutator from a XML string

            Raises RuntimeError if the document can't be parsed. """

        # Keep the parsed input untouched, so that reset() can restore it
        self.input_tree = self.__parse_xml(input_string)

        # Get a working copy
        self.tree = deepcopy(self.input_tree)

    def save_to_string(self):
        """ Return the current XML document, serialized with the encoding of the input document """

        # Return a text version of the tree
        return self.__serialize_xml(self.tree)

    def __pick_element(self, exclude_root_node=False):
        """ Pick a random element from the current document

            Returns a tuple (index, element), where index is the position of
            the element in document order (the root node is #0).
            Returns (None, None) if there is no candidate, which happens when
            the root node is excluded and has no descendant element. """

        # Get a list of all elements, but nodes like PI and comments
        elems = list(self.tree.getroot().iter(tag=ET.Element))

        # Is the root node excluded? It is always the first item of the list
        start = 1 if exclude_root_node else 0

        # Nothing to pick from (no PRNG call in that case)
        if start >= len(elems):
            return (None, None)

        # Pick a random element
        elem_id = random.randint(start, len(elems) - 1)
        return (elem_id, elems[elem_id])

    def __fuzz_attribute(self):
        """ Fuzz (part of) an attribute value

            A random attribute of a random element is selected. Either its
            whole value or one argument of a function call found in the value
            is replaced by a canned "interesting" value. Nothing is changed if
            the selected element has no attribute. """

        # Select a node to modify
        (rand_elem_id, rand_elem) = self.__pick_element()

        # Get all the attributes
        attribs = rand_elem.keys()

        # Is there attributes?
        if not attribs:
            if self.verbose:
                print("No attribute: can't replace!")
            return

        # Pick a random attribute
        rand_attrib_id = random.randint(0, len(attribs) - 1)
        rand_attrib = attribs[rand_attrib_id]

        # We have the attribute to modify
        # Get its value
        attrib_value = rand_elem.get(rand_attrib)

        # Should we work on the whole value?
        # unclean_value stays None when the whole value is fuzzed, otherwise
        # it holds the raw (unstripped) argument which will be substituted
        calls = self._FUNC_CALL_RE.findall(attrib_value)
        unclean_value = None
        if random.choice((True, False)) and calls:
            # Randomly pick one the function calls
            (_func, args) = random.choice(calls)
            # Split by "," and randomly pick one of the arguments
            unclean_value = random.choice(args.split(','))
            # Remove superfluous characters
            value = unclean_value.strip(" ").strip("'")
        else:
            value = attrib_value

        # For each type, define some possible replacement values
        # The tuples are rebuilt at each call because some items are random
        choices_number = (
            "0",
            "11111",
            "-128",
            "2",
            "-1",
            "1/3",
            "42/0",
            "1094861636 idiv 1.0",
            "-1123329771506872 idiv 3.8",
            "17=$numericRTF",
            str(3 + random.randrange(0, 100)),
        )
        choices_letter = (
            "P" * (25 * random.randrange(1, 100)),
            "%s%s%s%s%s%s",
            "foobar",
        )
        choices_alnum = (
            "Abc123",
            "020F0302020204030204",
            "020F0302020204030204" * (random.randrange(5, 20)),
        )

        # Fuzz the value
        # Each type check is guarded by a coin flip, so a value doesn't always
        # get a replacement of its own type and may fall through to the default
        if random.choice((True, False)) and value == "":
            # Empty
            new_value = value
        elif random.choice((True, False)) and value.isdigit():
            # Numbers
            new_value = random.choice(choices_number)
        elif random.choice((True, False)) and value.isalpha():
            # Letters
            new_value = random.choice(choices_letter)
        elif random.choice((True, False)) and value.isalnum():
            # Alphanumeric
            new_value = random.choice(choices_alnum)
        else:
            # Default type
            new_value = random.choice(choices_alnum + choices_letter + choices_number)

        # If we worked on a substring, apply changes to the whole string
        if unclean_value is not None:
            # No ' around empty values
            if new_value != "" and value != "":
                new_value = "'" + new_value + "'"
            # Apply changes (every occurrence of the raw argument is replaced)
            # NOTE(review): for a call without argument, unclean_value is ""
            # and replace() then inserts new_value between every character of
            # the attribute value. Confirm whether this is intended.
            new_value = attrib_value.replace(unclean_value, new_value)

        # Log something
        if self.verbose:
            print("Fuzzing attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag))

        # Modify the attribute
        # Only byte strings need decoding: text strings have no decode()
        # method in Python 3
        if isinstance(new_value, bytes):
            new_value = new_value.decode("utf-8")
        rand_elem.set(rand_attrib, new_value)

    def __del_node_and_children(self):
        """ High-level minimizing mutator
            Delete a random node and its children (i.e. delete a random tree) """

        self.__del_node(True)

    def __del_node_but_children(self):
        """ High-level minimizing mutator
            Delete a random node but its children (i.e. link them to the parent of the deleted node) """

        self.__del_node(False)

    def __del_node(self, delete_children):
        """ Called by the __del_node_* mutators

            delete_children     If True, the whole subtree is removed.
                                Otherwise the children are first appended to
                                the parent of the deleted node. """

        # Select a node to modify (but the root one)
        (rand_elem_id, rand_elem) = self.__pick_element(exclude_root_node=True)

        # If the document includes only a top-level element
        # Then we can't pick a element (given that "exclude_root_node = True")

        # Is the document deep enough?
        if rand_elem is None:
            if self.verbose:
                print("Can't delete a node: document not deep enough!")
            return

        # Log something
        if self.verbose:
            but_or_and = "and" if delete_children else "but"
            print("Deleting tag #%i '%s' %s its children" % (rand_elem_id, rand_elem.tag, but_or_and))

        # The root node is never picked, so there is always a parent
        parent = rand_elem.getparent()

        if not delete_children:
            # Link children of the random (soon to be deleted) node to its parent
            # Iterate over a snapshot: append() moves each child out of the
            # node whose children are being walked
            for child in list(rand_elem):
                parent.append(child)

        # Remove the node
        parent.remove(rand_elem)

    def __del_content(self):
        """ High-level minimizing mutator
            Delete the attributes and children of a random node """

        # Select a node to modify
        (rand_elem_id, rand_elem) = self.__pick_element()

        # Log something
        if self.verbose:
            print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag))

        # Reset the node
        rand_elem.clear()

    def __del_attribute(self):
        """ High-level minimizing mutator
            Delete a random attribute from a random node """

        # Select a node to modify
        (rand_elem_id, rand_elem) = self.__pick_element()

        # Get all the attributes
        attribs = rand_elem.keys()

        # Is there attributes?
        if not attribs:
            if self.verbose:
                print("No attribute: can't delete!")
            return

        # Pick a random attribute
        rand_attrib_id = random.randint(0, len(attribs) - 1)
        rand_attrib = attribs[rand_attrib_id]

        # Log something
        if self.verbose:
            print("Deleting attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag))

        # Delete the attribute
        rand_elem.attrib.pop(rand_attrib)

    def mutate(self, min=1, max=5):
        """ Execute some high-level mutators between $min and $max times (both inclusive)

            The mutators are randomly picked from self.hl_mutators_all and
            applied to the working copy of the document. """

        # High-level mutation
        self.__exec_among(self, self.hl_mutators_all, min, max)
 |