about summary refs log tree commit diff
path: root/custom_mutators/examples/XmlMutatorMin.py
diff options
context:
space:
mode:
Diffstat (limited to 'custom_mutators/examples/XmlMutatorMin.py')
-rw-r--r--custom_mutators/examples/XmlMutatorMin.py348
1 files changed, 348 insertions, 0 deletions
diff --git a/custom_mutators/examples/XmlMutatorMin.py b/custom_mutators/examples/XmlMutatorMin.py
new file mode 100644
index 00000000..3e6cd0ff
--- /dev/null
+++ b/custom_mutators/examples/XmlMutatorMin.py
@@ -0,0 +1,348 @@
+#!/usr/bin/python
+
+""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """
+
+from __future__ import print_function
+from copy import deepcopy
+from lxml import etree as ET
+import random, re, io
+
+
+###########################
+# The XmlMutatorMin class #
+###########################
+
+
+class XmlMutatorMin:
+
+    """
+    Optionals parameters:
+        seed        Seed used by the PRNG (default: "RANDOM")
+        verbose     Verbosity (default: False)
+    """
+
+    def __init__(self, seed="RANDOM", verbose=False):
+
+        """ Initialize seed, database and mutators """
+
+        # Verbosity
+        self.verbose = verbose
+
+        # Initialize PRNG
+        self.seed = str(seed)
+        if self.seed == "RANDOM":
+            random.seed()
+        else:
+            if self.verbose:
+                print("Static seed '%s'" % self.seed)
+            random.seed(self.seed)
+
+        # Initialize input and output documents
+        self.input_tree = None
+        self.tree = None
+
+        # High-level mutators (no database needed)
+        hl_mutators_delete = [
+            "del_node_and_children",
+            "del_node_but_children",
+            "del_attribute",
+            "del_content",
+        ]  # Delete items
+        hl_mutators_fuzz = ["fuzz_attribute"]  # Randomly change attribute values
+
+        # Exposed mutators
+        self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete
+
+    def __parse_xml(self, xml):
+
+        """ Parse an XML string. Basic wrapper around lxml.parse() """
+
+        try:
+            # Function parse() takes care of comments / DTD / processing instructions / ...
+            tree = ET.parse(io.BytesIO(xml))
+        except ET.ParseError:
+            raise RuntimeError("XML isn't well-formed!")
+        except LookupError as e:
+            raise RuntimeError(e)
+
+        # Return a document wrapper
+        return tree
+
+    def __exec_among(self, module, functions, min_times, max_times):
+
+        """ Randomly execute $functions between $min and $max times """
+
+        for i in xrange(random.randint(min_times, max_times)):
+            # Function names are mangled because they are "private"
+            getattr(module, "_XmlMutatorMin__" + random.choice(functions))()
+
+    def __serialize_xml(self, tree):
+
+        """ Serialize a XML document. Basic wrapper around lxml.tostring() """
+
+        return ET.tostring(
+            tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding
+        )
+
+    def __ver(self, version):
+
+        """ Helper for displaying lxml version numbers """
+
+        return ".".join(map(str, version))
+
+    def reset(self):
+
+        """ Reset the mutator """
+
+        self.tree = deepcopy(self.input_tree)
+
+    def init_from_string(self, input_string):
+
+        """ Initialize the mutator from a XML string """
+
+        # Get a pointer to the top-element
+        self.input_tree = self.__parse_xml(input_string)
+
+        # Get a working copy
+        self.tree = deepcopy(self.input_tree)
+
+    def save_to_string(self):
+
+        """ Return the current XML document as UTF-8 string """
+
+        # Return a text version of the tree
+        return self.__serialize_xml(self.tree)
+
+    def __pick_element(self, exclude_root_node=False):
+
+        """ Pick a random element from the current document """
+
+        # Get a list of all elements, but nodes like PI and comments
+        elems = list(self.tree.getroot().iter(tag=ET.Element))
+
+        # Is the root node excluded?
+        if exclude_root_node:
+            start = 1
+        else:
+            start = 0
+
+        # Pick a random element
+        try:
+            elem_id = random.randint(start, len(elems) - 1)
+            elem = elems[elem_id]
+        except ValueError:
+            # Should only occurs if "exclude_root_node = True"
+            return (None, None)
+
+        return (elem_id, elem)
+
+    def __fuzz_attribute(self):
+
+        """ Fuzz (part of) an attribute value """
+
+        # Select a node to modify
+        (rand_elem_id, rand_elem) = self.__pick_element()
+
+        # Get all the attributes
+        attribs = rand_elem.keys()
+
+        # Is there attributes?
+        if len(attribs) < 1:
+            if self.verbose:
+                print("No attribute: can't replace!")
+            return
+
+        # Pick a random attribute
+        rand_attrib_id = random.randint(0, len(attribs) - 1)
+        rand_attrib = attribs[rand_attrib_id]
+
+        # We have the attribute to modify
+        # Get its value
+        attrib_value = rand_elem.get(rand_attrib)
+        # print("- Value: " + attrib_value)
+
+        # Should we work on the whole value?
+        func_call = "(?P<func>[a-zA-Z:\-]+)\((?P<args>.*?)\)"
+        p = re.compile(func_call)
+        l = p.findall(attrib_value)
+        if random.choice((True, False)) and l:
+            # Randomly pick one the function calls
+            (func, args) = random.choice(l)
+            # Split by "," and randomly pick one of the arguments
+            value = random.choice(args.split(","))
+            # Remove superfluous characters
+            unclean_value = value
+            value = value.strip(" ").strip("'")
+            # print("Selected argument: [%s]" % value)
+        else:
+            value = attrib_value
+
+        # For each type, define some possible replacement values
+        choices_number = (
+            "0",
+            "11111",
+            "-128",
+            "2",
+            "-1",
+            "1/3",
+            "42/0",
+            "1094861636 idiv 1.0",
+            "-1123329771506872 idiv 3.8",
+            "17=$numericRTF",
+            str(3 + random.randrange(0, 100)),
+        )
+
+        choices_letter = (
+            "P" * (25 * random.randrange(1, 100)),
+            "%s%s%s%s%s%s",
+            "foobar",
+        )
+
+        choices_alnum = (
+            "Abc123",
+            "020F0302020204030204",
+            "020F0302020204030204" * (random.randrange(5, 20)),
+        )
+
+        # Fuzz the value
+        if random.choice((True, False)) and value == "":
+
+            # Empty
+            new_value = value
+
+        elif random.choice((True, False)) and value.isdigit():
+
+            # Numbers
+            new_value = random.choice(choices_number)
+
+        elif random.choice((True, False)) and value.isalpha():
+
+            # Letters
+            new_value = random.choice(choices_letter)
+
+        elif random.choice((True, False)) and value.isalnum():
+
+            # Alphanumeric
+            new_value = random.choice(choices_alnum)
+
+        else:
+
+            # Default type
+            new_value = random.choice(choices_alnum + choices_letter + choices_number)
+
+        # If we worked on a substring, apply changes to the whole string
+        if value != attrib_value:
+            # No ' around empty values
+            if new_value != "" and value != "":
+                new_value = "'" + new_value + "'"
+            # Apply changes
+            new_value = attrib_value.replace(unclean_value, new_value)
+
+        # Log something
+        if self.verbose:
+            print(
+                "Fuzzing attribute #%i '%s' of tag #%i '%s'"
+                % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
+            )
+
+        # Modify the attribute
+        rand_elem.set(rand_attrib, new_value.decode("utf-8"))
+
+    def __del_node_and_children(self):
+
+        """High-level minimizing mutator
+        Delete a random node and its children (i.e. delete a random tree)"""
+
+        self.__del_node(True)
+
+    def __del_node_but_children(self):
+
+        """High-level minimizing mutator
+        Delete a random node but its children (i.e. link them to the parent of the deleted node)"""
+
+        self.__del_node(False)
+
+    def __del_node(self, delete_children):
+
+        """ Called by the __del_node_* mutators """
+
+        # Select a node to modify (but the root one)
+        (rand_elem_id, rand_elem) = self.__pick_element(exclude_root_node=True)
+
+        # If the document includes only a top-level element
+        # Then we can't pick a element (given that "exclude_root_node = True")
+
+        # Is the document deep enough?
+        if rand_elem is None:
+            if self.verbose:
+                print("Can't delete a node: document not deep enough!")
+            return
+
+        # Log something
+        if self.verbose:
+            but_or_and = "and" if delete_children else "but"
+            print(
+                "Deleting tag #%i '%s' %s its children"
+                % (rand_elem_id, rand_elem.tag, but_or_and)
+            )
+
+        if delete_children is False:
+            # Link children of the random (soon to be deleted) node to its parent
+            for child in rand_elem:
+                rand_elem.getparent().append(child)
+
+        # Remove the node
+        rand_elem.getparent().remove(rand_elem)
+
+    def __del_content(self):
+
+        """High-level minimizing mutator
+        Delete the attributes and children of a random node"""
+
+        # Select a node to modify
+        (rand_elem_id, rand_elem) = self.__pick_element()
+
+        # Log something
+        if self.verbose:
+            print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag))
+
+        # Reset the node
+        rand_elem.clear()
+
+    def __del_attribute(self):
+
+        """High-level minimizing mutator
+        Delete a random attribute from a random node"""
+
+        # Select a node to modify
+        (rand_elem_id, rand_elem) = self.__pick_element()
+
+        # Get all the attributes
+        attribs = rand_elem.keys()
+
+        # Is there attributes?
+        if len(attribs) < 1:
+            if self.verbose:
+                print("No attribute: can't delete!")
+            return
+
+        # Pick a random attribute
+        rand_attrib_id = random.randint(0, len(attribs) - 1)
+        rand_attrib = attribs[rand_attrib_id]
+
+        # Log something
+        if self.verbose:
+            print(
+                "Deleting attribute #%i '%s' of tag #%i '%s'"
+                % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
+            )
+
+        # Delete the attribute
+        rand_elem.attrib.pop(rand_attrib)
+
+    def mutate(self, min=1, max=5):
+
+        """ Execute some high-level mutators between $min and $max times, then some medium-level ones """
+
+        # High-level mutation
+        self.__exec_among(self, self.hl_mutators_all, min, max)