6 files changed, 665 insertions, 0 deletions
diff --git a/examples/python_mutators/README b/examples/python_mutators/README
new file mode 100644
index 00000000..4e7d62bc
--- /dev/null
+++ b/examples/python_mutators/README
@@ -0,0 +1,18 @@
+These are example and helper files for the AFL_PYTHON_MODULE feature.
+See docs/python_mutators.txt for more information
+
+Note that if you compile with python3.7 you must use python3 scripts, and if
+you compile with python2.7 you must use python2 scripts!
+
+
+example.py	- this is the template you can use, the functions are there
+		  but they are empty
+
+simple-chunk-replace.py - this is a simple example where chunks are replaced
+
+common.py 	- this can be used for common functions and helpers.
+		  the examples do not use this though. But you can :)
+
+wrapper_afl_min.py - mutation of XML documents, loads XmlMutatorMin.py
+
+XmlMutatorMin.py - module for XML mutation
diff --git a/examples/python_mutators/XmlMutatorMin.py b/examples/python_mutators/XmlMutatorMin.py
new file mode 100644
index 00000000..058b7e61
--- /dev/null
+++ b/examples/python_mutators/XmlMutatorMin.py
@@ -0,0 +1,331 @@
+#!/usr/bin/python
+
+""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """
+
+from __future__ import print_function
+from copy import deepcopy
+from lxml import etree as ET
+import random, re, io
+
+###########################
+# The XmlMutatorMin class #
+###########################
+
+class XmlMutatorMin:
+
+    """
+        Optionals parameters:
+            seed        Seed used by the PRNG (default: "RANDOM")
+            verbose     Verbosity (default: False)
+    """
+
+    def __init__(self, seed="RANDOM", verbose=False):
+
+        """ Set up verbosity, the PRNG, the document slots and the mutator list
+
+            seed     converted with str(); "RANDOM" calls random.seed() without a
+                     value, anything else is used as a fixed seed
+            verbose  when true, progress messages are printed
+        """
+
+        self.verbose = verbose
+        self.seed = str(seed)
+
+        # PRNG: fixed seed unless the caller asked for "RANDOM"
+        if self.seed != "RANDOM":
+            if self.verbose:
+                print("Static seed '%s'" % self.seed)
+            random.seed(self.seed)
+        else:
+            random.seed()
+
+        # No document loaded yet (pristine input and working copy)
+        self.input_tree = self.tree = None
+
+        # Exposed high-level mutators: the fuzzing one first, then the deleting ones
+        deleters = ["del_node_and_children", "del_node_but_children", "del_attribute", "del_content"]
+        self.hl_mutators_all = ["fuzz_attribute"] + deleters
+        
+    def __parse_xml (self, xml):
+
+        """ Parse XML given as a byte string; ParseError / LookupError are re-raised as RuntimeError """
+
+        try:
+            # Function parse() takes care of comments / DTD / processing instructions / ...
+            tree = ET.parse(io.BytesIO(xml))
+        except ET.ParseError:
+            raise RuntimeError("XML isn't well-formed!")
+        except LookupError as e:
+            # NOTE(review): presumably an unknown encoding name in the declaration -- confirm
+            raise RuntimeError(e)
+
+        return tree
+
+    def __exec_among (self, module, functions, min_times, max_times):
+
+        """ Randomly execute $functions of $module between $min_times and $max_times times """
+
+        # Names are mangled because they are "private"; range() exists on Python 2 and 3
+        for _ in range (random.randint (min_times, max_times)):
+            getattr (module, "_XmlMutatorMin__" + random.choice(functions)) ()
+
+    def __serialize_xml (self, tree):
+
+        """ Serialize an lxml document with its XML declaration, in the document's own encoding """
+        doc_encoding = tree.docinfo.encoding
+        return ET.tostring(tree, encoding=doc_encoding, xml_declaration=True, with_tail=False)
+
+    def __ver (self, version):
+
+        """ Format a version tuple such as (4, 2, 1) as the dotted string "4.2.1" """
+
+        return ".".join(str(part) for part in version)
+
+    def reset (self):
+
+        """ Drop every mutation: the working tree becomes a new deep copy of the input tree """
+        pristine_copy = deepcopy(self.input_tree)
+        self.tree = pristine_copy
+
+    def init_from_string (self, input_string):
+
+        """ Load a document: parse $input_string, keep the pristine tree and a working copy """
+
+        parsed = self.__parse_xml(input_string)
+
+        # The pristine tree is kept untouched; mutators only work on the deep copy
+        self.input_tree = parsed
+        self.tree = deepcopy(parsed)
+
+    def save_to_string (self):
+
+        """ Return the current (possibly mutated) document, serialized in the document's encoding """
+
+        serialized = self.__serialize_xml(self.tree)
+        return serialized
+
+    def __pick_element (self, exclude_root_node = False):
+
+        """ Choose a random element of the working document
+
+            Returns an (index, element) pair, the index being the position in the
+            list of candidates; (None, None) when no element is eligible """
+
+        # Only real elements are candidates (nodes like PI and comments are left out)
+        candidates = list(self.tree.getroot().iter(tag=ET.Element))
+
+        # The root node is the first candidate, so starting at index 1
+        # is enough to exclude it
+        first_index = 1 if exclude_root_node else 0
+
+        try:
+            # randint() raises ValueError on an empty range, which should only
+            # occur if "exclude_root_node = True"
+            picked_id = random.randint (first_index, len(candidates) - 1)
+        except ValueError:
+            return (None, None)
+
+        return (picked_id, candidates[picked_id])
+
+    def __fuzz_attribute (self):
+
+        """ Fuzz an attribute value of a random element: the whole value, or one argument
+            of a function-call-like substring of it, is replaced by a canned test value """
+
+        # Select a node to modify
+        (rand_elem_id, rand_elem) = self.__pick_element()
+
+        # Get all the attributes
+        attribs = rand_elem.keys()
+
+        # Are there any attributes?
+        if not attribs:
+            if self.verbose:
+                print("No attribute: can't replace!")
+            return
+
+        # Pick a random attribute
+        rand_attrib_id = random.randint (0, len(attribs) - 1)
+        rand_attrib = attribs[rand_attrib_id]
+
+        # Get the value of the attribute to modify
+        attrib_value = rand_elem.get(rand_attrib)
+
+        # Should we work on the whole value? (raw string: "\-" and "\(" are regex escapes)
+        func_call = r"(?P<func>[a-zA-Z:\-]+)\((?P<args>.*?)\)"
+        pattern = re.compile(func_call)
+        calls = pattern.findall(attrib_value)
+        if random.choice((True,False)) and calls:
+            # Randomly pick one of the function calls
+            (_func, args) = random.choice(calls)
+            # Split by "," and randomly pick one of the arguments
+            value = random.choice(args.split(','))
+            # Keep the raw argument (it is what gets replaced), then remove superfluous characters
+            unclean_value = value
+            value = value.strip(" ").strip("'")
+        else:
+            value = attrib_value
+
+        # For each type, define some possible replacement values
+        choices_number =    (
+                                "0",
+                                "11111",
+                                "-128",
+                                "2",
+                                "-1",
+                                "1/3",
+                                "42/0",
+                                "1094861636 idiv 1.0",
+                                "-1123329771506872 idiv 3.8",
+                                "17=$numericRTF",
+                                str(3 + random.randrange(0, 100)),
+                            )
+
+        choices_letter =    (
+                                "P" * (25 * random.randrange(1, 100)),
+                                "%s%s%s%s%s%s",
+                                "foobar",
+                            )
+
+        choices_alnum =     (
+                                "Abc123",
+                                "020F0302020204030204",
+                                "020F0302020204030204" * (random.randrange(5, 20)),
+                            )
+
+        # Fuzz the value: each type test is preceded by a coin flip, else it falls through
+        if random.choice((True,False)) and value == "":
+
+            # Empty
+            new_value = value
+
+        elif random.choice((True,False)) and value.isdigit():
+
+            # Numbers
+            new_value = random.choice(choices_number)
+
+        elif random.choice((True,False)) and value.isalpha():
+
+            # Letters
+            new_value = random.choice(choices_letter)
+
+        elif random.choice((True,False)) and value.isalnum():
+
+            # Alphanumeric
+            new_value = random.choice(choices_alnum)
+
+        else:
+
+            # Default type
+            new_value = random.choice(choices_alnum + choices_letter + choices_number)
+
+        # If we worked on a substring, apply changes to the whole string
+        if value != attrib_value:
+            # No ' around empty values
+            if new_value != "" and value != "":
+                new_value = "'" + new_value + "'"
+            # Apply changes
+            new_value = attrib_value.replace(unclean_value, new_value)
+
+        # Log something
+        if self.verbose:
+            print("Fuzzing attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag))
+
+        # Modify the attribute (decode byte strings only: Python 3 text has no decode())
+        if isinstance(new_value, bytes):
+            new_value = new_value.decode("utf-8")
+        rand_elem.set(rand_attrib, new_value)
+
+    def __del_node_and_children (self):
+
+        """ High-level minimizing mutator
+            Remove a random non-root node together with all of its children """
+
+        self.__del_node(delete_children=True)
+
+    def __del_node_but_children (self):
+
+        """ High-level minimizing mutator
+            Remove a random non-root node and hand its children over to its parent """
+
+        self.__del_node(delete_children=False)
+
+    def __del_node (self, delete_children):
+
+        """ Shared implementation of the __del_node_* mutators
+            delete_children  True: the children disappear together with the node
+                             False: the children are first moved to the node's parent """
+
+        # The root node is never a candidate
+        (victim_id, victim) = self.__pick_element (exclude_root_node = True)
+
+        # Nothing can be picked when the document only holds its top-level element
+        if victim is None:
+            if self.verbose:
+                print("Can't delete a node: document not deep enough!")
+            return
+
+        if self.verbose:
+            but_or_and = "but"
+            if delete_children:
+                but_or_and = "and"
+            print("Deleting tag #%i '%s' %s its children" % (victim_id, victim.tag, but_or_and))
+
+        parent = victim.getparent()
+        if delete_children is False:
+            # Re-attach the children to the parent, after the parent's current children
+            for child in victim:
+                parent.append(child)
+
+        parent.remove(victim)
+
+    def __del_content (self):
+
+        """ High-level minimizing mutator
+            Empty a random node (the root included) by calling clear() on it """
+
+        # Any element may be chosen
+        (target_id, target) = self.__pick_element()
+
+        # Report what is about to be emptied
+        if self.verbose:
+            print("Reseting tag #%i '%s'" % (target_id, target.tag))
+
+        # clear() drops the node's attributes and children
+        target.clear()
+
+    def __del_attribute (self):
+
+        """ High-level minimizing mutator
+            Remove one randomly chosen attribute from a randomly chosen node """
+
+        # Any element may be chosen
+        (target_id, target) = self.__pick_element()
+
+        # Names of its attributes
+        names = target.keys()
+
+        # Nothing to do on a node without attributes
+        if not names:
+            if self.verbose:
+                print("No attribute: can't delete!")
+            return
+
+        # Draw the index of the attribute to remove
+        name_id = random.randint (0, len(names) - 1)
+        name = names[name_id]
+
+        # Report what is about to be removed
+        if self.verbose:
+            print("Deleting attribute #%i '%s' of tag #%i '%s'" % (name_id, name, target_id, target.tag))
+
+        # Drop the attribute from the element
+        del target.attrib[name]
+
+    def mutate (self, min=1, max=5):
+
+        """ Apply between $min and $max (inclusive) randomly chosen high-level mutators to the document """
+
+        # The mutators are looked up by name on this very instance
+        self.__exec_among(module=self, functions=self.hl_mutators_all, min_times=min, max_times=max)
+
diff --git a/examples/python_mutators/common.py b/examples/python_mutators/common.py
new file mode 100644
index 00000000..28b8ee80
--- /dev/null
+++ b/examples/python_mutators/common.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# encoding: utf-8
+'''
+Module containing functions shared between multiple AFL modules
+
+@author:     Christian Holler (:decoder)
+
+@license:
+
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+@contact:    choller@mozilla.com
+'''
+
+from __future__ import print_function
+import random
+import os
+import re
+
+def randel(l):
+    '''Return a random element of l, or None when l is empty.'''
+    # Conditional expression: the index is only drawn for a non-empty sequence
+    return l[random.randint(0, len(l) - 1)] if l else None
+
+def randel_pop(l):
+    '''Remove and return a random element of l; return None when l is empty.'''
+    # The list is modified in place; nothing is drawn or popped when it is empty
+    return l.pop(random.randint(0, len(l) - 1)) if l else None
+
+def write_exc_example(data, exc):
+    '''Write data to a file named after the sanitized repr(exc), unless that file already exists.'''
+    target = re.sub(r'[^a-zA-Z0-9]', '_', repr(exc))
+    if not os.path.exists(target):
+        with open(target, 'w') as f:
+            f.write(data)
diff --git a/examples/python_mutators/example.py b/examples/python_mutators/example.py
new file mode 100644
index 00000000..d32a7eb2
--- /dev/null
+++ b/examples/python_mutators/example.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+# encoding: utf-8
+'''
+Example Python Module for AFLFuzz
+
+@author:     Christian Holler (:decoder)
+
+@license:
+
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+@contact:    choller@mozilla.com
+'''
+
+import random
+
+def init(seed):
+    '''
+    Seed the module-wide RNG. Called once when AFLFuzz starts up.
+
+    @type seed: int
+    @param seed: A 32-bit random value
+    '''
+    random.seed(seed)
+    return 0
+
+def fuzz(buf, add_buf):
+    '''
+    Called per fuzzing iteration. This template returns an unmodified copy of buf.
+
+    @type buf: bytearray
+    @param buf: The buffer that should be mutated.
+
+    @type add_buf: bytearray
+    @param add_buf: A second buffer that can be used as mutation source.
+
+    @rtype: bytearray
+    @return: A new bytearray containing the mutated data
+    '''
+    mutated = bytearray(buf)
+    # Do something interesting with mutated
+
+    return mutated
+
+# Uncomment and implement the following methods if you want to use a custom
+# trimming algorithm. See also the documentation for a better API description.
+
+# def init_trim(buf):
+#     '''
+#     Called at the start of each trimming operation.
+#     
+#     @type buf: bytearray
+#     @param buf: The buffer that should be trimmed.
+#     
+#     @rtype: int
+#     @return: The maximum number of trimming steps.
+#     '''
+#     global ...
+#     
+#     # Initialize global variables
+#     
+#     # Figure out how many trimming steps are possible.
+#     # If this is not possible for your trimming, you can
+#     # return 1 instead and always return 0 in post_trim
+#     # until you are done (then you return 1).
+#         
+#     return steps
+# 
+# def trim():
+#     '''
+#     Called per trimming iteration.
+# 
+#     @rtype: bytearray
+#     @return: A new bytearray containing the trimmed data.
+#     '''
+#     global ...
+#     
+#     # Implement the actual trimming here
+#     
+#     return bytearray(...)
+# 
+# def post_trim(success):
+#     '''
+#     Called after each trimming operation.
+#     
+#     @type success: bool
+#     @param success: Indicates if the last trim operation was successful.
+#     
+#     @rtype: int
+#     @return: The next trim index (0 to max number of steps) where max
+#              number of steps indicates the trimming is done.
+#     '''
+#     global ...
+# 
+#     if not success:
+#         # Restore last known successful input, determine next index
+#     else:
+#         # Just determine the next index, based on what was successfully
+#         # removed in the last step
+#     
+#     return next_index
diff --git a/examples/python_mutators/simple-chunk-replace.py b/examples/python_mutators/simple-chunk-replace.py
new file mode 100644
index 00000000..218dd4f8
--- /dev/null
+++ b/examples/python_mutators/simple-chunk-replace.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# encoding: utf-8
+'''
+Simple Chunk Cross-Over Replacement Module for AFLFuzz
+
+@author:     Christian Holler (:decoder)
+
+@license:
+
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+@contact:    choller@mozilla.com
+'''
+
+import random
+
+def init(seed):
+    '''
+    Seed the module-wide RNG. Called once when AFLFuzz starts up.
+
+    @type seed: int
+    @param seed: A 32-bit random value
+    '''
+    # Every later random draw in fuzz() depends on this seed
+    random.seed(seed)
+    return 0
+
+def fuzz(buf, add_buf):
+    '''
+    Called per fuzzing iteration.
+
+    @type buf: bytearray
+    @param buf: The buffer that should be mutated.
+
+    @type add_buf: bytearray
+    @param add_buf: A second buffer that can be used as mutation source (may be empty).
+
+    @rtype: bytearray
+    @return: A new bytearray containing the mutated data (a plain copy if add_buf is empty)
+    '''
+    # Make a copy of our input buffer for returning
+    ret = bytearray(buf)
+
+    # Without source data there is no chunk to take; random.randint(1, 0) would raise ValueError
+    if not add_buf:
+        return ret
+
+    # Take a random fragment length between 1 and 32 (or less if add_buf is shorter)
+    fragment_len = random.randint(1, min(len(add_buf), 32))
+    # Determine a random source index where to take the data chunk from
+    rand_src_idx = random.randint(0, len(add_buf) - fragment_len)
+    # Determine a random destination index where to put the data chunk (len(buf) appends it)
+    rand_dst_idx = random.randint(0, len(buf))
+
+    # Make the chunk replacement
+    ret[rand_dst_idx:rand_dst_idx + fragment_len] = add_buf[rand_src_idx:rand_src_idx + fragment_len]
+    return ret
diff --git a/examples/python_mutators/wrapper_afl_min.py b/examples/python_mutators/wrapper_afl_min.py
new file mode 100644
index 00000000..df09b40a
--- /dev/null
+++ b/examples/python_mutators/wrapper_afl_min.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+
+from XmlMutatorMin import XmlMutatorMin
+
+# Default settings (production mode)
+
+__mutator__ = None  # XmlMutatorMin instance, created by init()
+__seed__ = "RANDOM"  # replaced by the seed passed to init(); prefixes every log line
+__log__ = False  # when true, log() appends to __log_file__
+__log_file__ = "wrapper.log"  # path opened in append mode by log()
+
+# AFL functions
+
+def log(text):
+    """
+          Logger: appends "[<seed>] <text>" to the log file when logging is enabled
+    """
+
+    # The module-level settings are only read here, so they need no
+    # "global" declaration
+    if not __log__:
+        return
+
+    with open(__log_file__, "a") as logf:
+        logf.write("[%s] %s\n" % (__seed__, text))
+
+def init(seed):
+    """
+          Called once when AFL starts up. Seed is used to identify the AFL instance in log files
+    """
+
+    global __mutator__, __seed__
+
+    # Get the seed (it is also handed to the mutator below)
+    __seed__ = seed
+
+    # Create a global mutation class; on failure __mutator__ keeps its previous value
+    try:
+        __mutator__ = XmlMutatorMin(__seed__, verbose=__log__)
+        log("init(): Mutator created")
+    except RuntimeError as e:
+        # Format the exception itself: the .message attribute does not exist on Python 3
+        log("init(): Can't create mutator: %s" % e)
+
+def fuzz(buf, add_buf):
+    """
+          Called for each fuzzing iteration.
+
+          buf is parsed as XML, mutated (between 1 and 5 high-level mutations) and
+          returned serialized as a bytearray. Whenever a step fails, the failure is
+          logged and buf is returned unmodified. add_buf is not used.
+    """
+
+    global __mutator__
+
+    # Do we have a working mutator object?
+    if __mutator__ is None:
+        log("fuzz(): Can't fuzz, no mutator available")
+        return buf
+
+    # Interpret the AFL buffer (an array of bytes) as a byte string.
+    # bytes() is used instead of str(): on Python 3, str(bytearray) is the
+    # "bytearray(b'...')" representation and not the content of the buffer
+    try:
+        buf_str = bytes(buf)
+        log("fuzz(): AFL buffer converted to a string")
+    except Exception:
+        log("fuzz(): Can't convert AFL buffer to a string")
+        log("fuzz(): Returning unmodified AFL buffer")
+        return buf
+
+    # Load XML from the AFL string
+    try:
+        __mutator__.init_from_string(buf_str)
+        log("fuzz(): Mutator successfully initialized with AFL buffer (%d bytes)" % len(buf_str))
+    except Exception:
+        # e.g. input that is not well-formed XML
+        log("fuzz(): Can't initialize mutator with AFL buffer")
+        log("fuzz(): Returning unmodified AFL buffer")
+        return buf
+
+    # Successful initialization -> mutate
+    # (Exception rather than a bare except, so that KeyboardInterrupt / SystemExit get through)
+    try:
+        __mutator__.mutate(max=5)
+        log("fuzz(): Input mutated")
+    except Exception:
+        log("fuzz(): Can't mutate input => returning buf")
+        return buf
+
+    # Convert mutated data to an array of bytes
+    try:
+        data = bytearray(__mutator__.save_to_string())
+        log("fuzz(): Mutated data converted as bytes")
+    except Exception:
+        log("fuzz(): Can't convert mutated data to bytes => returning buf")
+        return buf
+
+    # Everything went fine, returning mutated content
+    log("fuzz(): Returning %d bytes" % len(data))
+    return data
+
+# Main (for debug)
+
+if __name__ == '__main__':
+
+    __log__ = True
+    __log_file__ = "/dev/stdout"
+    __seed__ = "RANDOM"
+
+    init(__seed__)
+    # Byte literals: on Python 3, bytearray() of a text string needs an explicit encoding
+    in_1 = bytearray(b"<foo ddd='eeee'>ffff<a b='c' d='456' eee='ffffff'>zzzzzzzzzzzz</a><b yyy='YYY' zzz='ZZZ'></b></foo>")
+    in_2 = bytearray(b"<abc abc123='456' abcCBA='ppppppppppppppppppppppppppppp'/>")
+    out = fuzz(in_1, in_2)
+    print(out)
+