author     vanhauser-thc <vh@thc.org>  2021-07-09 10:32:14 +0200
committer  vanhauser-thc <vh@thc.org>  2021-07-09 10:32:14 +0200
commit     e1d5009229fb5cea5845cd08e0abdc8fe440ee86 (patch)
tree       5855fd6e27d83b5d7eb2c455a8bb3bc7b43403a4 /custom_mutators/grammatron/preprocess/construct_automata.py
parent     fd4acc935efe78a340395ca386b856930f7e6b22 (diff)
download   afl++-e1d5009229fb5cea5845cd08e0abdc8fe440ee86.tar.gz
fixes
Diffstat (limited to 'custom_mutators/grammatron/preprocess/construct_automata.py')
-rw-r--r--  custom_mutators/grammatron/preprocess/construct_automata.py | 275
1 file changed, 0 insertions(+), 275 deletions(-)
diff --git a/custom_mutators/grammatron/preprocess/construct_automata.py b/custom_mutators/grammatron/preprocess/construct_automata.py
deleted file mode 100644
index b9e84aa8..00000000
--- a/custom_mutators/grammatron/preprocess/construct_automata.py
+++ /dev/null
@@ -1,275 +0,0 @@
-import sys
-import json
-import re
-from collections import defaultdict
-# import pygraphviz as pgv
-
-gram_data = None
-state_count = 1
-pda = []
-worklist = []
-state_stacks = {}
-
-# === If user provides upper bound on the stack size during FSA creation ===
-# Specifies the upper bound to which the stack is allowed to grow
-# If, for any generated state, the stack size exceeds stack_limit, then that
-# state is not expanded further.
-stack_limit = None
-# Holds the set of unexpanded rules owing to the user-passed stack constraint limit
-unexpanded_rules = set()
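-
-# Illustrative sketch of the expected input (not part of the original script):
-# the GNF grammar file is a JSON object that maps each nonterminal to a list of
-# rule strings and names the start symbol under "Start", e.g.
-#
-#   {
-#       "Start": ["PROGRAM"],
-#       "PROGRAM": ["'a' PROGRAM", "'b'"]
-#   }
-#
-# The nonterminal names and rules above are made-up placeholders; only the
-# "Start" key and the rule-string format are implied by the code below.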
-
-def main(grammar, limit):
-    global worklist, gram_data, stack_limit
-    current = '0'
-    stack_limit = limit
-    if stack_limit:
-        print('[X] Operating in bounded stack mode')
-
-    with open(grammar, 'r') as fd:
-        gram_data = json.load(fd)
-    start_symbol = gram_data["Start"][0]
-    worklist.append([current, [start_symbol]])
-    # print (grammar)
-    filename = (grammar.split('/')[-1]).split('.')[0]
-
-    while worklist:
-        # Take an element from the worklist
-        # print ('================')
-        # print ('Worklist:', worklist)
-        element = worklist.pop(0)
-        prep_transitions(element)
-
-    pda_file = filename + '_transition.json'
-    graph_file = filename + '.png'
-    # print ('XXXXXXXXXXXXXXXX')
-    # print ('PDA file:%s Png graph file:%s' % (pda_file, graph_file))
-    # XXX Commented out because visualization of the current version of PHP causes a segfault
-    # Create the graph and dump the transitions to a file
-    # create_graph(filename)
-    transformed = postprocess()
-    with open(filename + '_automata.json', 'w+') as fd:
-        json.dump(transformed, fd)
-    with open(filename + '_transition.json', 'w+') as fd:
-        json.dump(pda, fd)
-    if not unexpanded_rules:
-        print('[X] No unexpanded rules, absolute FSA formed')
-        sys.exit(0)
-    else:
-        print('[X] Certain rules were not expanded due to the stack size limit. '
-              'An inexact approximation has been created and the disallowed rules '
-              'have been put in {}_disallowed.json'.format(filename))
-        print('[X] Number of unexpanded rules:', len(unexpanded_rules))
-        with open(filename + '_disallowed.json', 'w+') as fd:
-            json.dump(list(unexpanded_rules), fd)
-
-def create_graph(filename):
-    '''
-    Creates a DOT representation of the PDA and renders it to a PNG.
-    Requires pygraphviz (the import above is commented out by default).
-    '''
-    global pda
-    G = pgv.AGraph(strict=False, directed=True)
-    for transition in pda:
-        print('Transition:', transition)
-        G.add_edge(transition['source'], transition['dest'],
-                   label='Term:{}'.format(transition['terminal']))
-    G.layout(prog='dot')
-    print('Do it up 2')
-    G.draw(filename + '.png')
-
-def prep_transitions(element):
-    '''
-    Generates transitions
-    '''
-    global gram_data, state_count, pda, worklist, state_stacks, stack_limit, unexpanded_rules
-    state = element[0]
-    try:
-        nonterminal = element[1][0]
-    except IndexError:
-        # Final state was encountered, pop from worklist without doing anything
-        return
-    rules = gram_data[nonterminal]
-    count = 1
-    for rule in rules:
-        isRecursive = False
-        # print ('Current state:', state)
-        terminal, ss, termIsRegex = tokenize(rule)
-        transition = get_template()
-        transition['trigger'] = '_'.join([state, str(count)])
-        transition['source'] = state
-        transition['dest'] = str(state_count)
-        transition['ss'] = ss
-        transition['terminal'] = terminal
-        transition['rule'] = "{} -> {}".format(nonterminal, rule)
-        if termIsRegex:
-            transition['termIsRegex'] = True
-
-        # Create a state stack for the new state
-        try:
-            state_stack = state_stacks[state][:]
-        except KeyError:
-            state_stack = []
-        if len(state_stack):
-            state_stack.pop(0)
-        if ss:
-            for symbol in ss[::-1]:
-                state_stack.insert(0, symbol)
-        transition['stack'] = state_stack
-
-        # Check whether a recursive transition state is being created; if so, make a
-        # backward edge and don't add anything to the worklist
-        # print (state_stacks)
-        if state_stacks:
-            for state_element, stack in state_stacks.items():
-                # print ('Stack:', sorted(stack))
-                # print ('State stack:', sorted(state_stack))
-                if sorted(stack) == sorted(state_stack):
-                    transition['dest'] = state_element
-                    # print ('Recursive:', transition)
-                    pda.append(transition)
-                    count += 1
-                    isRecursive = True
-                    break
-        # If a recursive transition was exercised, don't add the same transition as a
-        # new edge; continue on to the next transition
-        if isRecursive:
-            continue
-
-        # If the generated state has a stack size > stack_limit, that state is abandoned
-        # and not added to the FSA or to the worklist for further expansion
-        if stack_limit:
-            if (len(transition['stack']) > stack_limit):
-                unexpanded_rules.add(transition['rule'])
-                continue
-
-        # Create transitions for the non-recursive relations and add them to the worklist
-        # print ('Normal:', transition)
-        # print ('State2:', state)
-        pda.append(transition)
-        worklist.append([transition['dest'], transition['stack']])
-        state_stacks[transition['dest']] = state_stack
-        state_count += 1
-        count += 1
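-
-# Illustrative sketch (not from the original file): expanding a worklist element
-# ['0', ['PROGRAM']] against a hypothetical rule "'a' PROGRAM" would yield a
-# transition such as
-#   {'trigger': '0_1', 'source': '0', 'dest': '1', 'termIsRegex': False,
-#    'terminal': 'a', 'ss': ['PROGRAM'], 'rule': "PROGRAM -> 'a' PROGRAM",
-#    'stack': ['PROGRAM']}
-# and push ['1', ['PROGRAM']] onto the worklist for further expansion.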
-
-def tokenize(rule):
-    '''
-    Gets the terminal and the corresponding stack symbols from a rule in GNF form
-    '''
-    pattern = re.compile(r"([r])*\'([\s\S]+)\'([\s\S]*)")
-    terminal = None
-    ss = None
-    termIsRegex = False
-    match = pattern.match(rule)
-    if match is None:
-        raise AssertionError("Rule is not in GNF form")
-    if match.group(1):
-        termIsRegex = True
-    if match.group(2):
-        terminal = match.group(2)
-    else:
-        raise AssertionError("Rule is not in GNF form")
-
-    if match.group(3):
-        ss = (match.group(3)).split()
-
-    return terminal, ss, termIsRegex
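-
-# Illustrative sketch (not from the original file) of what tokenize() returns
-# for two made-up GNF rules:
-#   tokenize("'a' B C")     -> ('a', ['B', 'C'], False)
-#   tokenize("r'[0-9]+' B") -> ('[0-9]+', ['B'], True)
-# A rule consisting of a single terminal, e.g. "'b'", yields ('b', None, False).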
-
-def get_template():
-    transition_template = {
-        'trigger': None,
-        'source': None,
-        'dest': None,
-        'termIsRegex': False,
-        'terminal': None,
-        'stack': []
-    }
-    return transition_template
-
-def postprocess():
-    '''
-    Creates a representation to be passed on to the C-module
-    '''
-    global pda
-    final_struct = {}
-    memoized = defaultdict(list)
-    # Supporting data structures used when a stack limit is imposed
-    culled_pda = []
-    culled_final = []
-    num_transitions = 0  # Keep track of the number of transitions
-
-    states, final, initial = _get_states()
-
-    print(initial)
-    assert len(initial) == 1, 'More than one init state found'
-
-    # Cull transitions to states which were not expanded owing to the stack limit
-    if stack_limit:
-
-        blocklist = []
-        for final_state in final:
-            for transition in pda:
-                if (transition["dest"] == final_state) and (len(transition["stack"]) > 0):
-                    blocklist.append(transition["dest"])
-                    continue
-                else:
-                    culled_pda.append(transition)
-
-        culled_final = [state for state in final if state not in blocklist]
-
-        assert len(culled_final) == 1, 'More than one final state found'
-
-        for transition in culled_pda:
-            state = transition["source"]
-            if transition["dest"] in blocklist:
-                continue
-            num_transitions += 1
-            memoized[state].append([transition["trigger"], transition["dest"],
-                                    transition["terminal"]])
-        final_struct["init_state"] = initial
-        final_struct["final_state"] = culled_final[0]
-        # The reason we do this is that, when states are culled, the indexing is
-        # still relative to the original number of states, hence we record numstates
-        # as the original number of states
-        print('[X] Actual Number of states:', len(memoized.keys()))
-        print('[X] Number of transitions:', num_transitions)
-        print('[X] Original Number of states:', len(states))
-        final_struct["numstates"] = len(states)
-        final_struct["pda"] = memoized
-        return final_struct
-
-    # FSA construction was run in exact approximation mode (no stack limit), so the
-    # full PDA is postprocessed directly
-    for transition in pda:
-        state = transition["source"]
-        memoized[state].append([transition["trigger"], transition["dest"],
-                                transition["terminal"]])
-
-    final_struct["init_state"] = initial
-    final_struct["final_state"] = final[0]
-    print('[X] Actual Number of states:', len(memoized.keys()))
-    final_struct["numstates"] = len(memoized.keys())
-    final_struct["pda"] = memoized
-    return final_struct
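-
-# Illustrative sketch (not from the original file) of the structure handed to
-# the C-module, using made-up state names:
-#   {
-#       "init_state": "0",
-#       "final_state": "2",
-#       "numstates": 3,
-#       "pda": {"0": [["0_1", "1", "a"]], "1": [["1_1", "2", "b"]]}
-#   }
-# Each "pda" entry maps a source state to [trigger, destination, terminal] triples.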
-
-
-def _get_states():
-    '''
-    Derives (all states, final states, initial state) from the transition list:
-    final states never appear as a source, and the initial state never appears
-    as a destination.
-    '''
-    global pda
-    source = set()
-    dest = set()
-    for transition in pda:
-        source.add(transition["source"])
-        dest.add(transition["dest"])
-    source_copy = source.copy()
-    source_copy.update(dest)
-    return list(source_copy), list(dest.difference(source)), str(''.join(list(source.difference(dest))))
-
-if __name__ == '__main__':
-    import argparse
-    parser = argparse.ArgumentParser(description='Script to convert GNF grammar to PDA')
-    parser.add_argument(
-        '--gf',
-        type=str,
-        help='Location of GNF grammar')
-    parser.add_argument(
-        '--limit',
-        type=int,
-        default=None,
-        help='Specify the upper bound for the stack size')
-    args = parser.parse_args()
-    main(args.gf, args.limit)
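-
-# Example invocation (illustrative; the grammar file name is a placeholder):
-#   python3 construct_automata.py --gf php_gnf.json --limit 5
-# This writes php_gnf_automata.json and php_gnf_transition.json into the current
-# working directory, plus php_gnf_disallowed.json if any rules were culled by the limit.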