about summary refs log tree commit diff
path: root/custom_mutators/grammatron/preprocess/construct_automata.py
diff options
context:
space:
mode:
Diffstat (limited to 'custom_mutators/grammatron/preprocess/construct_automata.py')
-rw-r--r--custom_mutators/grammatron/preprocess/construct_automata.py275
1 files changed, 0 insertions, 275 deletions
diff --git a/custom_mutators/grammatron/preprocess/construct_automata.py b/custom_mutators/grammatron/preprocess/construct_automata.py
deleted file mode 100644
index b9e84aa8..00000000
--- a/custom_mutators/grammatron/preprocess/construct_automata.py
+++ /dev/null
@@ -1,275 +0,0 @@
-import sys
-import json
-import re
-from collections import defaultdict
-# import pygraphviz as pgv
-
# --- Shared mutable state for the worklist-driven FSA construction ---
gram_data = None    # Parsed grammar (JSON dict keyed by nonterminal), loaded in main()
state_count = 1     # Monotonic counter used to name newly created FSA states
pda = []            # Accumulated transition dicts (see get_template() for shape)
worklist = []       # FIFO queue of [state, stack] elements awaiting expansion
state_stacks = {}   # Maps state name -> stack contents recorded when that state was created

# === If user provides upper bound on the stack size during FSA creation ===
# Specifies the upper bound to which the stack is allowed to grow
# If for any generated state, the stack size is >= stack_limit then this
# state is not expanded further.
stack_limit = None
# Holds the set of unexpanded rules owing to the user-passed stack constraint limit
unexpanded_rules = set()
-
def main(grammar, limit):
    '''
    Drives FSA construction from a GNF grammar file.

    Loads the grammar JSON, runs a worklist algorithm over
    prep_transitions() to build the FSA approximation of the PDA, then
    dumps the postprocessed automaton and the raw transition list to
    files named after the grammar file's basename.

    Args:
        grammar: path to the GNF grammar JSON; must contain a "Start" key
            whose first element is the start symbol.
        limit: optional upper bound on the stack size.  When set, states
            whose stack would grow beyond it are not expanded and the
            offending rules are written to <name>_disallowed.json.
    '''
    global worklist, gram_data, stack_limit
    current = '0'  # label of the initial state
    stack_limit = limit
    if stack_limit:
        print ('[X] Operating in bounded stack mode')

    with open(grammar, 'r') as fd:
        gram_data = json.load(fd)
    start_symbol = gram_data["Start"][0]
    worklist.append([current, [start_symbol]])
    # Output files are named after the grammar file's basename (sans extension).
    filename = (grammar.split('/')[-1]).split('.')[0]

    # Expand one [state, stack] element at a time in FIFO order until no
    # unexpanded states remain.
    while worklist:
        element = worklist.pop(0)
        prep_transitions(element)

    # XXX Graph visualization (create_graph) stays disabled: rendering the
    # current version of PHP's grammar causes a segfault.
    transformed = postprocess()
    with open(filename + '_automata.json', 'w+') as fd:
        json.dump(transformed, fd)
    with open(filename + '_transition.json', 'w+') as fd:
        json.dump(pda, fd)
    if not unexpanded_rules:
        print ('[X] No unexpanded rules, absolute FSA formed')
        # sys.exit rather than the site-module exit(): always available.
        sys.exit(0)
    else:
        print ('[X] Certain rules were not expanded due to stack size limit. Inexact approximation has been created and the disallowed rules have been put in {}_disallowed.json'.format(filename))
        print ('[X] Number of unexpanded rules:', len(unexpanded_rules))
        with open(filename + '_disallowed.json', 'w+') as fd:
            json.dump(list(unexpanded_rules), fd)
-
def create_graph(filename):
    '''
    Creates a DOT representation of the PDA and renders it to <filename>.png.

    NOTE(review): depends on pygraphviz as `pgv`, but the module-level
    import (L20 in the header) is commented out — calling this as-is
    raises NameError.  The call site in main() is also commented out
    because visualizing the current PHP grammar reportedly segfaults.
    '''
    global pda
    G = pgv.AGraph(strict = False, directed = True)
    for transition in pda:
        print ('Transition:', transition)
        # One edge per transition, labelled with its terminal.
        G.add_edge(transition['source'], transition['dest'], 
                label = 'Term:{}'.format(transition['terminal']))
    G.layout(prog = 'dot')
    print ('Do it up 2')
    G.draw(filename + '.png')
-
def prep_transitions(element):
    '''
    Expands one worklist element [state, stack] into PDA transitions.

    For each production of the stack-top nonterminal a transition is
    built.  If the resulting stack matches the recorded stack of an
    existing state, a backward edge to that state is emitted (recursion)
    and no new state is created.  Otherwise a fresh state is allocated,
    the transition is appended to the global `pda`, and the new
    [state, stack] pair is queued on the worklist.  When `stack_limit`
    is in force, transitions whose stack would exceed it are recorded in
    `unexpanded_rules` and dropped.
    '''
    global gram_data, state_count, pda, worklist, state_stacks, stack_limit, unexpanded_rules
    state = element[0]
    try:
        nonterminal = element[1][0]
    except IndexError:
        # Empty stack: this is a final state, nothing left to expand.
        return
    rules = gram_data[nonterminal]
    count = 1
    for rule in rules:
        isRecursive = False
        terminal, ss, termIsRegex = tokenize(rule)
        transition = get_template()
        transition['trigger'] = '_'.join([state, str(count)])
        transition['source'] = state
        transition['dest'] = str(state_count)
        transition['ss'] = ss
        transition['terminal'] = terminal
        transition['rule'] = "{} -> {}".format(nonterminal, rule)
        if termIsRegex:
            transition['termIsRegex'] = True

        # Derive the destination state's stack: copy the current state's
        # stack, pop the consumed nonterminal, then push this rule's stack
        # symbols so that the leftmost symbol ends up on top.
        try:
            state_stack = state_stacks[state][:]
        except KeyError:
            # Initial state: no stack recorded yet.  (Was a bare except;
            # KeyError is the only miss expected here.)
            state_stack = []
        if len(state_stack):
            state_stack.pop(0)
        if ss:
            for symbol in ss[::-1]:
                state_stack.insert(0, symbol)
        transition['stack'] = state_stack

        # Check if a recursive transition state being created, if so make a
        # backward edge to the existing state and don't add anything to the
        # worklist.
        if state_stacks:
            for state_element, stack in state_stacks.items():
                if sorted(stack) == sorted(state_stack):
                    transition['dest'] = state_element
                    pda.append(transition)
                    count += 1
                    isRecursive = True
                    break
        # If a recursive transition exercised don't add the same transition
        # as a new edge, continue onto the next transitions.
        if isRecursive:
            continue

        # If the generated state has a stack size > stack_limit then that
        # state is abandoned and not added to the FSA or the worklist.
        if stack_limit:
            if (len(transition['stack']) > stack_limit):
                unexpanded_rules.add(transition['rule'])
                continue

        # Non-recursive transition: record it, queue the new state, and
        # remember its stack for future recursion checks.
        pda.append(transition)
        worklist.append([transition['dest'], transition['stack']])
        state_stacks[transition['dest']] = state_stack
        state_count += 1
        count += 1
-
def tokenize(rule):
    '''
    Splits a GNF-form rule into its components.

    A rule looks like ``'terminal' A B`` or ``r'regex' A B``: an optional
    ``r`` prefix, a single-quoted terminal, then whitespace-separated
    stack symbols.

    Args:
        rule: the right-hand side of a GNF production, as a string.

    Returns:
        (terminal, ss, termIsRegex) where `terminal` is the quoted text,
        `ss` is a list of stack symbols or None if there are none, and
        `termIsRegex` is True when the ``r`` prefix was present.

    Raises:
        AssertionError: if the rule does not match the GNF shape.
    '''
    # Raw string avoids invalid-escape warnings for \s/\S on modern Python;
    # the pattern itself is unchanged.
    pattern = re.compile(r"([r])*\'([\s\S]+)\'([\s\S]*)")
    match = pattern.match(rule)
    # Previously a non-matching rule crashed with AttributeError on
    # match.group(); raise the intended diagnostic instead.
    if match is None:
        raise AssertionError("Rule is not in GNF form")

    termIsRegex = bool(match.group(1))
    terminal = match.group(2)  # always non-empty when the pattern matches
    ss = match.group(3).split() if match.group(3) else None

    return terminal, ss, termIsRegex
-
def get_template():
    '''Return a fresh, blank transition record used to build PDA edges.'''
    return {
        'trigger': None,      # unique edge label: "<state>_<count>"
        'source': None,       # origin state name
        'dest': None,         # destination state name
        'termIsRegex': False, # True when the terminal is a regex
        'terminal': None,     # terminal consumed on this edge
        'stack': [],          # stack contents of the destination state
    }
-
def postprocess():
    '''
    Creates a representation to be passed on to the C-module.

    Folds the global transition list `pda` into a dict mapping each source
    state to its outgoing [trigger, dest, terminal] triples, plus
    init/final state and state-count metadata.  In bounded-stack mode,
    transitions into unexpanded (non-empty-stack "final") states are
    culled first.
    '''
    global pda
    final_struct = {}
    # Groups transitions by source state for the C-module's lookup.
    memoized = defaultdict(list)
    # Supporting data structures for if stack limit is imposed
    culled_pda = []
    culled_final = []
    num_transitions = 0 # Keep track of number of transitions


    states, final, initial = _get_states()

    print (initial)
    assert len(initial) == 1, 'More than one init state found'

    # Cull transitions to states which were not expanded owing to the stack limit
    if stack_limit:

        blocklist = []
        for final_state in final:
            for transition in pda:
                # A "final" state reached with a non-empty stack was cut off
                # by the stack limit, not genuinely final — block it.
                if (transition["dest"] == final_state) and (len(transition["stack"]) > 0):
                    blocklist.append(transition["dest"])
                    continue
                else:
                    # NOTE(review): each surviving transition is appended once
                    # per final_state iteration — with more than one candidate
                    # final state this duplicates entries in culled_pda.  The
                    # assert below only constrains culled_final; verify this
                    # is intended before relying on culled_pda's contents.
                    culled_pda.append(transition)
        
        culled_final = [state for state in final if state not in blocklist]

        assert len(culled_final) == 1, 'More than one final state found'

        for transition in culled_pda:
            state = transition["source"]
            # Drop edges into blocked states entirely.
            if transition["dest"] in blocklist:
                    continue 
            num_transitions += 1
            memoized[state].append([transition["trigger"], transition["dest"], 
                transition["terminal"]])
        final_struct["init_state"] = initial
        final_struct["final_state"] = culled_final[0]
        # The reason we do this is because when states are culled, the indexing is
        # still relative to the actual number of states hence we keep numstates recorded
        # as the original number of states
        print ('[X] Actual Number of states:', len(memoized.keys()))
        print ('[X] Number of transitions:', num_transitions)
        print ('[X] Original Number of states:', len(states))
        final_struct["numstates"] = len(states) 
        final_struct["pda"] = memoized
        return final_struct
    
    # Running FSA construction in exact approximation mode and postprocessing it like so
    for transition in pda:
       state = transition["source"]
       memoized[state].append([transition["trigger"], transition["dest"], 
           transition["terminal"]])

    final_struct["init_state"] = initial
    final_struct["final_state"] = final[0]
    print ('[X] Actual Number of states:', len(memoized.keys()))
    final_struct["numstates"] = len(memoized.keys()) 
    final_struct["pda"] = memoized
    return final_struct
-
-
def _get_states():
    '''
    Derive the state sets from the global transition list `pda`.

    Returns a 3-tuple (all_states, final_states, initial_state):
    final states appear only as destinations, the initial state is the
    one appearing only as a source (joined into a single string).
    '''
    global pda
    sources = set()
    dests = set()
    for edge in pda:
        sources.add(edge["source"])
        dests.add(edge["dest"])
    all_states = sources | dests
    finals = dests - sources
    initial = ''.join(sources - dests)
    return list(all_states), list(finals), initial
-
if __name__ == '__main__':
    import argparse

    # CLI: grammar file location plus an optional stack-size bound.
    arg_parser = argparse.ArgumentParser(
        description = 'Script to convert GNF grammar to PDA')
    arg_parser.add_argument('--gf', type = str,
        help = 'Location of GNF grammar')
    arg_parser.add_argument('--limit', type = int, default = None,
        help = 'Specify the upper bound for the stack size')
    cli = arg_parser.parse_args()
    main(cli.gf, cli.limit)