about summary refs log tree commit diff
path: root/custom_mutators/gramatron/automaton-parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'custom_mutators/gramatron/automaton-parser.c')
-rw-r--r--custom_mutators/gramatron/automaton-parser.c476
1 files changed, 312 insertions, 164 deletions
diff --git a/custom_mutators/gramatron/automaton-parser.c b/custom_mutators/gramatron/automaton-parser.c
index 3265e0cf..266f5a07 100644
--- a/custom_mutators/gramatron/automaton-parser.c
+++ b/custom_mutators/gramatron/automaton-parser.c
@@ -2,287 +2,409 @@
 #include "automaton-parser.h"
 
 int free_terminal_arr(any_t placeholder, any_t item) {
-  struct terminal_arr* tmp = item;
+
+  struct terminal_arr *tmp = item;
   free(tmp->start);
   free(tmp);
   return MAP_OK;
+
 }
 
-int compare_two_symbols(const void * a, const void * b) {
-    char* a_char = *(char **)a;
-    char* b_char = *(char **)b;
-    size_t fa = strlen(a_char);
-    size_t fb = strlen(b_char);
-    if (fa > fb) return -1;
-    else if (fa == fb) return 0;
-    else return 1;
+int compare_two_symbols(const void *a, const void *b) {
+
+  char * a_char = *(char **)a;
+  char * b_char = *(char **)b;
+  size_t fa = strlen(a_char);
+  size_t fb = strlen(b_char);
+  if (fa > fb)
+    return -1;
+  else if (fa == fb)
+    return 0;
+  else
+    return 1;
 
 }
 
 // TODO: create a map
-// key: first character of a symbol, value: a list of symbols that starts with key, the list is sorted in descending order of the symbol lengths
-map_t create_first_char_to_symbols_hashmap(struct symbols_arr *symbols, struct symbols_arr *first_chars) {
+// key: first character of a symbol, value: a list of symbols that starts with
+// key, the list is sorted in descending order of the symbol lengths
+map_t create_first_char_to_symbols_hashmap(struct symbols_arr *symbols,
+                                           struct symbols_arr *first_chars) {
+
   map_t char_to_symbols = hashmap_new();
   // TODO: free the allocated map
   // sort the symbol_dict in descending order of the symbol lengths
-  qsort(symbols->symbols_arr, symbols->len, sizeof(char*), compare_two_symbols);
-  #ifdef DEBUG
+  qsort(symbols->symbols_arr, symbols->len, sizeof(char *),
+        compare_two_symbols);
+#ifdef DEBUG
   printf("------ print after sort ------\n");
   print_symbols_arr(symbols);
-  #endif
+#endif
   size_t i;
-  int r; // response from hashmap get and put
+  int    r;  // response from hashmap get and put
   for (i = 0; i < symbols->len; i++) {
-    char* symbol_curr = symbols->symbols_arr[i];
+
+    char *symbol_curr = symbols->symbols_arr[i];
     // get first character from symbol_curr
     char first_character[2];
     first_character[0] = symbol_curr[0];
     first_character[1] = '\0';
-    #ifdef DEBUG
-    printf("****** Current symbol is %s, its first character is %s ******\n", symbol_curr, first_character);
-    #endif
+#ifdef DEBUG
+    printf("****** Current symbol is %s, its first character is %s ******\n",
+           symbol_curr, first_character);
+#endif
     // key would be the first character of symbol_curr
     // the value would be an array of chars
-    struct symbols_arr* associated_symbols;
-    r = hashmap_get(char_to_symbols, first_character, (any_t*)&associated_symbols);
+    struct symbols_arr *associated_symbols;
+    r = hashmap_get(char_to_symbols, first_character,
+                    (any_t *)&associated_symbols);
     if (!r) {
-      // append current symbol to existing array
-      #ifdef DEBUG
-      printf("****** First character %s is already in hashmap ******\n", first_character);
-      #endif
-      if(!add_element_to_symbols_arr(associated_symbols, symbol_curr, strlen(symbol_curr) + 1)) {
+
+// append current symbol to existing array
+#ifdef DEBUG
+      printf("****** First character %s is already in hashmap ******\n",
+             first_character);
+#endif
+      if (!add_element_to_symbols_arr(associated_symbols, symbol_curr,
+                                      strlen(symbol_curr) + 1)) {
+
         free_hashmap(char_to_symbols, &free_array_of_chars);
         return NULL;
+
       }
-    }
-    else {
-      // start a new symbols_arr
-      #ifdef DEBUG
-      printf("****** First character %s is not in hashmap ******\n", first_character);
-      #endif
-      struct symbols_arr* new_associated_symbols = create_array_of_chars();
-      strncpy(first_chars->symbols_arr[first_chars->len], first_character, 2); // 2 because one character plus the NULL byte
-      add_element_to_symbols_arr(new_associated_symbols, symbol_curr, strlen(symbol_curr) + 1);
-      r = hashmap_put(char_to_symbols, first_chars->symbols_arr[first_chars->len], new_associated_symbols);
+
+    } else {
+
+// start a new symbols_arr
+#ifdef DEBUG
+      printf("****** First character %s is not in hashmap ******\n",
+             first_character);
+#endif
+      struct symbols_arr *new_associated_symbols = create_array_of_chars();
+      strncpy(first_chars->symbols_arr[first_chars->len], first_character,
+              2);  // 2 because one character plus the NULL byte
+      add_element_to_symbols_arr(new_associated_symbols, symbol_curr,
+                                 strlen(symbol_curr) + 1);
+      r = hashmap_put(char_to_symbols,
+                      first_chars->symbols_arr[first_chars->len],
+                      new_associated_symbols);
       first_chars->len++;
-      #ifdef DEBUG
+#ifdef DEBUG
       if (r) {
+
         printf("hashmap put failed\n");
-      }
-      else {
+
+      } else {
+
         printf("hashmap put succeeded\n");
+
       }
-      #endif
+
+#endif
+
     }
+
   }
+
   printf("****** Testing ******\n");
-  struct symbols_arr* tmp_arr;
-  char str[] = "i";
-  int t = hashmap_get(char_to_symbols, str, (any_t *)&tmp_arr);
-  if (!t)
-    print_symbols_arr(tmp_arr);
+  struct symbols_arr *tmp_arr;
+  char                str[] = "i";
+  int                 t = hashmap_get(char_to_symbols, str, (any_t *)&tmp_arr);
+  if (!t) print_symbols_arr(tmp_arr);
   return char_to_symbols;
+
 }
 
-struct symbols_arr* create_array_of_chars() {
-  struct symbols_arr* ret = (struct symbols_arr*)malloc(sizeof(struct symbols_arr));
-  ret->len = 0; 
-  ret->symbols_arr = (char **)malloc(MAX_TERMINAL_NUMS * sizeof(char*));
+struct symbols_arr *create_array_of_chars() {
+
+  struct symbols_arr *ret =
+      (struct symbols_arr *)malloc(sizeof(struct symbols_arr));
+  ret->len = 0;
+  ret->symbols_arr = (char **)malloc(MAX_TERMINAL_NUMS * sizeof(char *));
   size_t i;
   for (i = 0; i < MAX_TERMINAL_NUMS; i++) {
+
     ret->symbols_arr[i] = (char *)calloc(MAX_TERMINAL_LENGTH, sizeof(char));
+
   }
+
   return ret;
+
 }
 
 // map a symbol to a list of (state, trigger_idx)
-map_t create_pda_hashmap(state* pda, struct symbols_arr* symbols_arr) {
-  int state_idx, trigger_idx, r; // r is the return result for hashmap operation 
+map_t create_pda_hashmap(state *pda, struct symbols_arr *symbols_arr) {
+
+  int state_idx, trigger_idx,
+      r;  // r is the return result for hashmap operation
   map_t m = hashmap_new();
   // iterate over pda
   for (state_idx = 0; state_idx < numstates; state_idx++) {
-    #ifdef DEBUG
+
+#ifdef DEBUG
     printf("------ The state idx is %d ------\n", state_idx);
-    #endif
+#endif
     if (state_idx == final_state) continue;
-    state* state_curr = pda + state_idx;
-    for (trigger_idx = 0; trigger_idx < state_curr->trigger_len; trigger_idx++) {
-      #ifdef DEBUG
+    state *state_curr = pda + state_idx;
+    for (trigger_idx = 0; trigger_idx < state_curr->trigger_len;
+         trigger_idx++) {
+
+#ifdef DEBUG
       printf("------ The trigger idx is %d ------\n", trigger_idx);
-      #endif
-      trigger* trigger_curr = state_curr->ptr + trigger_idx;
-      char* symbol_curr = trigger_curr->term;
-      size_t symbol_len = trigger_curr->term_len;
-      struct terminal_arr* terminal_arr_curr;
-      r = hashmap_get(m, symbol_curr, (any_t*)&terminal_arr_curr);
+#endif
+      trigger *            trigger_curr = state_curr->ptr + trigger_idx;
+      char *               symbol_curr = trigger_curr->term;
+      size_t               symbol_len = trigger_curr->term_len;
+      struct terminal_arr *terminal_arr_curr;
+      r = hashmap_get(m, symbol_curr, (any_t *)&terminal_arr_curr);
       if (r) {
+
         // the symbol is not in the map
-        if (!add_element_to_symbols_arr(symbols_arr, symbol_curr, symbol_len+1)) {
+        if (!add_element_to_symbols_arr(symbols_arr, symbol_curr,
+                                        symbol_len + 1)) {
+
           // the number of symbols exceed maximual number
           free_hashmap(m, &free_terminal_arr);
           return NULL;
+
         }
-        #ifdef DEBUG
+
+#ifdef DEBUG
         printf("Symbol %s is not in map\n", symbol_curr);
-        #endif
-        struct terminal_arr* new_terminal_arr = (struct terminal_arr*)malloc(sizeof(struct terminal_arr));
-        new_terminal_arr->start = (struct terminal_meta*)calloc(numstates, sizeof(struct terminal_meta));
-        #ifdef DEBUG
+#endif
+        struct terminal_arr *new_terminal_arr =
+            (struct terminal_arr *)malloc(sizeof(struct terminal_arr));
+        new_terminal_arr->start = (struct terminal_meta *)calloc(
+            numstates, sizeof(struct terminal_meta));
+#ifdef DEBUG
         printf("allocate new memory address %p\n", new_terminal_arr->start);
-        #endif
+#endif
         new_terminal_arr->start->state_name = state_idx;
         new_terminal_arr->start->dest = trigger_curr->dest;
         new_terminal_arr->start->trigger_idx = trigger_idx;
         new_terminal_arr->len = 1;
-        #ifdef DEBUG
-        printf("Symbol %s is included in %zu edges\n", symbol_curr, new_terminal_arr->len);
-        #endif
+#ifdef DEBUG
+        printf("Symbol %s is included in %zu edges\n", symbol_curr,
+               new_terminal_arr->len);
+#endif
         r = hashmap_put(m, symbol_curr, new_terminal_arr);
-        #ifdef DEBUG
+#ifdef DEBUG
         if (r) {
+
           printf("hashmap put failed\n");
-        }
-        else {
+
+        } else {
+
           printf("hashmap put succeeded\n");
+
         }
-        #endif
-        // if symbol not already in map, it's not in symbol_dict, simply add the symbol to the array
+
+#endif
+        // if symbol not already in map, it's not in symbol_dict, simply add the
+        // symbol to the array
         // TODO: need to initialize symbol dict (calloc)
-      }
-      else {
-        // the symbol is already in map
-        // append to terminal array
-        // no need to touch start
-        #ifdef DEBUG
+
+      } else {
+
+// the symbol is already in map
+// append to terminal array
+// no need to touch start
+#ifdef DEBUG
         printf("Symbol %s is in map\n", symbol_curr);
-        #endif
-        struct terminal_meta* modify = terminal_arr_curr->start + terminal_arr_curr->len;
+#endif
+        struct terminal_meta *modify =
+            terminal_arr_curr->start + terminal_arr_curr->len;
         modify->state_name = state_idx;
         modify->trigger_idx = trigger_idx;
         modify->dest = trigger_curr->dest;
         terminal_arr_curr->len++;
-        #ifdef DEBUG
-        printf("Symbol %s is included in %zu edges\n", symbol_curr, terminal_arr_curr->len);
-        #endif
-        // if symbol already in map, it's already in symbol_dict as well, no work needs to be done
+#ifdef DEBUG
+        printf("Symbol %s is included in %zu edges\n", symbol_curr,
+               terminal_arr_curr->len);
+#endif
+        // if symbol already in map, it's already in symbol_dict as well, no
+        // work needs to be done
+
       }
 
     }
+
   }
+
   return m;
+
 }
 
-void print_symbols_arr(struct symbols_arr* arr) {
+void print_symbols_arr(struct symbols_arr *arr) {
+
   size_t i;
   printf("(");
   for (i = 0; i < arr->len; i++) {
+
     printf("%s", arr->symbols_arr[i]);
     if (i != arr->len - 1) printf(",");
+
   }
+
   printf(")\n");
+
 }
 
 void free_hashmap(map_t m, int (*f)(any_t, any_t)) {
+
   if (!m) {
+
     printf("m map is empty\n");
     return;
+
   }
+
   int r = hashmap_iterate(m, f, NULL);
-  #ifdef DEBUG
-  if (!r) printf("free hashmap items successfully!\n");
-  else printf("free hashmap items failed");
-  #endif
+#ifdef DEBUG
+  if (!r)
+    printf("free hashmap items successfully!\n");
+  else
+    printf("free hashmap items failed");
+#endif
   hashmap_free(m);
+
 }
 
 int free_array_of_chars(any_t placeholder, any_t item) {
+
   if (!item) {
+
     printf("item is empty\n");
     return MAP_MISSING;
+
   }
-  struct symbols_arr* arr = item;
-  size_t i;
+
+  struct symbols_arr *arr = item;
+  size_t              i;
   for (i = 0; i < MAX_TERMINAL_NUMS; i++) {
+
     free(arr->symbols_arr[i]);
+
   }
+
   free(arr->symbols_arr);
   free(arr);
   return MAP_OK;
+
 }
 
-void free_pda(state* pda) {
+void free_pda(state *pda) {
+
   if (!pda) {
+
     printf("pda is null\n");
     return;
+
   }
+
   size_t i, j;
   for (i = 0; i < numstates; i++) {
-    state* state_curr = pda + i;
+
+    state *state_curr = pda + i;
     for (j = 0; j < state_curr->trigger_len; j++) {
-      trigger* trigger_curr = state_curr->ptr + j;
+
+      trigger *trigger_curr = state_curr->ptr + j;
       free(trigger_curr->id);
       free(trigger_curr->term);
+
     }
+
     free(state_curr->ptr);
+
   }
+
   free(pda);
+
 }
 
-int dfs(struct terminal_arr** tmp, const char* program, const size_t program_length, struct terminal_arr** res, size_t idx, int curr_state) {
-  if (*res) return 1; // 1 means successfully found a path
+int dfs(struct terminal_arr **tmp, const char *program,
+        const size_t program_length, struct terminal_arr **res, size_t idx,
+        int curr_state) {
+
+  if (*res) return 1;  // 1 means successfully found a path
   if (idx == program_length) {
+
     // test if the last terminal points to the final state
     if (curr_state != final_state) return 0;
     *res = *tmp;
     return 1;
+
   }
+
   if ((*tmp)->len == MAX_PROGRAM_WALK_LENGTH) {
+
     printf("Reached maximum program walk length\n");
     return 0;
+
   }
+
   char first_char[2];
-  first_char[0] = program[idx]; // first character of program
+  first_char[0] = program[idx];  // first character of program
   first_char[1] = '\0';
-  int r;
-  struct symbols_arr* matching_symbols;
-  r = hashmap_get(first_char_to_symbols_map, first_char, (any_t *)&matching_symbols);
+  int                 r;
+  struct symbols_arr *matching_symbols;
+  r = hashmap_get(first_char_to_symbols_map, first_char,
+                  (any_t *)&matching_symbols);
   if (r) {
-    printf("No symbols match the current character, abort!"); // hopefully won't reach this state
+
+    printf(
+        "No symbols match the current character, abort!");  // hopefully won't
+                                                            // reach this state
     return 0;
+
   }
+
   size_t i;
-  bool matched = false;
+  bool   matched = false;
   for (i = 0; i < matching_symbols->len; i++) {
+
     if (matched) break;
     char *matching_symbol = matching_symbols->symbols_arr[i];
     if (!strncmp(matching_symbol, program + idx, strlen(matching_symbol))) {
+
       // there is a match
       matched = true;
       // find the possible paths of that symbol
-      struct terminal_arr* ta;
+      struct terminal_arr *ta;
       int r2 = hashmap_get(pda_map, matching_symbol, (any_t *)&ta);
       if (!r2) {
+
         // the terminal is found in the dictionary
         size_t j;
         for (j = 0; j < ta->len; j++) {
+
           int state_name = (ta->start + j)->state_name;
           if (state_name != curr_state) continue;
           size_t trigger_idx = (ta->start + j)->trigger_idx;
-          int dest = (ta->start + j)->dest;
+          int    dest = (ta->start + j)->dest;
           (*tmp)->start[(*tmp)->len].state_name = state_name;
           (*tmp)->start[(*tmp)->len].trigger_idx = trigger_idx;
           (*tmp)->start[(*tmp)->len].dest = dest;
           (*tmp)->len++;
-          if (dfs(tmp, program, program_length, res, idx + strlen(matching_symbol), dest)) return 1;
+          if (dfs(tmp, program, program_length, res,
+                  idx + strlen(matching_symbol), dest))
+            return 1;
           (*tmp)->len--;
+
         }
-      }
-      else {
-        printf("No path goes out of this symbol, abort!"); // hopefully won't reach this state
+
+      } else {
+
+        printf("No path goes out of this symbol, abort!");  // hopefully won't
+                                                            // reach this state
         return 0;
+
       }
+
     }
+
   }
+
   return 0;
   /*
   1. First extract the first character of the current program
@@ -292,76 +414,102 @@ int dfs(struct terminal_arr** tmp, const char* program, const size_t program_len
   5. Recursion
   6. Pop the path from the terminal array
   7. - If idx reaches end of program, set tmp to res
-     - If idx is not at the end and nothing matches, the current path is not working, simply return 0
+     - If idx is not at the end and nothing matches, the current path is not
+  working, simply return 0
   */
+
 }
 
-Array* constructArray(struct terminal_arr* terminal_arr, state* pda) {
-  Array * res = (Array *)calloc(1, sizeof(Array));
+Array *constructArray(struct terminal_arr *terminal_arr, state *pda) {
+
+  Array *res = (Array *)calloc(1, sizeof(Array));
   initArray(res, INIT_SIZE);
   size_t i;
-  for (i = 0; i < terminal_arr->len; i ++) {
-    struct terminal_meta* curr = terminal_arr->start + i;
-    int state_name = curr->state_name;
-    int trigger_idx = curr->trigger_idx;
+  for (i = 0; i < terminal_arr->len; i++) {
+
+    struct terminal_meta *curr = terminal_arr->start + i;
+    int                   state_name = curr->state_name;
+    int                   trigger_idx = curr->trigger_idx;
     // get the symbol from pda
-    state* state_curr = pda + state_name;
-    trigger* trigger_curr = state_curr->ptr + trigger_idx;
-    char *symbol_curr = trigger_curr->term;
-    size_t symbol_curr_len = trigger_curr->term_len;
+    state *  state_curr = pda + state_name;
+    trigger *trigger_curr = state_curr->ptr + trigger_idx;
+    char *   symbol_curr = trigger_curr->term;
+    size_t   symbol_curr_len = trigger_curr->term_len;
     insertArray(res, state_name, symbol_curr, symbol_curr_len, trigger_idx);
+
   }
+
   return res;
+
 }
 
-Array* automaton_parser(const uint8_t *seed_fn) {
-    Array* parsed_res = NULL;
-    FILE* ptr;
-    ptr = fopen(seed_fn, "r");
-    if (ptr == NULL) {
-      printf("file can't be opened \n");
-      fclose(ptr);
-      return NULL;
-    }
-    char ch;
-    char program[MAX_PROGRAM_LENGTH];
-    int i = 0;
-    bool program_too_long = false;
-    do {
-      if (i == MAX_PROGRAM_LENGTH) {
-        // the maximum program length is reached
-        printf("maximum program length is reached, give up the current seed\n");
-        program_too_long = true;
-        break;
-      }
-      ch = fgetc(ptr);
-      program[i] = ch;
-      i ++;
-    } while (ch != EOF);
-    program[i-1] = '\0';
+Array *automaton_parser(const uint8_t *seed_fn) {
+
+  Array *parsed_res = NULL;
+  FILE * ptr;
+  ptr = fopen(seed_fn, "r");
+  if (ptr == NULL) {
+
+    printf("file can't be opened \n");
     fclose(ptr);
-    if ((i == 1 && program[0] == '\0') || program_too_long) return NULL;
-    struct terminal_arr* arr_holder;
-    struct terminal_arr* dfs_res = NULL;
-    arr_holder = (struct terminal_arr*)calloc(1, sizeof(struct terminal_arr));
-    arr_holder->start = (struct terminal_meta*)calloc(MAX_PROGRAM_WALK_LENGTH, sizeof(struct terminal_meta));
-    int dfs_success = dfs(&arr_holder, program, strlen(program), &dfs_res, 0, init_state);
-    // printf("*** return value %d *** \n", dfs_success);
-    if (dfs_success) {
-      parsed_res = constructArray(dfs_res, pda);
+    return NULL;
+
+  }
+
+  char ch;
+  char program[MAX_PROGRAM_LENGTH];
+  int  i = 0;
+  bool program_too_long = false;
+  do {
+
+    if (i == MAX_PROGRAM_LENGTH) {
+
+      // the maximum program length is reached
+      printf("maximum program length is reached, give up the current seed\n");
+      program_too_long = true;
+      break;
+
     }
-    free(arr_holder->start);
-    free(arr_holder);
-    return parsed_res;
+
+    ch = fgetc(ptr);
+    program[i] = ch;
+    i++;
+
+  } while (ch != EOF);
+
+  program[i - 1] = '\0';
+  fclose(ptr);
+  if ((i == 1 && program[0] == '\0') || program_too_long) return NULL;
+  struct terminal_arr *arr_holder;
+  struct terminal_arr *dfs_res = NULL;
+  arr_holder = (struct terminal_arr *)calloc(1, sizeof(struct terminal_arr));
+  arr_holder->start = (struct terminal_meta *)calloc(
+      MAX_PROGRAM_WALK_LENGTH, sizeof(struct terminal_meta));
+  int dfs_success =
+      dfs(&arr_holder, program, strlen(program), &dfs_res, 0, init_state);
+  // printf("*** return value %d *** \n", dfs_success);
+  if (dfs_success) { parsed_res = constructArray(dfs_res, pda); }
+  free(arr_holder->start);
+  free(arr_holder);
+  return parsed_res;
+
 }
 
 // return 0 if fails
 // return 1 if succeeds
-int add_element_to_symbols_arr(struct symbols_arr* symbols_arr, char* symbol, size_t symbol_len) {
-  if (symbols_arr->len >= MAX_TERMINAL_NUMS || symbol_len >= MAX_TERMINAL_LENGTH) {
+int add_element_to_symbols_arr(struct symbols_arr *symbols_arr, char *symbol,
+                               size_t symbol_len) {
+
+  if (symbols_arr->len >= MAX_TERMINAL_NUMS ||
+      symbol_len >= MAX_TERMINAL_LENGTH) {
+
     return 0;
+
   }
+
   strncpy(symbols_arr->symbols_arr[symbols_arr->len], symbol, symbol_len);
   symbols_arr->len++;
   return 1;
-}
\ No newline at end of file
+
+}
+