diff options
Diffstat (limited to 'custom_mutators/gramatron/automaton-parser.c')
-rw-r--r-- | custom_mutators/gramatron/automaton-parser.c | 515 |
1 files changed, 0 insertions, 515 deletions
diff --git a/custom_mutators/gramatron/automaton-parser.c b/custom_mutators/gramatron/automaton-parser.c deleted file mode 100644 index 266f5a07..00000000 --- a/custom_mutators/gramatron/automaton-parser.c +++ /dev/null @@ -1,515 +0,0 @@ -#include "afl-fuzz.h" -#include "automaton-parser.h" - -int free_terminal_arr(any_t placeholder, any_t item) { - - struct terminal_arr *tmp = item; - free(tmp->start); - free(tmp); - return MAP_OK; - -} - -int compare_two_symbols(const void *a, const void *b) { - - char * a_char = *(char **)a; - char * b_char = *(char **)b; - size_t fa = strlen(a_char); - size_t fb = strlen(b_char); - if (fa > fb) - return -1; - else if (fa == fb) - return 0; - else - return 1; - -} - -// TODO: create a map -// key: first character of a symbol, value: a list of symbols that starts with -// key, the list is sorted in descending order of the symbol lengths -map_t create_first_char_to_symbols_hashmap(struct symbols_arr *symbols, - struct symbols_arr *first_chars) { - - map_t char_to_symbols = hashmap_new(); - // TODO: free the allocated map - // sort the symbol_dict in descending order of the symbol lengths - qsort(symbols->symbols_arr, symbols->len, sizeof(char *), - compare_two_symbols); -#ifdef DEBUG - printf("------ print after sort ------\n"); - print_symbols_arr(symbols); -#endif - size_t i; - int r; // response from hashmap get and put - for (i = 0; i < symbols->len; i++) { - - char *symbol_curr = symbols->symbols_arr[i]; - // get first character from symbol_curr - char first_character[2]; - first_character[0] = symbol_curr[0]; - first_character[1] = '\0'; -#ifdef DEBUG - printf("****** Current symbol is %s, its first character is %s ******\n", - symbol_curr, first_character); -#endif - // key would be the first character of symbol_curr - // the value would be an array of chars - struct symbols_arr *associated_symbols; - r = hashmap_get(char_to_symbols, first_character, - (any_t *)&associated_symbols); - if (!r) { - -// append current symbol to existing array -#ifdef DEBUG - printf("****** First character %s is already in hashmap ******\n", - first_character); -#endif - if (!add_element_to_symbols_arr(associated_symbols, symbol_curr, - strlen(symbol_curr) + 1)) { - - free_hashmap(char_to_symbols, &free_array_of_chars); - return NULL; - - } - - } else { - -// start a new symbols_arr -#ifdef DEBUG - printf("****** First character %s is not in hashmap ******\n", - first_character); -#endif - struct symbols_arr *new_associated_symbols = create_array_of_chars(); - strncpy(first_chars->symbols_arr[first_chars->len], first_character, - 2); // 2 because one character plus the NULL byte - add_element_to_symbols_arr(new_associated_symbols, symbol_curr, - strlen(symbol_curr) + 1); - r = hashmap_put(char_to_symbols, - first_chars->symbols_arr[first_chars->len], - new_associated_symbols); - first_chars->len++; -#ifdef DEBUG - if (r) { - - printf("hashmap put failed\n"); - - } else { - - printf("hashmap put succeeded\n"); - - } - -#endif - - } - - } - - printf("****** Testing ******\n"); - struct symbols_arr *tmp_arr; - char str[] = "i"; - int t = hashmap_get(char_to_symbols, str, (any_t *)&tmp_arr); - if (!t) print_symbols_arr(tmp_arr); - return char_to_symbols; - -} - -struct symbols_arr *create_array_of_chars() { - - struct symbols_arr *ret = - (struct symbols_arr *)malloc(sizeof(struct symbols_arr)); - ret->len = 0; - ret->symbols_arr = (char **)malloc(MAX_TERMINAL_NUMS * sizeof(char *)); - size_t i; - for (i = 0; i < MAX_TERMINAL_NUMS; i++) { - - ret->symbols_arr[i] = (char *)calloc(MAX_TERMINAL_LENGTH, sizeof(char)); - - } - - return ret; - -} - -// map a symbol to a list of (state, trigger_idx) -map_t create_pda_hashmap(state *pda, struct symbols_arr *symbols_arr) { - - int state_idx, trigger_idx, - r; // r is the return result for hashmap operation - map_t m = hashmap_new(); - // iterate over pda - for (state_idx = 0; state_idx < numstates; state_idx++) { - -#ifdef DEBUG - printf("------ The state idx is %d ------\n", state_idx); -#endif - if (state_idx == final_state) continue; - state *state_curr = pda + state_idx; - for (trigger_idx = 0; trigger_idx < state_curr->trigger_len; - trigger_idx++) { - -#ifdef DEBUG - printf("------ The trigger idx is %d ------\n", trigger_idx); -#endif - trigger * trigger_curr = state_curr->ptr + trigger_idx; - char * symbol_curr = trigger_curr->term; - size_t symbol_len = trigger_curr->term_len; - struct terminal_arr *terminal_arr_curr; - r = hashmap_get(m, symbol_curr, (any_t *)&terminal_arr_curr); - if (r) { - - // the symbol is not in the map - if (!add_element_to_symbols_arr(symbols_arr, symbol_curr, - symbol_len + 1)) { - - // the number of symbols exceed maximual number - free_hashmap(m, &free_terminal_arr); - return NULL; - - } - -#ifdef DEBUG - printf("Symbol %s is not in map\n", symbol_curr); -#endif - struct terminal_arr *new_terminal_arr = - (struct terminal_arr *)malloc(sizeof(struct terminal_arr)); - new_terminal_arr->start = (struct terminal_meta *)calloc( - numstates, sizeof(struct terminal_meta)); -#ifdef DEBUG - printf("allocate new memory address %p\n", new_terminal_arr->start); -#endif - new_terminal_arr->start->state_name = state_idx; - new_terminal_arr->start->dest = trigger_curr->dest; - new_terminal_arr->start->trigger_idx = trigger_idx; - new_terminal_arr->len = 1; -#ifdef DEBUG - printf("Symbol %s is included in %zu edges\n", symbol_curr, - new_terminal_arr->len); -#endif - r = hashmap_put(m, symbol_curr, new_terminal_arr); -#ifdef DEBUG - if (r) { - - printf("hashmap put failed\n"); - - } else { - - printf("hashmap put succeeded\n"); - - } - -#endif - // if symbol not already in map, it's not in symbol_dict, simply add the - // symbol to the array - // TODO: need to initialize symbol dict (calloc) - - } else { - -// the symbol is already in map -// append to terminal array -// no need to touch start -#ifdef DEBUG - printf("Symbol %s is in map\n", symbol_curr); -#endif - struct terminal_meta *modify = - terminal_arr_curr->start + terminal_arr_curr->len; - modify->state_name = state_idx; - modify->trigger_idx = trigger_idx; - modify->dest = trigger_curr->dest; - terminal_arr_curr->len++; -#ifdef DEBUG - printf("Symbol %s is included in %zu edges\n", symbol_curr, - terminal_arr_curr->len); -#endif - // if symbol already in map, it's already in symbol_dict as well, no - // work needs to be done - - } - - } - - } - - return m; - -} - -void print_symbols_arr(struct symbols_arr *arr) { - - size_t i; - printf("("); - for (i = 0; i < arr->len; i++) { - - printf("%s", arr->symbols_arr[i]); - if (i != arr->len - 1) printf(","); - - } - - printf(")\n"); - -} - -void free_hashmap(map_t m, int (*f)(any_t, any_t)) { - - if (!m) { - - printf("m map is empty\n"); - return; - - } - - int r = hashmap_iterate(m, f, NULL); -#ifdef DEBUG - if (!r) - printf("free hashmap items successfully!\n"); - else - printf("free hashmap items failed"); -#endif - hashmap_free(m); - -} - -int free_array_of_chars(any_t placeholder, any_t item) { - - if (!item) { - - printf("item is empty\n"); - return MAP_MISSING; - - } - - struct symbols_arr *arr = item; - size_t i; - for (i = 0; i < MAX_TERMINAL_NUMS; i++) { - - free(arr->symbols_arr[i]); - - } - - free(arr->symbols_arr); - free(arr); - return MAP_OK; - -} - -void free_pda(state *pda) { - - if (!pda) { - - printf("pda is null\n"); - return; - - } - - size_t i, j; - for (i = 0; i < numstates; i++) { - - state *state_curr = pda + i; - for (j = 0; j < state_curr->trigger_len; j++) { - - trigger *trigger_curr = state_curr->ptr + j; - free(trigger_curr->id); - free(trigger_curr->term); - - } - - free(state_curr->ptr); - - } - - free(pda); - -} - -int dfs(struct terminal_arr **tmp, const char *program, - const size_t program_length, struct terminal_arr **res, size_t idx, - int curr_state) { - - if (*res) return 1; // 1 means successfully found a path - if (idx == program_length) { - - // test if the last terminal points to the final state - if (curr_state != final_state) return 0; - *res = *tmp; - return 1; - - } - - if ((*tmp)->len == MAX_PROGRAM_WALK_LENGTH) { - - printf("Reached maximum program walk length\n"); - return 0; - - } - - char first_char[2]; - first_char[0] = program[idx]; // first character of program - first_char[1] = '\0'; - int r; - struct symbols_arr *matching_symbols; - r = hashmap_get(first_char_to_symbols_map, first_char, - (any_t *)&matching_symbols); - if (r) { - - printf( - "No symbols match the current character, abort!"); // hopefully won't - // reach this state - return 0; - - } - - size_t i; - bool matched = false; - for (i = 0; i < matching_symbols->len; i++) { - - if (matched) break; - char *matching_symbol = matching_symbols->symbols_arr[i]; - if (!strncmp(matching_symbol, program + idx, strlen(matching_symbol))) { - - // there is a match - matched = true; - // find the possible paths of that symbol - struct terminal_arr *ta; - int r2 = hashmap_get(pda_map, matching_symbol, (any_t *)&ta); - if (!r2) { - - // the terminal is found in the dictionary - size_t j; - for (j = 0; j < ta->len; j++) { - - int state_name = (ta->start + j)->state_name; - if (state_name != curr_state) continue; - size_t trigger_idx = (ta->start + j)->trigger_idx; - int dest = (ta->start + j)->dest; - (*tmp)->start[(*tmp)->len].state_name = state_name; - (*tmp)->start[(*tmp)->len].trigger_idx = trigger_idx; - (*tmp)->start[(*tmp)->len].dest = dest; - (*tmp)->len++; - if (dfs(tmp, program, program_length, res, - idx + strlen(matching_symbol), dest)) - return 1; - (*tmp)->len--; - - } - - } else { - - printf("No path goes out of this symbol, abort!"); // hopefully won't - // reach this state - return 0; - - } - - } - - } - - return 0; - /* - 1. First extract the first character of the current program - 2. Match the possible symbols of that program - 3. Find the possible paths of that symbol - 4. Add to temporary terminal array - 5. Recursion - 6. Pop the path from the terminal array - 7. - If idx reaches end of program, set tmp to res - - If idx is not at the end and nothing matches, the current path is not - working, simply return 0 - */ - -} - -Array *constructArray(struct terminal_arr *terminal_arr, state *pda) { - - Array *res = (Array *)calloc(1, sizeof(Array)); - initArray(res, INIT_SIZE); - size_t i; - for (i = 0; i < terminal_arr->len; i++) { - - struct terminal_meta *curr = terminal_arr->start + i; - int state_name = curr->state_name; - int trigger_idx = curr->trigger_idx; - // get the symbol from pda - state * state_curr = pda + state_name; - trigger *trigger_curr = state_curr->ptr + trigger_idx; - char * symbol_curr = trigger_curr->term; - size_t symbol_curr_len = trigger_curr->term_len; - insertArray(res, state_name, symbol_curr, symbol_curr_len, trigger_idx); - - } - - return res; - -} - -Array *automaton_parser(const uint8_t *seed_fn) { - - Array *parsed_res = NULL; - FILE * ptr; - ptr = fopen(seed_fn, "r"); - if (ptr == NULL) { - - printf("file can't be opened \n"); - fclose(ptr); - return NULL; - - } - - char ch; - char program[MAX_PROGRAM_LENGTH]; - int i = 0; - bool program_too_long = false; - do { - - if (i == MAX_PROGRAM_LENGTH) { - - // the maximum program length is reached - printf("maximum program length is reached, give up the current seed\n"); - program_too_long = true; - break; - - } - - ch = fgetc(ptr); - program[i] = ch; - i++; - - } while (ch != EOF); - - program[i - 1] = '\0'; - fclose(ptr); - if ((i == 1 && program[0] == '\0') || program_too_long) return NULL; - struct terminal_arr *arr_holder; - struct terminal_arr *dfs_res = NULL; - arr_holder = (struct terminal_arr *)calloc(1, sizeof(struct terminal_arr)); - arr_holder->start = (struct terminal_meta *)calloc( - MAX_PROGRAM_WALK_LENGTH, sizeof(struct terminal_meta)); - int dfs_success = - dfs(&arr_holder, program, strlen(program), &dfs_res, 0, init_state); - // printf("*** return value %d *** \n", dfs_success); - if (dfs_success) { parsed_res = constructArray(dfs_res, pda); } - free(arr_holder->start); - free(arr_holder); - return parsed_res; - -} - -// return 0 if fails -// return 1 if succeeds -int add_element_to_symbols_arr(struct symbols_arr *symbols_arr, char *symbol, - size_t symbol_len) { - - if (symbols_arr->len >= MAX_TERMINAL_NUMS || - symbol_len >= MAX_TERMINAL_LENGTH) { - - return 0; - - } - - strncpy(symbols_arr->symbols_arr[symbols_arr->len], symbol, symbol_len); - symbols_arr->len++; - return 1; - -} - |