diff options
-rw-r--r-- | custom_mutators/gramatron/automaton-parser.c | 367 | ||||
-rw-r--r-- | custom_mutators/gramatron/automaton-parser.h | 74 | ||||
-rwxr-xr-x | custom_mutators/gramatron/build_gramatron_mutator.sh | 4 | ||||
-rw-r--r-- | custom_mutators/gramatron/gramfuzz.c | 41 | ||||
-rw-r--r-- | custom_mutators/gramatron/hashmap.c | 5 | ||||
-rw-r--r-- | custom_mutators/gramatron/testMakefile.mk | 2 | ||||
-rwxr-xr-x | custom_mutators/grammar_mutator/build_grammar_mutator.sh | 2 |
7 files changed, 486 insertions, 9 deletions
diff --git a/custom_mutators/gramatron/automaton-parser.c b/custom_mutators/gramatron/automaton-parser.c new file mode 100644 index 00000000..3265e0cf --- /dev/null +++ b/custom_mutators/gramatron/automaton-parser.c @@ -0,0 +1,367 @@ +#include "afl-fuzz.h" +#include "automaton-parser.h" + +int free_terminal_arr(any_t placeholder, any_t item) { + struct terminal_arr* tmp = item; + free(tmp->start); + free(tmp); + return MAP_OK; +} + +int compare_two_symbols(const void * a, const void * b) { + char* a_char = *(char **)a; + char* b_char = *(char **)b; + size_t fa = strlen(a_char); + size_t fb = strlen(b_char); + if (fa > fb) return -1; + else if (fa == fb) return 0; + else return 1; + +} + +// TODO: create a map +// key: first character of a symbol, value: a list of symbols that starts with key, the list is sorted in descending order of the symbol lengths +map_t create_first_char_to_symbols_hashmap(struct symbols_arr *symbols, struct symbols_arr *first_chars) { + map_t char_to_symbols = hashmap_new(); + // TODO: free the allocated map + // sort the symbol_dict in descending order of the symbol lengths + qsort(symbols->symbols_arr, symbols->len, sizeof(char*), compare_two_symbols); + #ifdef DEBUG + printf("------ print after sort ------\n"); + print_symbols_arr(symbols); + #endif + size_t i; + int r; // response from hashmap get and put + for (i = 0; i < symbols->len; i++) { + char* symbol_curr = symbols->symbols_arr[i]; + // get first character from symbol_curr + char first_character[2]; + first_character[0] = symbol_curr[0]; + first_character[1] = '\0'; + #ifdef DEBUG + printf("****** Current symbol is %s, its first character is %s ******\n", symbol_curr, first_character); + #endif + // key would be the first character of symbol_curr + // the value would be an array of chars + struct symbols_arr* associated_symbols; + r = hashmap_get(char_to_symbols, first_character, (any_t*)&associated_symbols); + if (!r) { + // append current symbol to existing array + #ifdef DEBUG + printf("****** First character %s is already in hashmap ******\n", first_character); + #endif + if(!add_element_to_symbols_arr(associated_symbols, symbol_curr, strlen(symbol_curr) + 1)) { + free_hashmap(char_to_symbols, &free_array_of_chars); + return NULL; + } + } + else { + // start a new symbols_arr + #ifdef DEBUG + printf("****** First character %s is not in hashmap ******\n", first_character); + #endif + struct symbols_arr* new_associated_symbols = create_array_of_chars(); + strncpy(first_chars->symbols_arr[first_chars->len], first_character, 2); // 2 because one character plus the NULL byte + add_element_to_symbols_arr(new_associated_symbols, symbol_curr, strlen(symbol_curr) + 1); + r = hashmap_put(char_to_symbols, first_chars->symbols_arr[first_chars->len], new_associated_symbols); + first_chars->len++; + #ifdef DEBUG + if (r) { + printf("hashmap put failed\n"); + } + else { + printf("hashmap put succeeded\n"); + } + #endif + } + } + printf("****** Testing ******\n"); + struct symbols_arr* tmp_arr; + char str[] = "i"; + int t = hashmap_get(char_to_symbols, str, (any_t *)&tmp_arr); + if (!t) + print_symbols_arr(tmp_arr); + return char_to_symbols; +} + +struct symbols_arr* create_array_of_chars() { + struct symbols_arr* ret = (struct symbols_arr*)malloc(sizeof(struct symbols_arr)); + ret->len = 0; + ret->symbols_arr = (char **)malloc(MAX_TERMINAL_NUMS * sizeof(char*)); + size_t i; + for (i = 0; i < MAX_TERMINAL_NUMS; i++) { + ret->symbols_arr[i] = (char *)calloc(MAX_TERMINAL_LENGTH, sizeof(char)); + } + return ret; +} + +// map a symbol to a list of (state, trigger_idx) +map_t create_pda_hashmap(state* pda, struct symbols_arr* symbols_arr) { + int state_idx, trigger_idx, r; // r is the return result for hashmap operation + map_t m = hashmap_new(); + // iterate over pda + for (state_idx = 0; state_idx < numstates; state_idx++) { + #ifdef DEBUG + printf("------ The state idx is %d ------\n", state_idx); + #endif + if (state_idx == final_state) continue; + state* state_curr = pda + state_idx; + for (trigger_idx = 0; trigger_idx < state_curr->trigger_len; trigger_idx++) { + #ifdef DEBUG + printf("------ The trigger idx is %d ------\n", trigger_idx); + #endif + trigger* trigger_curr = state_curr->ptr + trigger_idx; + char* symbol_curr = trigger_curr->term; + size_t symbol_len = trigger_curr->term_len; + struct terminal_arr* terminal_arr_curr; + r = hashmap_get(m, symbol_curr, (any_t*)&terminal_arr_curr); + if (r) { + // the symbol is not in the map + if (!add_element_to_symbols_arr(symbols_arr, symbol_curr, symbol_len+1)) { + // the number of symbols exceed maximual number + free_hashmap(m, &free_terminal_arr); + return NULL; + } + #ifdef DEBUG + printf("Symbol %s is not in map\n", symbol_curr); + #endif + struct terminal_arr* new_terminal_arr = (struct terminal_arr*)malloc(sizeof(struct terminal_arr)); + new_terminal_arr->start = (struct terminal_meta*)calloc(numstates, sizeof(struct terminal_meta)); + #ifdef DEBUG + printf("allocate new memory address %p\n", new_terminal_arr->start); + #endif + new_terminal_arr->start->state_name = state_idx; + new_terminal_arr->start->dest = trigger_curr->dest; + new_terminal_arr->start->trigger_idx = trigger_idx; + new_terminal_arr->len = 1; + #ifdef DEBUG + printf("Symbol %s is included in %zu edges\n", symbol_curr, new_terminal_arr->len); + #endif + r = hashmap_put(m, symbol_curr, new_terminal_arr); + #ifdef DEBUG + if (r) { + printf("hashmap put failed\n"); + } + else { + printf("hashmap put succeeded\n"); + } + #endif + // if symbol not already in map, it's not in symbol_dict, simply add the symbol to the array + // TODO: need to initialize symbol dict (calloc) + } + else { + // the symbol is already in map + // append to terminal array + // no need to touch start + #ifdef DEBUG + printf("Symbol %s is in map\n", symbol_curr); + #endif + struct terminal_meta* modify = terminal_arr_curr->start + terminal_arr_curr->len; + modify->state_name = state_idx; + modify->trigger_idx = trigger_idx; + modify->dest = trigger_curr->dest; + terminal_arr_curr->len++; + #ifdef DEBUG + printf("Symbol %s is included in %zu edges\n", symbol_curr, terminal_arr_curr->len); + #endif + // if symbol already in map, it's already in symbol_dict as well, no work needs to be done + } + + } + } + return m; +} + +void print_symbols_arr(struct symbols_arr* arr) { + size_t i; + printf("("); + for (i = 0; i < arr->len; i++) { + printf("%s", arr->symbols_arr[i]); + if (i != arr->len - 1) printf(","); + } + printf(")\n"); +} + +void free_hashmap(map_t m, int (*f)(any_t, any_t)) { + if (!m) { + printf("m map is empty\n"); + return; + } + int r = hashmap_iterate(m, f, NULL); + #ifdef DEBUG + if (!r) printf("free hashmap items successfully!\n"); + else printf("free hashmap items failed"); + #endif + hashmap_free(m); +} + +int free_array_of_chars(any_t placeholder, any_t item) { + if (!item) { + printf("item is empty\n"); + return MAP_MISSING; + } + struct symbols_arr* arr = item; + size_t i; + for (i = 0; i < MAX_TERMINAL_NUMS; i++) { + free(arr->symbols_arr[i]); + } + free(arr->symbols_arr); + free(arr); + return MAP_OK; +} + +void free_pda(state* pda) { + if (!pda) { + printf("pda is null\n"); + return; + } + size_t i, j; + for (i = 0; i < numstates; i++) { + state* state_curr = pda + i; + for (j = 0; j < state_curr->trigger_len; j++) { + trigger* trigger_curr = state_curr->ptr + j; + free(trigger_curr->id); + free(trigger_curr->term); + } + free(state_curr->ptr); + } + free(pda); +} + +int dfs(struct terminal_arr** tmp, const char* program, const size_t program_length, struct terminal_arr** res, size_t idx, int curr_state) { + if (*res) return 1; // 1 means successfully found a path + if (idx == program_length) { + // test if the last terminal points to the final state + if (curr_state != final_state) return 0; + *res = *tmp; + return 1; + } + if ((*tmp)->len == MAX_PROGRAM_WALK_LENGTH) { + printf("Reached maximum program walk length\n"); + return 0; + } + char first_char[2]; + first_char[0] = program[idx]; // first character of program + first_char[1] = '\0'; + int r; + struct symbols_arr* matching_symbols; + r = hashmap_get(first_char_to_symbols_map, first_char, (any_t *)&matching_symbols); + if (r) { + printf("No symbols match the current character, abort!"); // hopefully won't reach this state + return 0; + } + size_t i; + bool matched = false; + for (i = 0; i < matching_symbols->len; i++) { + if (matched) break; + char *matching_symbol = matching_symbols->symbols_arr[i]; + if (!strncmp(matching_symbol, program + idx, strlen(matching_symbol))) { + // there is a match + matched = true; + // find the possible paths of that symbol + struct terminal_arr* ta; + int r2 = hashmap_get(pda_map, matching_symbol, (any_t *)&ta); + if (!r2) { + // the terminal is found in the dictionary + size_t j; + for (j = 0; j < ta->len; j++) { + int state_name = (ta->start + j)->state_name; + if (state_name != curr_state) continue; + size_t trigger_idx = (ta->start + j)->trigger_idx; + int dest = (ta->start + j)->dest; + (*tmp)->start[(*tmp)->len].state_name = state_name; + (*tmp)->start[(*tmp)->len].trigger_idx = trigger_idx; + (*tmp)->start[(*tmp)->len].dest = dest; + (*tmp)->len++; + if (dfs(tmp, program, program_length, res, idx + strlen(matching_symbol), dest)) return 1; + (*tmp)->len--; + } + } + else { + printf("No path goes out of this symbol, abort!"); // hopefully won't reach this state + return 0; + } + } + } + return 0; + /* + 1. First extract the first character of the current program + 2. Match the possible symbols of that program + 3. Find the possible paths of that symbol + 4. Add to temporary terminal array + 5. Recursion + 6. Pop the path from the terminal array + 7. - If idx reaches end of program, set tmp to res + - If idx is not at the end and nothing matches, the current path is not working, simply return 0 + */ +} + +Array* constructArray(struct terminal_arr* terminal_arr, state* pda) { + Array * res = (Array *)calloc(1, sizeof(Array)); + initArray(res, INIT_SIZE); + size_t i; + for (i = 0; i < terminal_arr->len; i ++) { + struct terminal_meta* curr = terminal_arr->start + i; + int state_name = curr->state_name; + int trigger_idx = curr->trigger_idx; + // get the symbol from pda + state* state_curr = pda + state_name; + trigger* trigger_curr = state_curr->ptr + trigger_idx; + char *symbol_curr = trigger_curr->term; + size_t symbol_curr_len = trigger_curr->term_len; + insertArray(res, state_name, symbol_curr, symbol_curr_len, trigger_idx); + } + return res; +} + +Array* automaton_parser(const uint8_t *seed_fn) { + Array* parsed_res = NULL; + FILE* ptr; + ptr = fopen(seed_fn, "r"); + if (ptr == NULL) { + printf("file can't be opened \n"); + fclose(ptr); + return NULL; + } + char ch; + char program[MAX_PROGRAM_LENGTH]; + int i = 0; + bool program_too_long = false; + do { + if (i == MAX_PROGRAM_LENGTH) { + // the maximum program length is reached + printf("maximum program length is reached, give up the current seed\n"); + program_too_long = true; + break; + } + ch = fgetc(ptr); + program[i] = ch; + i ++; + } while (ch != EOF); + program[i-1] = '\0'; + fclose(ptr); + if ((i == 1 && program[0] == '\0') || program_too_long) return NULL; + struct terminal_arr* arr_holder; + struct terminal_arr* dfs_res = NULL; + arr_holder = (struct terminal_arr*)calloc(1, sizeof(struct terminal_arr)); + arr_holder->start = (struct terminal_meta*)calloc(MAX_PROGRAM_WALK_LENGTH, sizeof(struct terminal_meta)); + int dfs_success = dfs(&arr_holder, program, strlen(program), &dfs_res, 0, init_state); + // printf("*** return value %d *** \n", dfs_success); + if (dfs_success) { + parsed_res = constructArray(dfs_res, pda); + } + free(arr_holder->start); + free(arr_holder); + return parsed_res; +} + +// return 0 if fails +// return 1 if succeeds +int add_element_to_symbols_arr(struct symbols_arr* symbols_arr, char* symbol, size_t symbol_len) { + if (symbols_arr->len >= MAX_TERMINAL_NUMS || symbol_len >= MAX_TERMINAL_LENGTH) { + return 0; + } + strncpy(symbols_arr->symbols_arr[symbols_arr->len], symbol, symbol_len); + symbols_arr->len++; + return 1; +} \ No newline at end of file diff --git a/custom_mutators/gramatron/automaton-parser.h b/custom_mutators/gramatron/automaton-parser.h new file mode 100644 index 00000000..d67a1679 --- /dev/null +++ b/custom_mutators/gramatron/automaton-parser.h @@ -0,0 +1,74 @@ +#ifndef _AUTOMATON_PARSER_H +#define _AUTOMATON_PARSER_H + +#define NUMINPUTS 500 +#define MAX_PROGRAM_LENGTH 20000 +#define MAX_PROGRAM_WALK_LENGTH 5000 +#define MAX_TERMINAL_NUMS 5000 +#define MAX_TERMINAL_LENGTH 1000 +#define MAX_PROGRAM_NAME_LENGTH 5000 + +#include "gramfuzz.h" + +// represents an edge in the FSA +struct terminal_meta { + + int state_name; + int trigger_idx; + int dest; + +} ; + +// represents a set of edges +struct terminal_arr { + + struct terminal_meta* start; + size_t len; + +} ; + +// essentially a string array +struct symbols_arr { + char** symbols_arr; + size_t len; +} ; + +struct symbols_arr* symbols; // symbols contain all the symbols in the language +map_t pda_map; // a map that maps each symbol in the language to a set of edges +struct symbols_arr* first_chars; // an array of first characters, only temperary array +map_t first_char_to_symbols_map; // a map that maps each first character to a set of symbols (the symbols are sorted in descending order) + + + +// freeing terminal arrays +int free_terminal_arr(any_t placeholder, any_t item); + +// return a map that maps each symbol in the language to a set of edges +// populate symbols_arr with all the symbols in the language +map_t create_pda_hashmap(state* pda, struct symbols_arr* symbols_arr); + +// print the string array +void print_symbols_arr(struct symbols_arr* arr); + +// free hashmap +// the function pointer contains function to free the values in the hashmap +void free_hashmap(map_t m, int (*f)(any_t, any_t)); + +// free string array +int free_array_of_chars(any_t placeholder, any_t item); + +// free the pda +void free_pda(state* pda); + +// create a string array +struct symbols_arr* create_array_of_chars(); + +map_t create_first_char_to_symbols_hashmap(struct symbols_arr *symbols, struct symbols_arr *first_chars); + +// return the automaton represented by the seed +Array* automaton_parser(const uint8_t *seed_fn); + +int add_element_to_symbols_arr(struct symbols_arr* symbols_arr, char* symbol, size_t symbol_len); + + +#endif \ No newline at end of file diff --git a/custom_mutators/gramatron/build_gramatron_mutator.sh b/custom_mutators/gramatron/build_gramatron_mutator.sh index 9952e7f5..0638e3b2 100755 --- a/custom_mutators/gramatron/build_gramatron_mutator.sh +++ b/custom_mutators/gramatron/build_gramatron_mutator.sh @@ -125,7 +125,7 @@ else } fi -test -d json-c/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } +test -f json-c/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } echo "[+] Got json-c." test -e json-c/.libs/libjson-c.a || { @@ -144,6 +144,6 @@ echo echo echo "[+] Json-c successfully prepared!" echo "[+] Builing gramatron now." -$CC -O3 -g -fPIC -Wno-unused-result -Wl,--allow-multiple-definition -I../../include -o gramatron.so -shared -I. -I/prg/dev/include gramfuzz.c gramfuzz-helpers.c gramfuzz-mutators.c gramfuzz-util.c hashmap.c ../../src/afl-performance.o json-c/.libs/libjson-c.a || exit 1 +$CC -O3 -g -fPIC -Wno-unused-result -Wl,--allow-multiple-definition -I../../include -o gramatron.so -shared -I. -I/prg/dev/include gramfuzz.c gramfuzz-helpers.c gramfuzz-mutators.c gramfuzz-util.c hashmap.c automaton-parser.c ../../src/afl-performance.o json-c/.libs/libjson-c.a || exit 1 echo echo "[+] gramatron successfully built!" diff --git a/custom_mutators/gramatron/gramfuzz.c b/custom_mutators/gramatron/gramfuzz.c index 9c9dbb43..ccdbbe60 100644 --- a/custom_mutators/gramatron/gramfuzz.c +++ b/custom_mutators/gramatron/gramfuzz.c @@ -9,6 +9,7 @@ #include "afl-fuzz.h" #include "gramfuzz.h" +#include "automaton-parser.h" #define MUTATORS 4 // Specify the total number of mutators @@ -163,6 +164,11 @@ my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) { if (automaton_file) { pda = create_pda(automaton_file); + symbols = create_array_of_chars(); + pda_map = create_pda_hashmap((struct state*)pda, symbols); + print_symbols_arr(symbols); + first_chars = create_array_of_chars(); + first_char_to_symbols_map = create_first_char_to_symbols_hashmap(symbols, first_chars); } else { @@ -281,12 +287,25 @@ u8 afl_custom_queue_new_entry(my_mutator_t * data, // filename_new_queue,filename_orig_queue,automaton_fn); if (filename_orig_queue) { - - write_input(data->mutated_walk, automaton_fn); + if (data->mutated_walk) { + write_input(data->mutated_walk, automaton_fn); + } + else { + Array* parsed_walk = automaton_parser(filename_new_queue); + if (!parsed_walk) PFATAL("Parser unsuccessful on %s", filename_new_queue); + write_input(parsed_walk, automaton_fn); + free(parsed_walk->start); + free(parsed_walk); + } } else { - new_input = gen_input(pda, NULL); + // TODO: try to parse the input seeds here, if they can be parsed, then generate the corresponding automaton file + // if not, then generate a new input + new_input = automaton_parser(filename_new_queue); + if (new_input == NULL) { + new_input = gen_input(pda, NULL); + } write_input(new_input, automaton_fn); // Update the placeholder file @@ -328,6 +347,16 @@ uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) { // get the filename u8 * automaton_fn = alloc_printf("%s.aut", filename); + // find the automaton file, if the automaton file cannot be found, do not fuzz the current entry on the queue + FILE *fp; + fp = fopen(automaton_fn, "rb"); + if (fp == NULL) { + + printf("File '%s' does not exist, exiting. Would not fuzz current entry on the queue\n", automaton_fn); + return 0; + + } + IdxMap_new *statemap_ptr; terminal * term_ptr; int state; @@ -424,6 +453,10 @@ void afl_custom_deinit(my_mutator_t *data) { free(data->mutator_buf); free(data); - + free_hashmap(pda_map, &free_terminal_arr); + free_hashmap(first_char_to_symbols_map, &free_array_of_chars); + free_pda(pda); + free_array_of_chars(NULL, symbols); // free the array of symbols + free_array_of_chars(NULL, first_chars); } diff --git a/custom_mutators/gramatron/hashmap.c b/custom_mutators/gramatron/hashmap.c index 09715b87..4f97e085 100644 --- a/custom_mutators/gramatron/hashmap.c +++ b/custom_mutators/gramatron/hashmap.c @@ -151,7 +151,7 @@ static unsigned long crc32_tab[] = { /* Return a 32-bit CRC of the contents of the buffer. */ -unsigned long crc32(const unsigned char *s, unsigned int len) { +unsigned long custom_crc32(const unsigned char *s, unsigned int len) { unsigned int i; unsigned long crc32val; @@ -171,8 +171,9 @@ unsigned long crc32(const unsigned char *s, unsigned int len) { * Hashing function for a string */ unsigned int hashmap_hash_int(hashmap_map *m, char *keystring) { + unsigned int keystring_len = strlen(keystring); - unsigned long key = crc32((unsigned char *)(keystring), strlen(keystring)); + unsigned long key = custom_crc32((unsigned char *)(keystring), keystring_len); /* Robert Jenkins' 32 bit Mix Function */ key += (key << 12); diff --git a/custom_mutators/gramatron/testMakefile.mk b/custom_mutators/gramatron/testMakefile.mk new file mode 100644 index 00000000..0b2c6236 --- /dev/null +++ b/custom_mutators/gramatron/testMakefile.mk @@ -0,0 +1,2 @@ +test: test.c + gcc -g -fPIC -Wno-unused-result -Wl,--allow-multiple-definition -I../../include -o test -I. -I/prg/dev/include test.c gramfuzz-helpers.c gramfuzz-mutators.c gramfuzz-util.c hashmap.c ../../src/afl-performance.o json-c/.libs/libjson-c.a \ No newline at end of file diff --git a/custom_mutators/grammar_mutator/build_grammar_mutator.sh b/custom_mutators/grammar_mutator/build_grammar_mutator.sh index 15b8b1db..e8594ba3 100755 --- a/custom_mutators/grammar_mutator/build_grammar_mutator.sh +++ b/custom_mutators/grammar_mutator/build_grammar_mutator.sh @@ -119,7 +119,7 @@ else } fi -test -d grammar_mutator/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } +test -f grammar_mutator/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } echo "[+] Got grammar mutator." cd "grammar_mutator" || exit 1 |