diff options
34 files changed, 602 insertions, 1009 deletions
diff --git a/.gitmodules b/.gitmodules index 8ba1c39d..18fda27e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,9 +10,6 @@ [submodule "custom_mutators/gramatron/json-c"] path = custom_mutators/gramatron/json-c url = https://github.com/json-c/json-c -[submodule "utils/optimin/EvalMaxSAT"] - path = utils/optimin/EvalMaxSAT - url = https://github.com/FlorentAvellaneda/EvalMaxSAT [submodule "coresight_mode/patchelf"] path = coresight_mode/patchelf url = https://github.com/NixOS/patchelf.git diff --git a/GNUmakefile b/GNUmakefile index 072bd09d..42d48b68 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -696,6 +696,7 @@ install: all $(MANPAGES) @rm -f $${DESTDIR}$(BIN_PATH)/afl-plot.sh @rm -f $${DESTDIR}$(BIN_PATH)/afl-as @rm -f $${DESTDIR}$(HELPER_PATH)/afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH)/afl-llvm-rt-32.o $${DESTDIR}$(HELPER_PATH)/afl-llvm-rt-64.o $${DESTDIR}$(HELPER_PATH)/afl-gcc-rt.o + @for i in afl-llvm-dict2file.so afl-llvm-lto-instrumentlist.so afl-llvm-pass.so cmplog-instructions-pass.so cmplog-routines-pass.so cmplog-switches-pass.so compare-transform-pass.so libcompcov.so libdislocator.so libnyx.so libqasan.so libtokencap.so SanitizerCoverageLTO.so SanitizerCoveragePCGUARD.so split-compares-pass.so split-switches-pass.so; do echo rm -fv $${DESTDIR}$(HELPER_PATH)/$${i}; done install -m 755 $(PROGS) $(SH_PROGS) $${DESTDIR}$(BIN_PATH) @if [ -f afl-qemu-trace ]; then install -m 755 afl-qemu-trace $${DESTDIR}$(BIN_PATH); fi @if [ -f utils/plot_ui/afl-plot-ui ]; then install -m 755 utils/plot_ui/afl-plot-ui $${DESTDIR}$(BIN_PATH); fi diff --git a/afl-cmin b/afl-cmin index 853c9398..71723c70 100755 --- a/afl-cmin +++ b/afl-cmin @@ -135,6 +135,12 @@ function exists_and_is_executable(binarypath) { } BEGIN { + if (0 != system( "test -t 1")) { + redirected = 1 + } else { + redirected = 0 + } + print "corpus minimization tool for afl++ (awk version)\n" # defaults @@ -463,7 +469,8 @@ BEGIN { while (cur < in_count) { fn = infilesSmallToBig[cur] ++cur - printf "\r Processing file "cur"/"in_count + if (redirected == 0) { printf "\r Processing file "cur"/"in_count } + else { print " Processing file "cur"/"in_count } # create path for the trace file from afl-showmap tracefile_path = trace_dir"/"fn # gather all keys, and count them @@ -502,7 +509,9 @@ BEGIN { key = field[nrFields] ++tcnt; - printf "\r Processing tuple "tcnt"/"tuple_count" with count "key_count[key]"..." + if (redirected == 0) { printf "\r Processing tuple "tcnt"/"tuple_count" with count "key_count[key]"..." } + else { print " Processing tuple "tcnt"/"tuple_count" with count "key_count[key]"..." } + if (key in keyAlreadyKnown) { continue } diff --git a/coresight_mode/patchelf b/coresight_mode/patchelf -Subproject 7ec8edbe094ee13c91dadca191f92b9dfac8c0f +Subproject be0cc30a59b2755844bcd48823f6fbc8d97b93a diff --git a/custom_mutators/gramatron/automaton-parser.c b/custom_mutators/gramatron/automaton-parser.c new file mode 100644 index 00000000..3265e0cf --- /dev/null +++ b/custom_mutators/gramatron/automaton-parser.c @@ -0,0 +1,367 @@ +#include "afl-fuzz.h" +#include "automaton-parser.h" + +int free_terminal_arr(any_t placeholder, any_t item) { + struct terminal_arr* tmp = item; + free(tmp->start); + free(tmp); + return MAP_OK; +} + +int compare_two_symbols(const void * a, const void * b) { + char* a_char = *(char **)a; + char* b_char = *(char **)b; + size_t fa = strlen(a_char); + size_t fb = strlen(b_char); + if (fa > fb) return -1; + else if (fa == fb) return 0; + else return 1; + +} + +// TODO: create a map +// key: first character of a symbol, value: a list of symbols that starts with key, the list is sorted in descending order of the symbol lengths +map_t create_first_char_to_symbols_hashmap(struct symbols_arr *symbols, struct symbols_arr *first_chars) { + map_t char_to_symbols = hashmap_new(); + // TODO: free the allocated map + // sort the symbol_dict in descending order of the symbol lengths + qsort(symbols->symbols_arr, symbols->len, sizeof(char*), compare_two_symbols); + #ifdef DEBUG + printf("------ print after sort ------\n"); + print_symbols_arr(symbols); + #endif + size_t i; + int r; // response from hashmap get and put + for (i = 0; i < symbols->len; i++) { + char* symbol_curr = symbols->symbols_arr[i]; + // get first character from symbol_curr + char first_character[2]; + first_character[0] = symbol_curr[0]; + first_character[1] = '\0'; + #ifdef DEBUG + printf("****** Current symbol is %s, its first character is %s ******\n", symbol_curr, first_character); + #endif + // key would be the first character of symbol_curr + // the value would be an array of chars + struct symbols_arr* associated_symbols; + r = hashmap_get(char_to_symbols, first_character, (any_t*)&associated_symbols); + if (!r) { + // append current symbol to existing array + #ifdef DEBUG + printf("****** First character %s is already in hashmap ******\n", first_character); + #endif + if(!add_element_to_symbols_arr(associated_symbols, symbol_curr, strlen(symbol_curr) + 1)) { + free_hashmap(char_to_symbols, &free_array_of_chars); + return NULL; + } + } + else { + // start a new symbols_arr + #ifdef DEBUG + printf("****** First character %s is not in hashmap ******\n", first_character); + #endif + struct symbols_arr* new_associated_symbols = create_array_of_chars(); + strncpy(first_chars->symbols_arr[first_chars->len], first_character, 2); // 2 because one character plus the NULL byte + add_element_to_symbols_arr(new_associated_symbols, symbol_curr, strlen(symbol_curr) + 1); + r = hashmap_put(char_to_symbols, first_chars->symbols_arr[first_chars->len], new_associated_symbols); + first_chars->len++; + #ifdef DEBUG + if (r) { + printf("hashmap put failed\n"); + } + else { + printf("hashmap put succeeded\n"); + } + #endif + } + } + printf("****** Testing ******\n"); + struct symbols_arr* tmp_arr; + char str[] = "i"; + int t = hashmap_get(char_to_symbols, str, (any_t *)&tmp_arr); + if (!t) + print_symbols_arr(tmp_arr); + return char_to_symbols; +} + +struct symbols_arr* create_array_of_chars() { + struct symbols_arr* ret = (struct symbols_arr*)malloc(sizeof(struct symbols_arr)); + ret->len = 0; + ret->symbols_arr = (char **)malloc(MAX_TERMINAL_NUMS * sizeof(char*)); + size_t i; + for (i = 0; i < MAX_TERMINAL_NUMS; i++) { + ret->symbols_arr[i] = (char *)calloc(MAX_TERMINAL_LENGTH, sizeof(char)); + } + return ret; +} + +// map a symbol to a list of (state, trigger_idx) +map_t create_pda_hashmap(state* pda, struct symbols_arr* symbols_arr) { + int state_idx, trigger_idx, r; // r is the return result for hashmap operation + map_t m = hashmap_new(); + // iterate over pda + for (state_idx = 0; state_idx < numstates; state_idx++) { + #ifdef DEBUG + printf("------ The state idx is %d ------\n", state_idx); + #endif + if (state_idx == final_state) continue; + state* state_curr = pda + state_idx; + for (trigger_idx = 0; trigger_idx < state_curr->trigger_len; trigger_idx++) { + #ifdef DEBUG + printf("------ The trigger idx is %d ------\n", trigger_idx); + #endif + trigger* trigger_curr = state_curr->ptr + trigger_idx; + char* symbol_curr = trigger_curr->term; + size_t symbol_len = trigger_curr->term_len; + struct terminal_arr* terminal_arr_curr; + r = hashmap_get(m, symbol_curr, (any_t*)&terminal_arr_curr); + if (r) { + // the symbol is not in the map + if (!add_element_to_symbols_arr(symbols_arr, symbol_curr, symbol_len+1)) { + // the number of symbols exceed maximual number + free_hashmap(m, &free_terminal_arr); + return NULL; + } + #ifdef DEBUG + printf("Symbol %s is not in map\n", symbol_curr); + #endif + struct terminal_arr* new_terminal_arr = (struct terminal_arr*)malloc(sizeof(struct terminal_arr)); + new_terminal_arr->start = (struct terminal_meta*)calloc(numstates, sizeof(struct terminal_meta)); + #ifdef DEBUG + printf("allocate new memory address %p\n", new_terminal_arr->start); + #endif + new_terminal_arr->start->state_name = state_idx; + new_terminal_arr->start->dest = trigger_curr->dest; + new_terminal_arr->start->trigger_idx = trigger_idx; + new_terminal_arr->len = 1; + #ifdef DEBUG + printf("Symbol %s is included in %zu edges\n", symbol_curr, new_terminal_arr->len); + #endif + r = hashmap_put(m, symbol_curr, new_terminal_arr); + #ifdef DEBUG + if (r) { + printf("hashmap put failed\n"); + } + else { + printf("hashmap put succeeded\n"); + } + #endif + // if symbol not already in map, it's not in symbol_dict, simply add the symbol to the array + // TODO: need to initialize symbol dict (calloc) + } + else { + // the symbol is already in map + // append to terminal array + // no need to touch start + #ifdef DEBUG + printf("Symbol %s is in map\n", symbol_curr); + #endif + struct terminal_meta* modify = terminal_arr_curr->start + terminal_arr_curr->len; + modify->state_name = state_idx; + modify->trigger_idx = trigger_idx; + modify->dest = trigger_curr->dest; + terminal_arr_curr->len++; + #ifdef DEBUG + printf("Symbol %s is included in %zu edges\n", symbol_curr, terminal_arr_curr->len); + #endif + // if symbol already in map, it's already in symbol_dict as well, no work needs to be done + } + + } + } + return m; +} + +void print_symbols_arr(struct symbols_arr* arr) { + size_t i; + printf("("); + for (i = 0; i < arr->len; i++) { + printf("%s", arr->symbols_arr[i]); + if (i != arr->len - 1) printf(","); + } + printf(")\n"); +} + +void free_hashmap(map_t m, int (*f)(any_t, any_t)) { + if (!m) { + printf("m map is empty\n"); + return; + } + int r = hashmap_iterate(m, f, NULL); + #ifdef DEBUG + if (!r) printf("free hashmap items successfully!\n"); + else printf("free hashmap items failed"); + #endif + hashmap_free(m); +} + +int free_array_of_chars(any_t placeholder, any_t item) { + if (!item) { + printf("item is empty\n"); + return MAP_MISSING; + } + struct symbols_arr* arr = item; + size_t i; + for (i = 0; i < MAX_TERMINAL_NUMS; i++) { + free(arr->symbols_arr[i]); + } + free(arr->symbols_arr); + free(arr); + return MAP_OK; +} + +void free_pda(state* pda) { + if (!pda) { + printf("pda is null\n"); + return; + } + size_t i, j; + for (i = 0; i < numstates; i++) { + state* state_curr = pda + i; + for (j = 0; j < state_curr->trigger_len; j++) { + trigger* trigger_curr = state_curr->ptr + j; + free(trigger_curr->id); + free(trigger_curr->term); + } + free(state_curr->ptr); + } + free(pda); +} + +int dfs(struct terminal_arr** tmp, const char* program, const size_t program_length, struct terminal_arr** res, size_t idx, int curr_state) { + if (*res) return 1; // 1 means successfully found a path + if (idx == program_length) { + // test if the last terminal points to the final state + if (curr_state != final_state) return 0; + *res = *tmp; + return 1; + } + if ((*tmp)->len == MAX_PROGRAM_WALK_LENGTH) { + printf("Reached maximum program walk length\n"); + return 0; + } + char first_char[2]; + first_char[0] = program[idx]; // first character of program + first_char[1] = '\0'; + int r; + struct symbols_arr* matching_symbols; + r = hashmap_get(first_char_to_symbols_map, first_char, (any_t *)&matching_symbols); + if (r) { + printf("No symbols match the current character, abort!"); // hopefully won't reach this state + return 0; + } + size_t i; + bool matched = false; + for (i = 0; i < matching_symbols->len; i++) { + if (matched) break; + char *matching_symbol = matching_symbols->symbols_arr[i]; + if (!strncmp(matching_symbol, program + idx, strlen(matching_symbol))) { + // there is a match + matched = true; + // find the possible paths of that symbol + struct terminal_arr* ta; + int r2 = hashmap_get(pda_map, matching_symbol, (any_t *)&ta); + if (!r2) { + // the terminal is found in the dictionary + size_t j; + for (j = 0; j < ta->len; j++) { + int state_name = (ta->start + j)->state_name; + if (state_name != curr_state) continue; + size_t trigger_idx = (ta->start + j)->trigger_idx; + int dest = (ta->start + j)->dest; + (*tmp)->start[(*tmp)->len].state_name = state_name; + (*tmp)->start[(*tmp)->len].trigger_idx = trigger_idx; + (*tmp)->start[(*tmp)->len].dest = dest; + (*tmp)->len++; + if (dfs(tmp, program, program_length, res, idx + strlen(matching_symbol), dest)) return 1; + (*tmp)->len--; + } + } + else { + printf("No path goes out of this symbol, abort!"); // hopefully won't reach this state + return 0; + } + } + } + return 0; + /* + 1. First extract the first character of the current program + 2. Match the possible symbols of that program + 3. Find the possible paths of that symbol + 4. Add to temporary terminal array + 5. Recursion + 6. Pop the path from the terminal array + 7. - If idx reaches end of program, set tmp to res + - If idx is not at the end and nothing matches, the current path is not working, simply return 0 + */ +} + +Array* constructArray(struct terminal_arr* terminal_arr, state* pda) { + Array * res = (Array *)calloc(1, sizeof(Array)); + initArray(res, INIT_SIZE); + size_t i; + for (i = 0; i < terminal_arr->len; i ++) { + struct terminal_meta* curr = terminal_arr->start + i; + int state_name = curr->state_name; + int trigger_idx = curr->trigger_idx; + // get the symbol from pda + state* state_curr = pda + state_name; + trigger* trigger_curr = state_curr->ptr + trigger_idx; + char *symbol_curr = trigger_curr->term; + size_t symbol_curr_len = trigger_curr->term_len; + insertArray(res, state_name, symbol_curr, symbol_curr_len, trigger_idx); + } + return res; +} + +Array* automaton_parser(const uint8_t *seed_fn) { + Array* parsed_res = NULL; + FILE* ptr; + ptr = fopen(seed_fn, "r"); + if (ptr == NULL) { + printf("file can't be opened \n"); + fclose(ptr); + return NULL; + } + char ch; + char program[MAX_PROGRAM_LENGTH]; + int i = 0; + bool program_too_long = false; + do { + if (i == MAX_PROGRAM_LENGTH) { + // the maximum program length is reached + printf("maximum program length is reached, give up the current seed\n"); + program_too_long = true; + break; + } + ch = fgetc(ptr); + program[i] = ch; + i ++; + } while (ch != EOF); + program[i-1] = '\0'; + fclose(ptr); + if ((i == 1 && program[0] == '\0') || program_too_long) return NULL; + struct terminal_arr* arr_holder; + struct terminal_arr* dfs_res = NULL; + arr_holder = (struct terminal_arr*)calloc(1, sizeof(struct terminal_arr)); + arr_holder->start = (struct terminal_meta*)calloc(MAX_PROGRAM_WALK_LENGTH, sizeof(struct terminal_meta)); + int dfs_success = dfs(&arr_holder, program, strlen(program), &dfs_res, 0, init_state); + // printf("*** return value %d *** \n", dfs_success); + if (dfs_success) { + parsed_res = constructArray(dfs_res, pda); + } + free(arr_holder->start); + free(arr_holder); + return parsed_res; +} + +// return 0 if fails +// return 1 if succeeds +int add_element_to_symbols_arr(struct symbols_arr* symbols_arr, char* symbol, size_t symbol_len) { + if (symbols_arr->len >= MAX_TERMINAL_NUMS || symbol_len >= MAX_TERMINAL_LENGTH) { + return 0; + } + strncpy(symbols_arr->symbols_arr[symbols_arr->len], symbol, symbol_len); + symbols_arr->len++; + return 1; +} \ No newline at end of file diff --git a/custom_mutators/gramatron/automaton-parser.h b/custom_mutators/gramatron/automaton-parser.h new file mode 100644 index 00000000..d67a1679 --- /dev/null +++ b/custom_mutators/gramatron/automaton-parser.h @@ -0,0 +1,74 @@ +#ifndef _AUTOMATON_PARSER_H +#define _AUTOMATON_PARSER_H + +#define NUMINPUTS 500 +#define MAX_PROGRAM_LENGTH 20000 +#define MAX_PROGRAM_WALK_LENGTH 5000 +#define MAX_TERMINAL_NUMS 5000 +#define MAX_TERMINAL_LENGTH 1000 +#define MAX_PROGRAM_NAME_LENGTH 5000 + +#include "gramfuzz.h" + +// represents an edge in the FSA +struct terminal_meta { + + int state_name; + int trigger_idx; + int dest; + +} ; + +// represents a set of edges +struct terminal_arr { + + struct terminal_meta* start; + size_t len; + +} ; + +// essentially a string array +struct symbols_arr { + char** symbols_arr; + size_t len; +} ; + +struct symbols_arr* symbols; // symbols contain all the symbols in the language +map_t pda_map; // a map that maps each symbol in the language to a set of edges +struct symbols_arr* first_chars; // an array of first characters, only temperary array +map_t first_char_to_symbols_map; // a map that maps each first character to a set of symbols (the symbols are sorted in descending order) + + + +// freeing terminal arrays +int free_terminal_arr(any_t placeholder, any_t item); + +// return a map that maps each symbol in the language to a set of edges +// populate symbols_arr with all the symbols in the language +map_t create_pda_hashmap(state* pda, struct symbols_arr* symbols_arr); + +// print the string array +void print_symbols_arr(struct symbols_arr* arr); + +// free hashmap +// the function pointer contains function to free the values in the hashmap +void free_hashmap(map_t m, int (*f)(any_t, any_t)); + +// free string array +int free_array_of_chars(any_t placeholder, any_t item); + +// free the pda +void free_pda(state* pda); + +// create a string array +struct symbols_arr* create_array_of_chars(); + +map_t create_first_char_to_symbols_hashmap(struct symbols_arr *symbols, struct symbols_arr *first_chars); + +// return the automaton represented by the seed +Array* automaton_parser(const uint8_t *seed_fn); + +int add_element_to_symbols_arr(struct symbols_arr* symbols_arr, char* symbol, size_t symbol_len); + + +#endif \ No newline at end of file diff --git a/custom_mutators/gramatron/build_gramatron_mutator.sh b/custom_mutators/gramatron/build_gramatron_mutator.sh index 9952e7f5..0638e3b2 100755 --- a/custom_mutators/gramatron/build_gramatron_mutator.sh +++ b/custom_mutators/gramatron/build_gramatron_mutator.sh @@ -125,7 +125,7 @@ else } fi -test -d json-c/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } +test -f json-c/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } echo "[+] Got json-c." test -e json-c/.libs/libjson-c.a || { @@ -144,6 +144,6 @@ echo echo echo "[+] Json-c successfully prepared!" echo "[+] Builing gramatron now." -$CC -O3 -g -fPIC -Wno-unused-result -Wl,--allow-multiple-definition -I../../include -o gramatron.so -shared -I. -I/prg/dev/include gramfuzz.c gramfuzz-helpers.c gramfuzz-mutators.c gramfuzz-util.c hashmap.c ../../src/afl-performance.o json-c/.libs/libjson-c.a || exit 1 +$CC -O3 -g -fPIC -Wno-unused-result -Wl,--allow-multiple-definition -I../../include -o gramatron.so -shared -I. -I/prg/dev/include gramfuzz.c gramfuzz-helpers.c gramfuzz-mutators.c gramfuzz-util.c hashmap.c automaton-parser.c ../../src/afl-performance.o json-c/.libs/libjson-c.a || exit 1 echo echo "[+] gramatron successfully built!" diff --git a/custom_mutators/gramatron/gramfuzz.c b/custom_mutators/gramatron/gramfuzz.c index 9c9dbb43..ccdbbe60 100644 --- a/custom_mutators/gramatron/gramfuzz.c +++ b/custom_mutators/gramatron/gramfuzz.c @@ -9,6 +9,7 @@ #include "afl-fuzz.h" #include "gramfuzz.h" +#include "automaton-parser.h" #define MUTATORS 4 // Specify the total number of mutators @@ -163,6 +164,11 @@ my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) { if (automaton_file) { pda = create_pda(automaton_file); + symbols = create_array_of_chars(); + pda_map = create_pda_hashmap((struct state*)pda, symbols); + print_symbols_arr(symbols); + first_chars = create_array_of_chars(); + first_char_to_symbols_map = create_first_char_to_symbols_hashmap(symbols, first_chars); } else { @@ -281,12 +287,25 @@ u8 afl_custom_queue_new_entry(my_mutator_t * data, // filename_new_queue,filename_orig_queue,automaton_fn); if (filename_orig_queue) { - - write_input(data->mutated_walk, automaton_fn); + if (data->mutated_walk) { + write_input(data->mutated_walk, automaton_fn); + } + else { + Array* parsed_walk = automaton_parser(filename_new_queue); + if (!parsed_walk) PFATAL("Parser unsuccessful on %s", filename_new_queue); + write_input(parsed_walk, automaton_fn); + free(parsed_walk->start); + free(parsed_walk); + } } else { - new_input = gen_input(pda, NULL); + // TODO: try to parse the input seeds here, if they can be parsed, then generate the corresponding automaton file + // if not, then generate a new input + new_input = automaton_parser(filename_new_queue); + if (new_input == NULL) { + new_input = gen_input(pda, NULL); + } write_input(new_input, automaton_fn); // Update the placeholder file @@ -328,6 +347,16 @@ uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) { // get the filename u8 * automaton_fn = alloc_printf("%s.aut", filename); + // find the automaton file, if the automaton file cannot be found, do not fuzz the current entry on the queue + FILE *fp; + fp = fopen(automaton_fn, "rb"); + if (fp == NULL) { + + printf("File '%s' does not exist, exiting. Would not fuzz current entry on the queue\n", automaton_fn); + return 0; + + } + IdxMap_new *statemap_ptr; terminal * term_ptr; int state; @@ -424,6 +453,10 @@ void afl_custom_deinit(my_mutator_t *data) { free(data->mutator_buf); free(data); - + free_hashmap(pda_map, &free_terminal_arr); + free_hashmap(first_char_to_symbols_map, &free_array_of_chars); + free_pda(pda); + free_array_of_chars(NULL, symbols); // free the array of symbols + free_array_of_chars(NULL, first_chars); } diff --git a/custom_mutators/gramatron/hashmap.c b/custom_mutators/gramatron/hashmap.c index 09715b87..4f97e085 100644 --- a/custom_mutators/gramatron/hashmap.c +++ b/custom_mutators/gramatron/hashmap.c @@ -151,7 +151,7 @@ static unsigned long crc32_tab[] = { /* Return a 32-bit CRC of the contents of the buffer. */ -unsigned long crc32(const unsigned char *s, unsigned int len) { +unsigned long custom_crc32(const unsigned char *s, unsigned int len) { unsigned int i; unsigned long crc32val; @@ -171,8 +171,9 @@ unsigned long crc32(const unsigned char *s, unsigned int len) { * Hashing function for a string */ unsigned int hashmap_hash_int(hashmap_map *m, char *keystring) { + unsigned int keystring_len = strlen(keystring); - unsigned long key = crc32((unsigned char *)(keystring), strlen(keystring)); + unsigned long key = custom_crc32((unsigned char *)(keystring), keystring_len); /* Robert Jenkins' 32 bit Mix Function */ key += (key << 12); diff --git a/custom_mutators/gramatron/json-c b/custom_mutators/gramatron/json-c -Subproject af8dd4a307e7b837f9fa2959549548ace4afe08 +Subproject 11546bfd07a575c47416924cb98de3d33a4e642 diff --git a/custom_mutators/gramatron/testMakefile.mk b/custom_mutators/gramatron/testMakefile.mk new file mode 100644 index 00000000..0b2c6236 --- /dev/null +++ b/custom_mutators/gramatron/testMakefile.mk @@ -0,0 +1,2 @@ +test: test.c + gcc -g -fPIC -Wno-unused-result -Wl,--allow-multiple-definition -I../../include -o test -I. -I/prg/dev/include test.c gramfuzz-helpers.c gramfuzz-mutators.c gramfuzz-util.c hashmap.c ../../src/afl-performance.o json-c/.libs/libjson-c.a \ No newline at end of file diff --git a/custom_mutators/grammar_mutator/build_grammar_mutator.sh b/custom_mutators/grammar_mutator/build_grammar_mutator.sh index 15b8b1db..e8594ba3 100755 --- a/custom_mutators/grammar_mutator/build_grammar_mutator.sh +++ b/custom_mutators/grammar_mutator/build_grammar_mutator.sh @@ -119,7 +119,7 @@ else } fi -test -d grammar_mutator/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } +test -f grammar_mutator/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } echo "[+] Got grammar mutator." cd "grammar_mutator" || exit 1 diff --git a/docs/Changelog.md b/docs/Changelog.md index b18bf30f..6269e3b1 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -41,6 +41,9 @@ sending a mail to <afl-users+subscribe@googlegroups.com>. - update to new frida release, handles now c++ throw/catch - unicorn_mode: - update unicorn engine, fix C example + - utils: + - removed optimin because it looses coverage due a bug and is + unmaintained :-( ### Version ++4.00c (release) diff --git a/docs/env_variables.md b/docs/env_variables.md index fe9c6e07..3c69c0b6 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -517,6 +517,10 @@ checks or alter some of the more exotic semantics of the tool: (empty/non present) will add no tags to the metrics. For more information, see [rpc_statsd.md](rpc_statsd.md). + - `AFL_SYNC_TIME` allows you to specify a different minimal time (in minutes) + between fuzzing instances synchronization. Default sync time is 30 minutes, + note that time is halfed for -M main nodes. + - Setting `AFL_TARGET_ENV` causes AFL++ to set extra environment variables for the target binary. Example: `AFL_TARGET_ENV="VAR1=1 VAR2='a b c'" afl-fuzz ... `. This exists mostly for things like `LD_LIBRARY_PATH` but it would diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 9992e841..24af426f 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -577,7 +577,8 @@ typedef struct afl_state { last_find_time, /* Time for most recent path (ms) */ last_crash_time, /* Time for most recent crash (ms) */ last_hang_time, /* Time for most recent hang (ms) */ - exit_on_time; /* Delay to exit if no new paths */ + exit_on_time, /* Delay to exit if no new paths */ + sync_time; /* Sync time (ms) */ u32 slowest_exec_ms, /* Slowest testcase non hang in ms */ subseq_tmouts; /* Number of timeouts in a row */ diff --git a/include/envs.h b/include/envs.h index 25b792fa..f4cccc96 100644 --- a/include/envs.h +++ b/include/envs.h @@ -206,6 +206,7 @@ static char *afl_environment_variables[] = { "AFL_STATSD_HOST", "AFL_STATSD_PORT", "AFL_STATSD_TAGS_FLAVOR", + "AFL_SYNC_TIME", "AFL_TESTCACHE_SIZE", "AFL_TESTCACHE_ENTRIES", "AFL_TMIN_EXACT", diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index db7ac7b0..f3a16e95 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -327,6 +327,41 @@ static void __afl_map_shm(void) { } + if (!id_str && __afl_area_ptr_dummy == __afl_area_initial) { + + u32 val = 0; + u8 *ptr; + + if ((ptr = getenv("AFL_MAP_SIZE")) != NULL) val = atoi(ptr); + + if (val > MAP_INITIAL_SIZE) { + + __afl_map_size = val; + __afl_area_ptr_dummy = malloc(__afl_map_size); + if (!__afl_area_ptr_dummy) { + + fprintf(stderr, + "Error: AFL++ could not aquire %u bytes of memory, exiting!\n", + __afl_map_size); + exit(-1); + + } + + } else { + + __afl_map_size = MAP_INITIAL_SIZE; + + } + + if (__afl_debug) { + + fprintf(stderr, "DEBUG: (0) init map size is %u to %p\n", __afl_map_size, + __afl_area_ptr_dummy); + + } + + } + /* If we're running under AFL, attach to the appropriate region, replacing the early-stage __afl_area_initial region that is needed to allow some really hacky .init code to work correctly in projects such as OpenSSL. */ @@ -465,18 +500,26 @@ static void __afl_map_shm(void) { } - } else if (_is_sancov && __afl_area_ptr != __afl_area_initial) { - - free(__afl_area_ptr); - __afl_area_ptr = NULL; + } else if (__afl_final_loc > __afl_map_size) { - if (__afl_final_loc > MAP_INITIAL_SIZE) { + if (__afl_area_initial != __afl_area_ptr_dummy) { - __afl_area_ptr = (u8 *)malloc(__afl_final_loc); + free(__afl_area_ptr_dummy); } - if (!__afl_area_ptr) { __afl_area_ptr = __afl_area_ptr_dummy; } + __afl_area_ptr_dummy = (u8 *)malloc(__afl_final_loc); + __afl_area_ptr = __afl_area_ptr_dummy; + __afl_map_size = __afl_final_loc; + + if (!__afl_area_ptr_dummy) { + + fprintf(stderr, + "Error: AFL++ could not aquire %u bytes of memory, exiting!\n", + __afl_final_loc); + exit(-1); + + } } @@ -487,7 +530,7 @@ static void __afl_map_shm(void) { fprintf(stderr, "DEBUG: (2) id_str %s, __afl_area_ptr %p, __afl_area_initial %p, " "__afl_area_ptr_dummy %p, __afl_map_addr 0x%llx, MAP_SIZE " - "%u, __afl_final_loc %u, __afl_map_size %u," + "%u, __afl_final_loc %u, __afl_map_size %u, " "max_size_forkserver %u/0x%x\n", id_str == NULL ? "<null>" : id_str, __afl_area_ptr, __afl_area_initial, __afl_area_ptr_dummy, __afl_map_addr, MAP_SIZE, diff --git a/instrumentation/COPYING3 b/instrumentation/gcc_plugin.COPYING3 index 94a9ed02..b0a36be4 100644 --- a/instrumentation/COPYING3 +++ b/instrumentation/gcc_plugin.COPYING3 @@ -1,3 +1,8 @@ +NOTE: +This license applies only to the gcc_plugin code "afl-gcc-pass.so.cc" as +gcc is GPL3 too. + + GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c index 09e773f0..5703a66a 100644 --- a/src/afl-fuzz-run.c +++ b/src/afl-fuzz-run.c @@ -130,11 +130,7 @@ write_to_testcase(afl_state_t *afl, void **mem, u32 len, u32 fix) { } - if (new_mem != *mem) { - - *mem = new_mem; - - } + if (new_mem != *mem) { *mem = new_mem; } /* everything as planned. use the potentially new data. */ afl_fsrv_write_to_testcase(&afl->fsrv, *mem, new_size); diff --git a/src/afl-fuzz-state.c b/src/afl-fuzz-state.c index 98217438..8334af75 100644 --- a/src/afl-fuzz-state.c +++ b/src/afl-fuzz-state.c @@ -101,6 +101,7 @@ void afl_state_init(afl_state_t *afl, uint32_t map_size) { afl->stats_update_freq = 1; afl->stats_avg_exec = 0; afl->skip_deterministic = 1; + afl->sync_time = SYNC_TIME; afl->cmplog_lvl = 2; afl->min_length = 1; afl->max_length = MAX_FILE; @@ -519,6 +520,24 @@ void read_afl_environment(afl_state_t *afl, char **envp) { } + } else if (!strncmp(env, "AFL_SYNC_TIME", + + afl_environment_variable_len)) { + + int time = atoi((u8 *)get_afl_env(afl_environment_variables[i])); + if (time > 0) { + + afl->sync_time = time * (60 * 1000LL); + + } else { + + WARNF( + "incorrect value for AFL_SYNC_TIME environment variable, " + "used default value %lld instead.", + afl->sync_time / 60 / 1000); + + } + } } else { diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c index 5b237748..3e034b83 100644 --- a/src/afl-fuzz-stats.c +++ b/src/afl-fuzz-stats.c @@ -59,7 +59,7 @@ void write_setup_file(afl_state_t *afl, u32 argc, char **argv) { if (i) fprintf(f, " "); #ifdef __ANDROID__ - if (memchr(argv[i], '\'', sizeof(argv[i]))) { + if (memchr(argv[i], '\'', strlen(argv[i]))) { #else if (index(argv[i], '\'')) { diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index c5ab364a..7c33ba29 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -295,6 +295,7 @@ static void usage(u8 *argv0, int more_help) { "AFL_STATSD_TAGS_FLAVOR: set statsd tags format (default: disable tags)\n" " Supported formats are: 'dogstatsd', 'librato',\n" " 'signalfx' and 'influxdb'\n" + "AFL_SYNC_TIME: sync time between fuzzing instances (in minutes)\n" "AFL_TESTCACHE_SIZE: use a cache for testcases, improves performance (in MB)\n" "AFL_TMPDIR: directory to use for input file generation (ramdisk recommended)\n" "AFL_EARLY_FORKSERVER: force an early forkserver in an afl-clang-fast/\n" @@ -2511,7 +2512,7 @@ int main(int argc, char **argv_orig, char **envp) { if (unlikely(afl->is_main_node)) { if (unlikely(get_cur_time() > - (SYNC_TIME >> 1) + afl->last_sync_time)) { + (afl->sync_time >> 1) + afl->last_sync_time)) { if (!(sync_interval_cnt++ % (SYNC_INTERVAL / 3))) { @@ -2523,7 +2524,7 @@ int main(int argc, char **argv_orig, char **envp) { } else { - if (unlikely(get_cur_time() > SYNC_TIME + afl->last_sync_time)) { + if (unlikely(get_cur_time() > afl->sync_time + afl->last_sync_time)) { if (!(sync_interval_cnt++ % SYNC_INTERVAL)) { sync_fuzzers(afl); } diff --git a/unicorn_mode/UNICORNAFL_VERSION b/unicorn_mode/UNICORNAFL_VERSION index 77fc69b5..5e7234c6 100644 --- a/unicorn_mode/UNICORNAFL_VERSION +++ b/unicorn_mode/UNICORNAFL_VERSION @@ -1 +1 @@ -c3e15a7d +06796154996fef2d92ccd172181ee0cdf3631959 diff --git a/unicorn_mode/unicornafl b/unicorn_mode/unicornafl -Subproject c3e15a7d44101ff288abe114b7954ce6cfa070b +Subproject 06796154996fef2d92ccd172181ee0cdf363195 diff --git a/utils/README.md b/utils/README.md index debc86e8..62d79193 100644 --- a/utils/README.md +++ b/utils/README.md @@ -56,8 +56,6 @@ Here's a quick overview of the stuff you can find in this directory: - libpng_no_checksum - a sample patch for removing CRC checks in libpng. - - optimin - An optimal corpus minimizer. - - persistent_mode - an example of how to use the LLVM persistent process mode to speed up certain fuzzing jobs. diff --git a/utils/libdislocator/libdislocator.so.c b/utils/libdislocator/libdislocator.so.c index bd08a678..c821a8f7 100644 --- a/utils/libdislocator/libdislocator.so.c +++ b/utils/libdislocator/libdislocator.so.c @@ -505,7 +505,10 @@ void *reallocarray(void *ptr, size_t elem_len, size_t elem_cnt) { } -#if !defined(__ANDROID__) +#if defined(__APPLE__) +size_t malloc_size(const void *ptr) { + +#elif !defined(__ANDROID__) size_t malloc_usable_size(void *ptr) { #else @@ -517,6 +520,15 @@ size_t malloc_usable_size(const void *ptr) { } +#if defined(__APPLE__) +size_t malloc_good_size(size_t len) { + + return (len & ~(ALLOC_ALIGN_SIZE - 1)) + ALLOC_ALIGN_SIZE; + +} + +#endif + __attribute__((constructor)) void __dislocator_init(void) { char *tmp = getenv("AFL_LD_LIMIT_MB"); diff --git a/utils/optimin/.gitignore b/utils/optimin/.gitignore deleted file mode 100644 index 46f42f8f..00000000 --- a/utils/optimin/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -CMakeLists.txt.user -CMakeCache.txt -CMakeFiles -CMakeScripts -Testing -Makefile -cmake_install.cmake -install_manifest.txt -compile_commands.json -CTestTestfile.cmake -_deps diff --git a/utils/optimin/CMakeLists.txt b/utils/optimin/CMakeLists.txt deleted file mode 100644 index b45dd004..00000000 --- a/utils/optimin/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -cmake_minimum_required(VERSION 3.10) - -project(optimin - LANGUAGES CXX - DESCRIPTION "MaxSAT-based fuzzing corpus minimizer" -) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") - -# Add LLVM -find_package(LLVM REQUIRED CONFIG) -message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") - -include_directories(${LLVM_INCLUDE_DIRS}) -add_definitions(${LLVM_DEFINITIONS} -DNDEBUG) - -add_subdirectory(EvalMaxSAT) -add_subdirectory(src) diff --git a/utils/optimin/EVALMAXSAT_VERSION b/utils/optimin/EVALMAXSAT_VERSION deleted file mode 100644 index d836ff1c..00000000 --- a/utils/optimin/EVALMAXSAT_VERSION +++ /dev/null @@ -1 +0,0 @@ -440bf90edf88f6ab940934129e3c5b3b93764295 diff --git a/utils/optimin/EvalMaxSAT b/utils/optimin/EvalMaxSAT deleted file mode 160000 -Subproject 440bf90edf88f6ab940934129e3c5b3b9376429 diff --git a/utils/optimin/README.md b/utils/optimin/README.md deleted file mode 100644 index 340022b8..00000000 --- a/utils/optimin/README.md +++ /dev/null @@ -1,94 +0,0 @@ -# OptiMin - -OptiMin is a corpus minimizer that uses a -[MaxSAT](https://en.wikipedia.org/wiki/Maximum_satisfiability_problem) solver -to identify a subset of functionally distinct files that exercise different code -paths in a target program. - -Unlike most corpus minimizers, such as `afl-cmin`, OptiMin does not rely on -heuristic and/or greedy algorithms to identify these functionally distinct -files. This means that minimized corpora are generally much smaller than those -produced by other tools. - -## Building - -To build the `optimin` just execute the `build_optimin.sh` script. - -## Running - -Running `optimin` is the same as running `afl-cmin`: - -``` -./optimin -h -OVERVIEW: Optimal corpus minimizer -USAGE: optimin [options] <target program> [target args...] - -OPTIONS: - -Color Options: - - --color - Use colors in output (default=autodetect) - -General options: - - -C - Keep crashing inputs, reject everything else - -O - Use binary-only instrumentation (FRIDA mode) - -Q - Use binary-only instrumentation (QEMU mode) - -U - Use unicorn-based instrumentation (unicorn mode) - -f - Include edge hit counts - -i dir - Input directory - -m megs - Memory limit for child process (default=none) - -o dir - Output directory - -p - Display progress bar - -t msec - Run time limit for child process (default=5000) - -w csv - Weights file - -Generic Options: - - --help - Display available options (--help-hidden for more) - --help-list - Display list of available options (--help-list-hidden for more) - --version - Display the version of this program -``` - -Example: `optimin -i files -o seeds -- ./target @@` - -### Weighted Minimizations - -OptiMin allows for weighted minimizations. For examples, seeds can be weighted -by file size (or execution time), thus preferencing the selection of smaller (or -faster) seeds. - -To perform a weighted minimization, supply a CSV file with the `-w` option. This -CSV file is formatted as follows: - -``` -SEED_1,WEIGHT_1 -SEED_2,WEIGHT_2 -... -SEED_N,WEIGHT_N -``` - -Where `SEED_N` is the file name (**not** path) of a seed in the input directory, -and `WEIGHT_N` is an integer weight. - -## Further Details and Citation - -For more details, see the paper -[Seed Selection for Successful Fuzzing](https://dl.acm.org/doi/10.1145/3460319.3464795). -If you use OptiMin in your research, please cite this paper. - -BibTeX: - -```bibtex -@inproceedings{Herrera:2021:FuzzSeedSelection, - author = {Adrian Herrera and Hendra Gunadi and Shane Magrath and Michael Norrish and Mathias Payer and Antony L. Hosking}, - title = {Seed Selection for Successful Fuzzing}, - booktitle = {30th ACM SIGSOFT International Symposium on Software Testing and Analysis}, - series = {ISSTA}, - year = {2021}, - pages = {230--243}, - numpages = {14}, - location = {Virtual, Denmark}, - publisher = {Association for Computing Machinery}, -} -``` \ No newline at end of file diff --git a/utils/optimin/build_optimin.sh b/utils/optimin/build_optimin.sh deleted file mode 100755 index aee5d0c3..00000000 --- a/utils/optimin/build_optimin.sh +++ /dev/null @@ -1,131 +0,0 @@ -#!/bin/sh -# -# american fuzzy lop++ - optimin build script -# ------------------------------------------------ -# -# Originally written by Nathan Voss <njvoss99@gmail.com> -# -# Adapted from code by Andrew Griffiths <agriffiths@google.com> and -# Michal Zalewski -# -# Adapted for AFLplusplus by Dominik Maier <mail@dmnk.co> -# -# Copyright 2017 Battelle Memorial Institute. All rights reserved. -# Copyright 2019-2022 AFLplusplus Project. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# This script builds the OptiMin corpus minimizer. - -EVALMAXSAT_VERSION="$(cat ./EVALMAXSAT_VERSION)" -EVALMAXSAT_REPO="https://github.com/FlorentAvellaneda/EvalMaxSAT" - -echo "=================================================" -echo "OptiMin build script" -echo "=================================================" -echo - -echo "[*] Performing basic sanity checks..." - -PLT=`uname -s` - -if [ ! -f "../../config.h" ]; then - - echo "[-] Error: key files not found - wrong working directory?" - exit 1 - -fi - -LLVM_CONFIG="${LLVM_CONFIG:-llvm-config}" -CMAKECMD=cmake -MAKECMD=make -TARCMD=tar - -if [ "$PLT" = "Darwin" ]; then - CORES=`sysctl -n hw.ncpu` - TARCMD=tar -fi - -if [ "$PLT" = "FreeBSD" ]; then - MAKECMD=gmake - CORES=`sysctl -n hw.ncpu` - TARCMD=gtar -fi - -if [ "$PLT" = "NetBSD" ] || [ "$PLT" = "OpenBSD" ]; then - MAKECMD=gmake - CORES=`sysctl -n hw.ncpu` - TARCMD=gtar -fi - -PREREQ_NOTFOUND= -for i in git $CMAKECMD $MAKECMD $TARCMD; do - - T=`command -v "$i" 2>/dev/null` - - if [ "$T" = "" ]; then - - echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i' or similar." - PREREQ_NOTFOUND=1 - - fi - -done - -if echo "$CC" | grep -qF /afl-; then - - echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool." - PREREQ_NOTFOUND=1 - -fi - -if [ "$PREREQ_NOTFOUND" = "1" ]; then - exit 1 -fi - -echo "[+] All checks passed!" - -echo "[*] Making sure EvalMaxSAT is checked out" - -git status 1>/dev/null 2>/dev/null -if [ $? -eq 0 ]; then - echo "[*] initializing EvalMaxSAT submodule" - git submodule init || exit 1 - git submodule update ./EvalMaxSAT 2>/dev/null # ignore errors -else - echo "[*] cloning EvalMaxSAT" - test -d EvalMaxSAT || { - CNT=1 - while [ '!' -d EvalMaxSAT -a "$CNT" -lt 4 ]; do - echo "Trying to clone EvalMaxSAT (attempt $CNT/3)" - git clone "$EVALMAXSAT_REPO" - CNT=`expr "$CNT" + 1` - done - } -fi - -test -d EvalMaxSAT || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } -echo "[+] Got EvalMaxSAT." - -cd "EvalMaxSAT" || exit 1 -echo "[*] Checking out $EVALMAXSAT_VERSION" -sh -c 'git stash && git stash drop' 1>/dev/null 2>/dev/null -git checkout "$EVALMAXSAT_VERSION" || exit 1 -cd .. - -echo -echo -echo "[+] EvalMaxSAT successfully prepared!" -echo "[+] Building OptiMin now." -mkdir -p build -cd build || exit 1 -cmake .. -DLLVM_DIR=`$LLVM_CONFIG --cmakedir` || exit 1 -make -j$CORES || exit 1 -cd .. -echo -cp -fv build/src/optimin . || exit 1 -echo "[+] OptiMin successfully built!" diff --git a/utils/optimin/src/CMakeLists.txt b/utils/optimin/src/CMakeLists.txt deleted file mode 100644 index 693f63f2..00000000 --- a/utils/optimin/src/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -add_executable(optimin OptiMin.cpp) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") - -foreach(LIB MaLib EvalMaxSAT glucose) - target_include_directories(optimin PRIVATE - "${CMAKE_SOURCE_DIR}/EvalMaxSAT/lib/${LIB}/src") - target_link_libraries(optimin ${LIB}) -endforeach(LIB) - -llvm_map_components_to_libnames(LLVM_LIBS support) -target_link_libraries(optimin ${LLVM_LIBS}) - -install(TARGETS optimin RUNTIME DESTINATION bin) diff --git a/utils/optimin/src/OptiMin.cpp b/utils/optimin/src/OptiMin.cpp deleted file mode 100644 index b0082d14..00000000 --- a/utils/optimin/src/OptiMin.cpp +++ /dev/null @@ -1,702 +0,0 @@ -/* - * OptiMin, an optimal fuzzing corpus minimizer. - * - * Author: Adrian Herrera - */ - -#include <cstdint> -#include <cstdlib> -#include <vector> - -#include <llvm/ADT/DenseSet.h> -#include <llvm/ADT/DenseMap.h> -#include <llvm/ADT/SmallVector.h> -#include <llvm/ADT/StringExtras.h> -#include <llvm/ADT/StringMap.h> -#include <llvm/Support/Chrono.h> -#include <llvm/Support/CommandLine.h> -#include <llvm/Support/Error.h> -#include <llvm/Support/FileSystem.h> -#include <llvm/Support/MemoryBuffer.h> -#include <llvm/Support/Path.h> -#include <llvm/Support/Program.h> -#include <llvm/Support/WithColor.h> - -#include "EvalMaxSAT.h" - -using namespace llvm; - -namespace { - -// -------------------------------------------------------------------------- // -// Classes -// -------------------------------------------------------------------------- // - -/// Ensure seed weights default to 1 -class Weight { - - public: - Weight() : Weight(1){}; - Weight(uint32_t V) : Value(V){}; - - operator unsigned() const { - - return Value; - - } - - private: - const unsigned Value; - -}; - -// -------------------------------------------------------------------------- // -// Typedefs -// -------------------------------------------------------------------------- // - -/// AFL tuple (edge) ID -using AFLTupleID = uint32_t; - -/// Pair of tuple ID and hit count -using AFLTuple = std::pair<AFLTupleID, /* Frequency */ unsigned>; - -/// Coverage for a given seed file -using AFLCoverageVector = std::vector<AFLTuple>; - -/// Map seed file paths to its coverage vector -using AFLCoverageMap = StringMap<AFLCoverageVector>; - -/// Map seed file paths to a weight -using WeightsMap = StringMap<Weight>; - -/// A seed identifier in the MaxSAT solver -using SeedID = int; - -/// Associates seed identifiers to seed files -using MaxSATSeeds = - SmallVector<std::pair<SeedID, /* Seed file */ std::string>, 0>; - -/// Set of literal identifiers -using MaxSATSeedSet = DenseSet<SeedID>; - -/// Maps tuple IDs to the literal identifiers that "cover" that tuple -using MaxSATCoverageMap = DenseMap<AFLTupleID, MaxSATSeedSet>; - -// -------------------------------------------------------------------------- // -// Global variables -// -------------------------------------------------------------------------- // - -// This is based on the human class count in `count_class_human[256]` in -// `afl-showmap.c` -static constexpr uint32_t MAX_EDGE_FREQ = 8; - -// The maximum number of failures allowed when parsing a weights file -static constexpr unsigned MAX_WEIGHT_FAILURES = 5; - -static sys::TimePoint<> StartTime, EndTime; -static std::chrono::seconds Duration; - -static std::string ShowmapPath; -static bool TargetArgsHasAtAt = false; -static bool KeepTraces = false; -static bool SkipBinCheck = false; - -static const auto ErrMsg = [] { - - return WithColor(errs(), raw_ostream::RED, /*Bold=*/true) << "[-] "; - -}; - -static const auto WarnMsg = [] { - - return WithColor(errs(), raw_ostream::MAGENTA, /*Bold=*/true) << "[-] "; - -}; - -static const auto SuccMsg = [] { - - return WithColor(outs(), raw_ostream::GREEN, /*Bold=*/true) << "[+] "; - -}; - -static const auto StatMsg = [] { - - return WithColor(outs(), raw_ostream::BLUE, /*Bold=*/true) << "[*] "; - -}; - -static cl::opt<std::string> InputDir("i", cl::desc("Input directory"), - cl::value_desc("dir"), cl::Required); -static cl::opt<std::string> OutputDir("o", cl::desc("Output directory"), - cl::value_desc("dir"), cl::Required); - -static cl::opt<bool> EdgesOnly("f", cl::desc("Include edge hit counts"), - cl::init(true)); -static cl::opt<std::string> WeightsFile("w", cl::desc("Weights file"), - cl::value_desc("csv")); - -static cl::opt<std::string> TargetProg(cl::Positional, - cl::desc("<target program>"), - cl::Required); -static cl::list<std::string> TargetArgs(cl::ConsumeAfter, - cl::desc("[target args...]")); - -static cl::opt<std::string> MemLimit( - "m", cl::desc("Memory limit for child process (default=none)"), - cl::value_desc("megs"), cl::init("none")); -static cl::opt<std::string> Timeout( - "t", cl::desc("Run time limit for child process (default=5000)"), - cl::value_desc("msec"), cl::init("5000")); - -static cl::opt<bool> CrashMode( - "C", cl::desc("Keep crashing inputs, reject everything else")); -static cl::opt<bool> FridaMode( - "O", cl::desc("Use binary-only instrumentation (FRIDA mode)")); -static cl::opt<bool> QemuMode( - "Q", cl::desc("Use binary-only instrumentation (QEMU mode)")); -static cl::opt<bool> UnicornMode( - "U", cl::desc("Use unicorn-based instrumentation (unicorn mode)")); - -} // anonymous namespace - -// -------------------------------------------------------------------------- // -// Helper functions -// -------------------------------------------------------------------------- // - -static void GetWeights(const MemoryBuffer &MB, WeightsMap &Weights) { - - SmallVector<StringRef, 0> Lines; - MB.getBuffer().trim().split(Lines, '\n'); - - unsigned FailureCount = 0; - unsigned Weight = 0; - - for (const auto &Line : Lines) { - - const auto &[Seed, WeightStr] = Line.split(','); - - if (to_integer(WeightStr, Weight, 10)) { - - Weights.try_emplace(Seed, Weight); - - } else { - - if (FailureCount >= MAX_WEIGHT_FAILURES) { - ErrMsg() << "Too many failures. Aborting\n"; - std::exit(1); - } - - WarnMsg() << "Failed to read weight for '" << Seed << "'. Skipping...\n"; - FailureCount++; - - } - - } - -} - -static std::error_code readCov(const StringRef Trace, AFLCoverageVector &Cov) { - - const auto CovOrErr = MemoryBuffer::getFile(Trace); - if (const auto EC = CovOrErr.getError()) return EC; - - SmallVector<StringRef, 0> Lines; - CovOrErr.get()->getBuffer().trim().split(Lines, '\n'); - - AFLTupleID Edge = 0; - unsigned Freq = 0; - - for (const auto &Line : Lines) { - - const auto &[EdgeStr, FreqStr] = Line.split(':'); - - to_integer(EdgeStr, Edge, 10); - to_integer(FreqStr, Freq, 10); - Cov.push_back({Edge, Freq}); - - } - - return std::error_code(); - -} - -static Error runShowmap(AFLCoverageMap &CovMap, const StringRef Input, - bool BinCheck = false) { - - const bool InputIsFile = !sys::fs::is_directory(Input); - Optional<StringRef> Redirects[] = {None, None, None}; - - SmallString<32> TraceDir{OutputDir}; - sys::path::append(TraceDir, ".traces"); - - SmallString<32> Output{TraceDir}; - SmallString<32> StdinFile{TraceDir}; - - // ------------------------------------------------------------------------ // - // Prepare afl-showmap arguments - // - // If the given input is a file, then feed this directly into stdin. - // Otherwise, if it is a directory, specify this on the afl-showmap command - // line. - // ------------------------------------------------------------------------ // - - SmallVector<StringRef, 12> ShowmapArgs{ShowmapPath, "-q", - "-m", MemLimit, - "-t", Timeout}; - - if (InputIsFile) { - - StdinFile = Input; - sys::path::append(Output, - BinCheck ? ".run_test" : sys::path::filename(Input)); - - } else { - - sys::path::append(StdinFile, ".cur_input"); - ShowmapArgs.append({"-i", Input}); - - } - - - if (TargetArgsHasAtAt) { - - ShowmapArgs.append({"-H", StdinFile}); - Redirects[/* stdin */ 0] = "/dev/null"; - - } else if (InputIsFile) { - - Redirects[/* stdin */ 0] = Input; - - } - - if (FridaMode) ShowmapArgs.push_back("-O"); - if (QemuMode) ShowmapArgs.push_back("-Q"); - if (UnicornMode) ShowmapArgs.push_back("-U"); - - ShowmapArgs.append({"-o", Output, "--", TargetProg}); - ShowmapArgs.append(TargetArgs.begin(), TargetArgs.end()); - - // ------------------------------------------------------------------------ // - // Run afl-showmap - // ------------------------------------------------------------------------ // - - const int RC = sys::ExecuteAndWait(ShowmapPath, ShowmapArgs, - /*env=*/None, Redirects); - if (RC && !CrashMode) { - - ErrMsg() << "Exit code " << RC << " != 0 received from afl-showmap\n"; - return createStringError(inconvertibleErrorCode(), "afl-showmap failed"); - - } - - // ------------------------------------------------------------------------ // - // Parse afl-showmap output - // ------------------------------------------------------------------------ // - - AFLCoverageVector Cov; - std::error_code EC; - sys::fs::file_status Status; - - if (InputIsFile) { - - // Read a single output coverage file - if ((EC = readCov(Output, Cov))) { - - sys::fs::remove(Output); - return errorCodeToError(EC); - - } - - CovMap.try_emplace(sys::path::filename(Input), Cov); - if (!KeepTraces) sys::fs::remove(Output); - - } else { - - // Read a directory of output coverage files - for (sys::fs::recursive_directory_iterator Dir(TraceDir, EC), DirEnd; - Dir != DirEnd && !EC; Dir.increment(EC)) { - - if (EC) return errorCodeToError(EC); - - const auto &Path = Dir->path(); - if ((EC = sys::fs::status(Path, Status))) return errorCodeToError(EC); - - switch (Status.type()) { - - case sys::fs::file_type::regular_file: - case sys::fs::file_type::symlink_file: - case sys::fs::file_type::type_unknown: - Cov.clear(); - if ((EC = readCov(Path, Cov))) { - - sys::fs::remove(Path); - return errorCodeToError(EC); - - } - - CovMap.try_emplace(sys::path::filename(Path), Cov); - default: - // Ignore - break; - - } - - } - - if (!KeepTraces) sys::fs::remove_directories(TraceDir); - - } - - return Error::success(); - -} - -static inline void StartTimer() { - - StartTime = std::chrono::system_clock::now(); - -} - -static inline void EndTimer() { - - EndTime = std::chrono::system_clock::now(); - Duration = - std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); - - SuccMsg() << " Completed in " << Duration.count() << " s\n"; - -} - -// -------------------------------------------------------------------------- // -// Main function -// -------------------------------------------------------------------------- // - -int main(int argc, char *argv[]) { - - WeightsMap Weights; - std::error_code EC; - - // ------------------------------------------------------------------------ // - // Parse command-line options and environment variables - // - // Also check the target arguments, as this determines how we run afl-showmap. - // ------------------------------------------------------------------------ // - - cl::ParseCommandLineOptions(argc, argv, "Optimal corpus minimizer"); - - KeepTraces = !!std::getenv("AFL_KEEP_TRACES"); - SkipBinCheck = !!std::getenv("AFL_SKIP_BIN_CHECK"); - const auto AFLPath = std::getenv("AFL_PATH"); - - if (CrashMode) ::setenv("AFL_CMIN_CRASHES_ONLY", "1", /*overwrite=*/true); - - for (const auto &Arg : TargetArgs) - if (Arg == "@@") TargetArgsHasAtAt = true; - - // ------------------------------------------------------------------------ // - // Find afl-showmap - // ------------------------------------------------------------------------ // - - SmallVector<StringRef, 16> EnvPaths; - - if (const char *PathEnv = std::getenv("PATH")) - SplitString(PathEnv, EnvPaths, ":"); - if (AFLPath) EnvPaths.push_back(AFLPath); - - const auto ShowmapOrErr = sys::findProgramByName("afl-showmap", EnvPaths); - if (ShowmapOrErr.getError()) { - - ErrMsg() << "Failed to find afl-showmap. Check your PATH\n"; - return 1; - - } - - ShowmapPath = *ShowmapOrErr; - - // ------------------------------------------------------------------------ // - // Parse weights - // - // Weights are stored in CSV file mapping a seed file name to an integer - // greater than zero. - // ------------------------------------------------------------------------ // - - if (WeightsFile != "") { - - StatMsg() << "Reading weights from '" << WeightsFile << "'...\n"; - StartTimer(); - - const auto WeightsOrErr = MemoryBuffer::getFile(WeightsFile); - if ((EC = WeightsOrErr.getError())) { - - ErrMsg() << "Failed to read weights from '" << WeightsFile - << "': " << EC.message() << '\n'; - return 1; - - } - - GetWeights(*WeightsOrErr.get(), Weights); - - EndTimer(); - - } - - // ------------------------------------------------------------------------ // - // Traverse input directory - // - // Find the seed files inside this directory (and subdirectories). - // ------------------------------------------------------------------------ // - - StatMsg() << "Locating seeds in '" << InputDir << "'...\n"; - StartTimer(); - - bool IsDirResult; - if ((EC = sys::fs::is_directory(InputDir, IsDirResult))) { - - ErrMsg() << "Invalid input directory '" << InputDir << "': " << EC.message() - << '\n'; - return 1; - - } - - sys::fs::file_status Status; - StringMap<std::string> SeedFiles; - - for (sys::fs::recursive_directory_iterator Dir(InputDir, EC), DirEnd; - Dir != DirEnd && !EC; Dir.increment(EC)) { - - if (EC) { - - ErrMsg() << "Failed to traverse input directory '" << InputDir - << "': " << EC.message() << '\n'; - return 1; - - } - - const auto &Path = Dir->path(); - if ((EC = sys::fs::status(Path, Status))) { - - ErrMsg() << "Failed to access '" << Path << "': " << EC.message() << '\n'; - return 1; - - } - - switch (Status.type()) { - - case sys::fs::file_type::regular_file: - case sys::fs::file_type::symlink_file: - case sys::fs::file_type::type_unknown: - SeedFiles.try_emplace(sys::path::filename(Path), - sys::path::parent_path(Path)); - default: - /* Ignore */ - break; - - } - - } - - EndTimer(); - - if (SeedFiles.empty()) { - - ErrMsg() << "Failed to find any seed files in '" << InputDir << "'\n"; - return 1; - - } - - // ------------------------------------------------------------------------ // - // Setup output directory - // ------------------------------------------------------------------------ // - - SmallString<32> TraceDir{OutputDir}; - sys::path::append(TraceDir, ".traces"); - - if ((EC = sys::fs::remove_directories(TraceDir))) { - - ErrMsg() << "Failed to remove existing trace directory in '" << OutputDir - << "': " << EC.message() << '\n'; - return 1; - - } - - if ((EC = sys::fs::create_directories(TraceDir))) { - - ErrMsg() << "Failed to create output directory '" << OutputDir - << "': " << EC.message() << '\n'; - return 1; - - } - - // ------------------------------------------------------------------------ // - // Test the target binary - // ------------------------------------------------------------------------ // - - AFLCoverageMap CovMap; - - if (!SkipBinCheck) { - - const auto It = SeedFiles.begin(); - SmallString<32> TestSeed{It->second}; - sys::path::append(TestSeed, It->first()); - - StatMsg() << "Testing the target binary with '" << TestSeed << "`...\n"; - StartTimer(); - - if (auto Err = runShowmap(CovMap, TestSeed, /*BinCheck=*/true)) { - - ErrMsg() << "No instrumentation output detected \n"; - return 1; - - } - - EndTimer(); - SuccMsg() << "OK, " << CovMap.begin()->second.size() - << " tuples recorded\n"; - - } - - // ------------------------------------------------------------------------ // - // Generate seed coverage - // - // Iterate over the corpus directory, which should contain seed files. Execute - // these seeds in the target program to generate coverage information, and - // then store this coverage information in the appropriate data structures. - // ------------------------------------------------------------------------ // - - StatMsg() << "Running afl-showmap on " << SeedFiles.size() << " seeds...\n"; - StartTimer(); - - MaxSATSeeds SeedVars; - MaxSATCoverageMap SeedCoverage; - EvalMaxSAT Solver(/*nbMinimizeThread=*/0); - - CovMap.clear(); - if (auto Err = runShowmap(CovMap, InputDir)) { - - ErrMsg() << "Failed to generate coverage: " << Err << '\n'; - return 1; - - } - - for (const auto &SeedCov : CovMap) { - - // Create a variable to represent the seed - const SeedID Var = Solver.newVar(); - SeedVars.emplace_back(Var, SeedCov.first()); - - // Record the set of seeds that cover a particular edge - for (auto &[Edge, Freq] : SeedCov.second) { - - if (EdgesOnly) { - - // Ignore edge frequency - SeedCoverage[Edge].insert(Var); - - } else { - - // Executing edge `E` `N` times means that it was executed `N - 1` times - for (unsigned I = 0; I < Freq; ++I) - SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(Var); - - } - - } - - } - - EndTimer(); - - // ------------------------------------------------------------------------ // - // Set the hard and soft constraints in the solver - // ------------------------------------------------------------------------ // - - StatMsg() << "Generating constraints...\n"; - StartTimer(); - - size_t SeedCount = 0; - - // Ensure that at least one seed is selected that covers a particular edge - // (hard constraint) - std::vector<SeedID> Clauses; - for (const auto &[_, Seeds] : SeedCoverage) { - - if (Seeds.empty()) continue; - - Clauses.clear(); - for (const auto &Seed : Seeds) - Clauses.push_back(Seed); - - Solver.addClause(Clauses); - - } - - // Select the minimum number of seeds that cover a particular set of edges - // (soft constraint) - for (const auto &[Var, Seed] : SeedVars) - Solver.addWeightedClause({-Var}, Weights[sys::path::filename(Seed)]); - - EndTimer(); - - // ------------------------------------------------------------------------ // - // Generate a solution - // ------------------------------------------------------------------------ // - - StatMsg() << "Solving...\n"; - StartTimer(); - - const bool Solved = Solver.solve(); - - EndTimer(); - - // ------------------------------------------------------------------------ // - // Save the solution - // - // This will copy the selected seeds to the given output directory. - // ------------------------------------------------------------------------ // - - SmallVector<StringRef, 64> Solution; - SmallString<32> InputSeed, OutputSeed; - - if (Solved) { - - for (const auto &[Var, Seed] : SeedVars) - if (Solver.getValue(Var) > 0) Solution.push_back(Seed); - - } else { - - ErrMsg() << "Failed to find an optimal solution for '" << InputDir << "'\n"; - return 1; - - } - - StatMsg() << "Copying " << Solution.size() << " seeds to '" << OutputDir - << "'...\n"; - StartTimer(); - - SeedCount = 0; - - for (const auto &Seed : Solution) { - - InputSeed = SeedFiles[Seed]; - sys::path::append(InputSeed, Seed); - - OutputSeed = OutputDir; - sys::path::append(OutputSeed, Seed); - - if ((EC = sys::fs::copy_file(InputSeed, OutputSeed))) { - - ErrMsg() << "Failed to copy '" << Seed << "' to '" << OutputDir - << "': " << EC.message() << '\n'; - return 1; - - } - - } - - EndTimer(); - SuccMsg() << "Done!\n"; - - return 0; - -} - |