commit a6fcc56c2d10c4cdef51f64927af8df3d309551b Author: goodtaeeun Date: 2023-06-10 00:02:11 +0900 [DAFL] init diff --git a/Makefile b/Makefile index 5e800db2627e..4abc4848dfdf 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,7 @@ CFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \ -DBIN_PATH=\"$(BIN_PATH)\" ifneq "$(filter Linux GNU%,$(shell uname))" "" - LDFLAGS += -ldl + LDFLAGS += -ldl -lm endif ifeq "$(findstring clang, $(shell $(CC) --version 2>/dev/null))" "" diff --git a/afl-fuzz.c b/afl-fuzz.c index 61a3ee97bb78..560b31fe3aec 100644 --- a/afl-fuzz.c +++ b/afl-fuzz.c @@ -67,6 +67,8 @@ #include #include +#include + #if defined(__APPLE__) || defined(__FreeBSD__) || defined (__OpenBSD__) # include #endif /* __APPLE__ || __FreeBSD__ || __OpenBSD__ */ @@ -145,7 +147,8 @@ static s32 forksrv_pid, /* PID of the fork server */ child_pid = -1, /* PID of the fuzzed program */ out_dir_fd = -1; /* FD of the lock file */ -EXP_ST u8* trace_bits; /* SHM with instrumentation bitmap */ +EXP_ST u8* trace_bits; /* SHM with code coverage bitmap */ +EXP_ST u32* dfg_bits; /* SHM with DFG coverage bitmap */ EXP_ST u8 virgin_bits[MAP_SIZE], /* Regions yet untouched by fuzzing */ virgin_tmout[MAP_SIZE], /* Bits we haven't seen in tmouts */ @@ -153,7 +156,8 @@ EXP_ST u8 virgin_bits[MAP_SIZE], /* Regions yet untouched by fuzzing */ static u8 var_bytes[MAP_SIZE]; /* Bytes that appear to be variable */ -static s32 shm_id; /* ID of the SHM region */ +static s32 shm_id; /* ID of the SHM for code coverage */ +static s32 shm_id_dfg; /* ID of the SHM for DFG coverage */ static volatile u8 stop_soon, /* Ctrl-C pressed? */ clear_screen = 1, /* Window resized? */ @@ -243,6 +247,7 @@ struct queue_entry { u8 cal_failed, /* Calibration failed? */ trim_done, /* Trimmed? */ was_fuzzed, /* Had any fuzzing done yet? */ + handled_in_cycle, /* Was handled in current cycle? */ passed_det, /* Deterministic stages passed? */ has_new_cov, /* Triggers new coverage? 
*/ var_behavior, /* Variable behavior? */ @@ -252,6 +257,9 @@ struct queue_entry { u32 bitmap_size, /* Number of bits set in bitmap */ exec_cksum; /* Checksum of the execution trace */ + u64 prox_score; /* Proximity score of the test case */ + u32 entry_id; /* The ID assigned to the test case */ + u64 exec_us, /* Execution time (us) */ handicap, /* Number of queue cycles behind */ depth; /* Path depth */ @@ -259,15 +267,18 @@ struct queue_entry { u8* trace_mini; /* Trace bytes, if kept */ u32 tc_ref; /* Trace bytes ref count */ - struct queue_entry *next, /* Next element, if any */ - *next_100; /* 100 elements ahead */ + struct queue_entry *next; /* Next element, if any */ }; static struct queue_entry *queue, /* Fuzzing queue (linked list) */ *queue_cur, /* Current offset within the queue */ - *queue_top, /* Top of the list */ - *q_prev100; /* Previous 100 marker */ + *queue_last;/* Lastly added to the queue */ +static struct queue_entry* + first_unhandled; /* 1st unhandled item in the queue */ + +static struct queue_entry* + shortcut_per_100[1024]; /* 100*N entries (replace next_100) */ static struct queue_entry* top_rated[MAP_SIZE]; /* Top entries for bitmap bytes */ @@ -284,6 +295,13 @@ static u32 extras_cnt; /* Total number of tokens read */ static struct extra_data* a_extras; /* Automatically selected extras */ static u32 a_extras_cnt; /* Total number of tokens available */ +static u64 max_prox_score = 0; /* Maximum score of the seed queue */ +static u64 min_prox_score = U64_MAX; /* Minimum score of the seed queue */ +static u64 total_prox_score = 0; /* Sum of proximity scores */ +static u64 avg_prox_score = 0; /* Average of proximity scores */ +static u32 no_dfg_schedule = 0; /* No DFG-based seed scheduling */ +static u32 t_x = 0; /* To test AFLGo's scheduling */ + static u8* (*post_handler)(u8* buf, u32* len); /* Interesting values, as per config.h */ @@ -783,9 +801,56 @@ static void mark_as_redundant(struct queue_entry* q, u8 state) { } +/* Insert a test 
case to the queue, preserving the sorted order based on the + * proximity score. Updates global variables 'queue', 'shortcut_per_100', and + * 'first_unhandled'. */ +static void sorted_insert_to_queue(struct queue_entry* q) { + + if (!queue) shortcut_per_100[0] = queue = q; + else { + struct queue_entry* q_probe; + u32 is_inserted = 0, i = 0; + first_unhandled = NULL; + + // Special case handling when we have to insert at the front. + if (queue->prox_score < q->prox_score) { + q->next = queue; + queue = q; + is_inserted = 1; + } + + // Traverse through the list to (1) update 'shortcut_per_100', (2) update + // 'first_unhandled', and (3) insert 'q' at the proper position. + q_probe = queue; + while (q_probe) { + + if ((i % 100 == 0) && (i / 100 < 1024)) { + shortcut_per_100[(i / 100)] = q_probe; + } + + if (!first_unhandled && !q_probe->handled_in_cycle) + first_unhandled = q_probe; + + if (!is_inserted) { + // If reached the end or found the proper position, insert there. + if (!q_probe->next || q_probe->next->prox_score < q->prox_score) { + q->next = q_probe->next; + q_probe->next = q; + is_inserted = 1; + } + } + + q_probe = q_probe->next; + i++; + + } + } + +} + /* Append new test case to the queue. 
*/ -static void add_to_queue(u8* fname, u32 len, u8 passed_det) { +static void add_to_queue(u8* fname, u32 len, u8 passed_det, u64 prox_score) { struct queue_entry* q = ck_alloc(sizeof(struct queue_entry)); @@ -793,29 +858,43 @@ static void add_to_queue(u8* fname, u32 len, u8 passed_det) { q->len = len; q->depth = cur_depth + 1; q->passed_det = passed_det; + q->prox_score = prox_score; + q->entry_id = queued_paths; if (q->depth > max_depth) max_depth = q->depth; - if (queue_top) { - - queue_top->next = q; - queue_top = q; - - } else q_prev100 = queue = queue_top = q; + sorted_insert_to_queue(q); + queue_last = q; queued_paths++; pending_not_fuzzed++; cycles_wo_finds = 0; - if (!(queued_paths % 100)) { - - q_prev100->next_100 = q; - q_prev100 = q; + last_path_time = get_cur_time(); } - last_path_time = get_cur_time(); +/* Sort the queue based on the proximity score. Needed after the dry-run. */ + +static void sort_queue(void) { + + struct queue_entry *q_next, *q_cur; + u32 i; + + // First, backup 'queue'. Then, reset 'queue' and 'shortcut_per_100'. 
+ q_cur = queue; + queue = NULL; + for (i = 0; i < 1024; i++) shortcut_per_100[i] = NULL; + + while (q_cur) { + + q_next = q_cur->next; + q_cur->next = NULL; // To satisfy sorted_insert_to_queue()'s assumption + sorted_insert_to_queue(q_cur); + q_cur = q_next; + + } } @@ -1046,6 +1125,18 @@ static u32 count_non_255_bytes(u8* mem) { } +static u64 compute_proximity_score(void) { + + u64 prox_score = 0; + u32 i = DFG_MAP_SIZE; + + while (i--) { + prox_score += dfg_bits[i]; + } + + return prox_score; + +} /* Destructively simplify trace by eliminating hit count information and replacing it with 0x80 or 0x01 depending on whether the tuple @@ -1213,6 +1304,7 @@ static inline void classify_counts(u32* mem) { static void remove_shm(void) { shmctl(shm_id, IPC_RMID, NULL); + shmctl(shm_id_dfg, IPC_RMID, NULL); } @@ -1354,6 +1446,7 @@ static void cull_queue(void) { EXP_ST void setup_shm(void) { u8* shm_str; + u8* shm_str_dfg; if (!in_bitmap) memset(virgin_bits, 255, MAP_SIZE); @@ -1361,12 +1454,15 @@ EXP_ST void setup_shm(void) { memset(virgin_crash, 255, MAP_SIZE); shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600); + shm_id_dfg = shmget(IPC_PRIVATE, sizeof(u32) * DFG_MAP_SIZE, + IPC_CREAT | IPC_EXCL | 0600); if (shm_id < 0) PFATAL("shmget() failed"); atexit(remove_shm); shm_str = alloc_printf("%d", shm_id); + shm_str_dfg = alloc_printf("%d", shm_id_dfg); /* If somebody is asking us to fuzz instrumented binaries in dumb mode, we don't want them to detect instrumentation, since we won't be sending @@ -1374,12 +1470,16 @@ EXP_ST void setup_shm(void) { later on, perhaps? 
*/ if (!dumb_mode) setenv(SHM_ENV_VAR, shm_str, 1); + if (!dumb_mode) setenv(SHM_ENV_VAR_DFG, shm_str_dfg, 1); ck_free(shm_str); + ck_free(shm_str_dfg); trace_bits = shmat(shm_id, NULL, 0); + dfg_bits = shmat(shm_id_dfg, NULL, 0); if (trace_bits == (void *)-1) PFATAL("shmat() failed"); + if (dfg_bits == (void *)-1) PFATAL("shmat() failed"); } @@ -1491,7 +1591,9 @@ static void read_testcases(void) { if (!access(dfn, F_OK)) passed_det = 1; ck_free(dfn); - add_to_queue(fn, st.st_size, passed_det); + // Provide 0 as the proximity score and update later in calibrate_case(), + // and sort later after the dry-run phase. + add_to_queue(fn, st.st_size, passed_det, 0); } @@ -2286,6 +2388,7 @@ static u8 run_target(char** argv, u32 timeout) { territory. */ memset(trace_bits, 0, MAP_SIZE); + memset(dfg_bits, 0, sizeof(u32) * DFG_MAP_SIZE); MEM_BARRIER(); /* If we're running in "dumb" mode, we can't rely on the fork server @@ -2659,12 +2762,19 @@ static u8 calibrate_case(char** argv, struct queue_entry* q, u8* use_mem, q->exec_us = (stop_us - start_us) / stage_max; q->bitmap_size = count_bytes(trace_bits); + q->prox_score = compute_proximity_score(); q->handicap = handicap; q->cal_failed = 0; total_bitmap_size += q->bitmap_size; total_bitmap_entries++; + /* Update proximity score information */ + total_prox_score += q->prox_score; + avg_prox_score = total_prox_score / queued_paths; + if (min_prox_score > q->prox_score) min_prox_score = q->prox_score; + if (max_prox_score < q->prox_score) max_prox_score = q->prox_score; + update_bitmap_score(q); /* If this case didn't result in new output from the instrumentation, tell @@ -3149,6 +3259,7 @@ static u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) { u8 hnb; s32 fd; u8 keeping = 0, res; + u64 prox_score; if (fault == crash_mode) { @@ -3160,10 +3271,12 @@ static u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) { return 0; } + prox_score = compute_proximity_score(); + #ifndef SIMPLE_FILES - fn = 
alloc_printf("%s/queue/id:%06u,%s", out_dir, queued_paths, - describe_op(hnb)); + fn = alloc_printf("%s/queue/id:%06u,%llu,%s", out_dir, queued_paths, + prox_score, describe_op(hnb)); #else @@ -3171,19 +3284,19 @@ static u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) { #endif /* ^!SIMPLE_FILES */ - add_to_queue(fn, len, 0); + add_to_queue(fn, len, 0, prox_score); if (hnb == 2) { - queue_top->has_new_cov = 1; + queue_last->has_new_cov = 1; queued_with_cov++; } - queue_top->exec_cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST); + queue_last->exec_cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST); /* Try to calibrate inline; this also calls update_bitmap_score() when successful. */ - res = calibrate_case(argv, queue_top, mem, queue_cycle - 1, 0); + res = calibrate_case(argv, queue_last, mem, queue_cycle - 1, 0); if (res == FAULT_ERROR) FATAL("Unable to execute target application"); @@ -3288,10 +3401,14 @@ keep_as_crash: if (!unique_crashes) write_crash_readme(); + // Temporary computation for debugging. + prox_score = compute_proximity_score(); + #ifndef SIMPLE_FILES - fn = alloc_printf("%s/crashes/id:%06llu,sig:%02u,%s", out_dir, - unique_crashes, kill_signal, describe_op(0)); + fn = alloc_printf("%s/crashes/id:%06llu,%llu,sig:%02u,%s", out_dir, + unique_crashes, prox_score, kill_signal, + describe_op(0)); #else @@ -3328,40 +3445,6 @@ keep_as_crash: } -/* When resuming, try to find the queue position to start from. This makes sense - only when resuming, and when we can find the original fuzzer_stats. */ - -static u32 find_start_position(void) { - - static u8 tmp[4096]; /* Ought to be enough for anybody. 
*/ - - u8 *fn, *off; - s32 fd, i; - u32 ret; - - if (!resuming_fuzz) return 0; - - if (in_place_resume) fn = alloc_printf("%s/fuzzer_stats", out_dir); - else fn = alloc_printf("%s/../fuzzer_stats", in_dir); - - fd = open(fn, O_RDONLY); - ck_free(fn); - - if (fd < 0) return 0; - - i = read(fd, tmp, sizeof(tmp) - 1); (void)i; /* Ignore errors */ - close(fd); - - off = strstr(tmp, "cur_path : "); - if (!off) return 0; - - ret = atoi(off + 20); - if (ret >= queued_paths) ret = 0; - return ret; - -} - - /* The same, but for timeouts. The idea is that when resuming sessions without -t given, we don't want to keep auto-scaling the timeout over and over again to prevent it from growing due to random flukes. */ @@ -4721,6 +4804,31 @@ static u32 choose_block_len(u32 limit) { } +/* Calculate the factor to multiply to the performance score. This is derived + * from the proximity scores of the DFG nodes covered by the test case. */ +static double calculate_factor(u64 prox_score) { + + double factor; + double normalized_prox_score, progress_to_tx, T, p; + u64 cur_ms, t; + + if (t_x) { // AFLGo's seed scheduling strategy. + if (min_prox_score == max_prox_score) normalized_prox_score = 0.5; + else normalized_prox_score = (double) (prox_score - min_prox_score) / + (double) (max_prox_score - min_prox_score); + cur_ms = get_cur_time(); + t = (cur_ms - start_time) / 1000; + progress_to_tx = ((double) t) / ((double) t_x * 60.0); + T = 1.0 / pow(20.0, progress_to_tx); + p = normalized_prox_score * (1.0 - T) + 0.5 * T; + factor = pow(2.0, 5.0 * 2.0 * (p - 0.5)); // Note log2(MAX_FACTOR) = 5.0 + } + else if (no_dfg_schedule || !avg_prox_score) factor = 1.0; // No factor. + else factor = ((double) prox_score) / ((double) avg_prox_score); // Default. + + return factor; + +} /* Calculate case desirability score to adjust the length of havoc fuzzing. A helper function for fuzz_one(). 
Maybe some of these constants should @@ -4990,6 +5098,8 @@ static u8 fuzz_one(char** argv) { u64 havoc_queued, orig_hit_cnt, new_hit_cnt; u32 splice_cycle = 0, perf_score = 100, orig_perf, prev_cksum, eff_cnt = 1; + struct queue_entry* target; // Target test case to splice with. + u8 ret_val = 1, doing_det = 0; u8 a_collect[MAX_AUTO_EXTRA]; @@ -6108,17 +6218,26 @@ havoc_stage: if (!splice_cycle) { + /* Adjust perf_score with the factor derived from the proximity score */ + u64 prox_score = queue_cur->prox_score; + perf_score = (u32) (calculate_factor(prox_score) * (double) perf_score); + stage_name = "havoc"; stage_short = "havoc"; stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * perf_score / havoc_div / 100; + } else { static u8 tmp[32]; perf_score = orig_perf; + /* Adjust perf_score with the factor derived from the proximity score */ + u64 prox_score = (queue_cur->prox_score + target->prox_score) / 2; + perf_score = (u32) (calculate_factor(prox_score) * (double) perf_score); + sprintf(tmp, "splice %u", splice_cycle); stage_name = tmp; stage_short = "splice"; @@ -6569,8 +6688,7 @@ retry_splicing: if (use_splicing && splice_cycle++ < SPLICE_CYCLES && queued_paths > 1 && queue_cur->len > 1) { - struct queue_entry* target; - u32 tid, split_at; + u32 idx, idx_div, split_at; u8* new_buf; s32 f_diff, l_diff; @@ -6583,25 +6701,31 @@ retry_splicing: len = queue_cur->len; } - /* Pick a random queue entry and seek to it. Don't splice with yourself. */ + /* Pick a random queue entry and find it. 
*/ - do { tid = UR(queued_paths); } while (tid == current_entry); + idx = UR(queued_paths); - splicing_with = tid; - target = queue; + idx_div = idx / 100; + if (idx_div < 1024) { + target = shortcut_per_100[idx_div]; + idx -= idx_div * 100; + } else { + target = shortcut_per_100[1023]; + idx -= idx_div * 1024; + } - while (tid >= 100) { target = target->next_100; tid -= 100; } - while (tid--) target = target->next; + while (idx--) target = target->next; - /* Make sure that the target has a reasonable length. */ + /* Make sure that the target has a reasonable length and isn't yourself. */ while (target && (target->len < 2 || target == queue_cur)) { target = target->next; - splicing_with++; } if (!target) goto retry_splicing; + splicing_with = target->entry_id; + /* Read the testcase into a new buffer. */ fd = open(target->fname, O_RDONLY); @@ -7760,7 +7884,7 @@ int main(int argc, char** argv) { s32 opt; u64 prev_queued = 0; - u32 sync_interval_cnt = 0, seek_to; + u32 sync_interval_cnt = 0; u8 *extras_dir = 0; u8 mem_limit_given = 0; u8 exit_1 = !!getenv("AFL_BENCH_JUST_ONE"); @@ -7776,7 +7900,7 @@ int main(int argc, char** argv) { gettimeofday(&tv, &tz); srandom(tv.tv_sec ^ tv.tv_usec ^ getpid()); - while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:Q")) > 0) + while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:QNc:")) > 0) switch (opt) { @@ -7944,6 +8068,28 @@ int main(int argc, char** argv) { break; + case 'N': /* Do not perform DFG-based seed scheduling */ + + no_dfg_schedule = 1; + break; + + case 'c': { /* Parameter to test AFLGo's scheduling strategy */ + + u8 suffix = 'm'; + if (sscanf(optarg, "%u%c", &t_x, &suffix) < 1 || + optarg[0] == '-') FATAL("Bad syntax used for -c"); + + switch (suffix) { + case 's': t_x /= 60; break; + case 'm': break; + case 'h': t_x *= 60; break; + case 'd': t_x *= 60 * 24; break; + default: FATAL("Unsupported suffix or bad syntax for -c"); + } + } + + break; + default: usage(argv[0]); @@ -8035,9 +8181,9 @@ int 
main(int argc, char** argv) { cull_queue(); - show_init_stats(); + sort_queue(); - seek_to = find_start_position(); + show_init_stats(); write_stats_file(0, 0, 0); save_auto(); @@ -8061,15 +8207,11 @@ int main(int argc, char** argv) { if (!queue_cur) { queue_cycle++; - current_entry = 0; cur_skipped_paths = 0; queue_cur = queue; - while (seek_to) { - current_entry++; - seek_to--; - queue_cur = queue_cur->next; - } + for (struct queue_entry* q_tmp = queue; q_tmp; q_tmp = q_tmp->next) + q_tmp->handled_in_cycle = 0; show_stats(); @@ -8094,6 +8236,10 @@ int main(int argc, char** argv) { } + /* Note that even if we skip the current item, it's considered "handled". */ + queue_cur->handled_in_cycle = 1; + current_entry = queue_cur->entry_id; + skipped_fuzz = fuzz_one(use_argv); if (!stop_soon && sync_id && !skipped_fuzz) { @@ -8107,9 +8253,13 @@ int main(int argc, char** argv) { if (stop_soon) break; + if (first_unhandled) { // This is set only when a new item was added. + queue_cur = first_unhandled; + first_unhandled = NULL; + } else { // Proceed to the next unhandled item in the queue. + while (queue_cur && queue_cur->handled_in_cycle) queue_cur = queue_cur->next; - current_entry++; - + } } if (queue_cur) show_stats(); diff --git a/config.h b/config.h index 46dd85733819..2f4af430e625 100644 --- a/config.h +++ b/config.h @@ -277,6 +277,7 @@ /* Environment variable used to pass SHM ID to the called program. */ #define SHM_ENV_VAR "__AFL_SHM_ID" +#define SHM_ENV_VAR_DFG "__AFL_SHM_ID_DFG" /* Other less interesting, internal-only variables. 
*/ @@ -327,6 +328,7 @@ #define MAP_SIZE_POW2 16 #define MAP_SIZE (1 << MAP_SIZE_POW2) +#define DFG_MAP_SIZE 32568 /* Maximum allocator request size (keep well under INT_MAX): */ diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index 7617f914de61..93fbd2aae722 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -61,7 +61,7 @@ else PROGS = ../afl-clang-fast ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o endif -all: test_deps $(PROGS) test_build all_done +all: test_deps $(PROGS) test_deps: ifndef AFL_TRACE_PC diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc index 154a5db75754..af989c2037b2 100644 --- a/llvm_mode/afl-llvm-pass.so.cc +++ b/llvm_mode/afl-llvm-pass.so.cc @@ -34,6 +34,11 @@ #include "../config.h" #include "../debug.h" +#include +#include +#include +#include + #include #include #include @@ -45,8 +50,17 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Support/CommandLine.h" + using namespace llvm; +bool selective_coverage = false; +bool dfg_scoring = false; +bool no_filename_match = false; +std::set instr_targets; +std::map> dfg_node_map; + + namespace { class AFLCoverage : public ModulePass { @@ -67,38 +81,63 @@ namespace { } -char AFLCoverage::ID = 0; +void initCoverageTarget(char* select_file) { + std::string line; + std::ifstream stream(select_file); + while (std::getline(stream, line)) + instr_targets.insert(line); +} -bool AFLCoverage::runOnModule(Module &M) { - LLVMContext &C = M.getContext(); +void initDFGNodeMap(char* dfg_file) { + unsigned int idx = 0; + std::string line; + std::ifstream stream(dfg_file); + + while (std::getline(stream, line)) { + std::size_t space_idx = line.find(" "); + std::string score_str = line.substr(0, space_idx); + std::string targ_line = line.substr(space_idx + 1, std::string::npos); + int score = stoi(score_str); + dfg_node_map[targ_line] = std::make_pair(idx++, (unsigned int) score); + if (idx >= DFG_MAP_SIZE) { + 
std::cout << "Input DFG is too large (check DFG_MAP_SIZE)" << std::endl; + exit(1); + } + } +} - IntegerType *Int8Ty = IntegerType::getInt8Ty(C); - IntegerType *Int32Ty = IntegerType::getInt32Ty(C); - /* Show a banner */ +void initialize(void) { + char* select_file = getenv("DAFL_SELECTIVE_COV"); + char* dfg_file = getenv("DAFL_DFG_SCORE"); + + if (select_file) { + selective_coverage = true; + initCoverageTarget(select_file); + } - char be_quiet = 0; + if (dfg_file) { + dfg_scoring = true; + initDFGNodeMap(dfg_file); + } - if (isatty(2) && !getenv("AFL_QUIET")) { + if (getenv("DAFL_NO_FILENAME_MATCH")) no_filename_match = true; +} - SAYF(cCYA "afl-llvm-pass " cBRI VERSION cRST " by \n"); - } else be_quiet = 1; +char AFLCoverage::ID = 0; - /* Decide instrumentation ratio */ - char* inst_ratio_str = getenv("AFL_INST_RATIO"); - unsigned int inst_ratio = 100; +bool AFLCoverage::runOnModule(Module &M) { - if (inst_ratio_str) { + LLVMContext &C = M.getContext(); - if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || - inst_ratio > 100) - FATAL("Bad value of AFL_INST_RATIO (must be between 1 and 100)"); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); - } + initialize(); /* Get globals for the SHM region and the previous location. Note that __afl_prev_loc is thread-local. */ @@ -107,6 +146,10 @@ bool AFLCoverage::runOnModule(Module &M) { new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); + GlobalVariable *AFLMapDFGPtr = + new GlobalVariable(M, PointerType::get(Int32Ty, 0), false, + GlobalValue::ExternalLinkage, 0, "__afl_area_dfg_ptr"); + GlobalVariable *AFLPrevLoc = new GlobalVariable( M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0, GlobalVariable::GeneralDynamicTLSModel, 0, false); @@ -114,15 +157,80 @@ bool AFLCoverage::runOnModule(Module &M) { /* Instrument all the things! 
*/ int inst_blocks = 0; + int skip_blocks = 0; + int inst_dfg_nodes = 0; + std::string file_name = M.getSourceFileName(); + std::set covered_targets; + + std::size_t tokloc = file_name.find_last_of('/'); + if (tokloc != std::string::npos) { + file_name = file_name.substr(tokloc + 1, std::string::npos); + } + + for (auto &F : M) { + bool is_inst_targ = false; + const std::string func_name = F.getName().str(); + std::set::iterator it; + + /* Check if this function is our instrumentation target. */ + if (selective_coverage) { + for (it = instr_targets.begin(); it != instr_targets.end(); ++it) { + std::size_t colon = (*it).find(":"); + std::string target_file = (*it).substr(0, colon); + std::string target_func = (*it).substr(colon + 1, std::string::npos); + + if (no_filename_match || file_name.compare(target_file) == 0) { + if (func_name.compare(target_func) == 0) { + is_inst_targ = true; + covered_targets.insert(*it); + break; + } + } + } + } else is_inst_targ = true; // If disabled, instrument all the blocks. + + /* Now iterate through the basic blocks of the function. */ - for (auto &F : M) for (auto &BB : F) { + bool is_dfg_node = false; + unsigned int node_idx = 0; + unsigned int node_score = 0; + + if (is_inst_targ) { + inst_blocks++; + } + else { + skip_blocks++; + continue; + } + + /* Iterate through the instructions in the basic block to check if this + * block is a DFG node. If so, retrieve its proximity score. 
*/ + + if (dfg_scoring) { + for (auto &inst : BB) { + DebugLoc dbg = inst.getDebugLoc(); + DILocation* DILoc = dbg.get(); + if (DILoc && DILoc->getLine()) { + int line_no = DILoc->getLine(); + std::ostringstream stream; + stream << file_name << ":" << line_no; + std::string targ_str = stream.str(); + if (dfg_node_map.count(targ_str) > 0) { + is_dfg_node = true; + auto node_info = dfg_node_map[targ_str]; + node_idx = node_info.first; + node_score = node_info.second; + inst_dfg_nodes++; + break; + } + } + } + } // If disabled, we don't have to do anything here. BasicBlock::iterator IP = BB.getFirstInsertionPt(); IRBuilder<> IRB(&(*IP)); - if (AFL_R(100) >= inst_ratio) continue; - /* Make up cur_loc */ unsigned int cur_loc = AFL_R(MAP_SIZE); @@ -156,21 +264,24 @@ bool AFLCoverage::runOnModule(Module &M) { IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc); Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - inst_blocks++; - + if (is_dfg_node) { + /* Update DFG coverage map. */ + LoadInst *DFGMap = IRB.CreateLoad(AFLMapDFGPtr); + DFGMap->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + ConstantInt * Idx = ConstantInt::get(Int32Ty, node_idx); + ConstantInt * Score = ConstantInt::get(Int32Ty, node_score); + Value *DFGMapPtrIdx = IRB.CreateGEP(DFGMap, Idx); + IRB.CreateStore(Score, DFGMapPtrIdx) + ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + } + } } /* Say something nice. */ - - if (!be_quiet) { - - if (!inst_blocks) WARNF("No instrumentation targets found."); - else OKF("Instrumented %u locations (%s mode, ratio %u%%).", - inst_blocks, getenv("AFL_HARDEN") ? "hardened" : - ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) ? - "ASAN/MSAN" : "non-hardened"), inst_ratio); - - } + for (auto it = covered_targets.begin(); it != covered_targets.end(); ++it) + std::cout << "Covered " << (*it) << std::endl; + OKF("Selected blocks: %u, skipped blocks: %u. 
instrumented DFG nodes: %u", + inst_blocks, skip_blocks, inst_dfg_nodes); return true; diff --git a/llvm_mode/afl-llvm-rt.o.c b/llvm_mode/afl-llvm-rt.o.c index 60475c933073..6df88f7b462d 100644 --- a/llvm_mode/afl-llvm-rt.o.c +++ b/llvm_mode/afl-llvm-rt.o.c @@ -60,6 +60,9 @@ u8 __afl_area_initial[MAP_SIZE]; u8* __afl_area_ptr = __afl_area_initial; +u32 __afl_area_initial_dfg[DFG_MAP_SIZE]; +u32* __afl_area_dfg_ptr = __afl_area_initial_dfg; + __thread u32 __afl_prev_loc; @@ -73,6 +76,7 @@ static u8 is_persistent; static void __afl_map_shm(void) { u8 *id_str = getenv(SHM_ENV_VAR); + u8 *id_str_dfg = getenv(SHM_ENV_VAR_DFG); /* If we're running under AFL, attach to the appropriate region, replacing the early-stage __afl_area_initial region that is needed to allow some really @@ -81,12 +85,15 @@ static void __afl_map_shm(void) { if (id_str) { u32 shm_id = atoi(id_str); + u32 shm_id_dfg = atoi(id_str_dfg); __afl_area_ptr = shmat(shm_id, NULL, 0); + __afl_area_dfg_ptr = shmat(shm_id_dfg, NULL, 0); /* Whooooops. */ if (__afl_area_ptr == (void *)-1) _exit(1); + if (__afl_area_dfg_ptr == (void *)-1) _exit(1); /* Write something into the bitmap so that even with low AFL_INST_RATIO, our parent doesn't give up on us. */ @@ -196,6 +203,7 @@ int __afl_persistent_loop(unsigned int max_cnt) { if (is_persistent) { memset(__afl_area_ptr, 0, MAP_SIZE); + memset(__afl_area_dfg_ptr, 0, sizeof(u32) * DFG_MAP_SIZE); __afl_area_ptr[0] = 1; __afl_prev_loc = 0; } @@ -224,6 +232,7 @@ int __afl_persistent_loop(unsigned int max_cnt) { dummy output region. 
*/ __afl_area_ptr = __afl_area_initial; + __afl_area_dfg_ptr = __afl_area_initial_dfg; } diff --git a/types.h b/types.h index f4a5716bfc05..e2a9058291a0 100644 --- a/types.h +++ b/types.h @@ -27,6 +27,7 @@ #include <stdint.h> #include <stdlib.h> +#include <limits.h> typedef uint8_t u8; typedef uint16_t u16; @@ -50,8 +51,10 @@ typedef uint32_t u32; #ifdef __x86_64__ typedef unsigned long long u64; +#define U64_MAX ULLONG_MAX #else typedef uint64_t u64; +#define U64_MAX UINT64_MAX #endif /* ^__x86_64__ */ typedef int8_t s8;