From 06ecd6c51765f1e6212ee4cf063daca51704daa0 Mon Sep 17 00:00:00 2001 From: Nguyễn Gia Phong Date: Tue, 19 Nov 2024 15:38:22 +0900 Subject: Add DAFL packages --- loftix/fuzzing.scm | 18 + patches/dafl-extract-file-name.patch | 36 ++ patches/dafl.patch | 1012 ++++++++++++++++++++++++++++++++++ 3 files changed, 1066 insertions(+) create mode 100644 patches/dafl-extract-file-name.patch create mode 100644 patches/dafl.patch diff --git a/loftix/fuzzing.scm b/loftix/fuzzing.scm index e5f6178..1d796e4 100644 --- a/loftix/fuzzing.scm +++ b/loftix/fuzzing.scm @@ -47,6 +47,18 @@ (patches (search-patches "patches/afl++-keep-all-crashes.patch"))))))) +(define-public dafl++ + (package + (inherit afl++) + (name "dafl++") + (source + (origin + (inherit (package-source afl++)) + (patches (search-patches + "patches/afl++-keep-all-crashes.patch" + "patches/dafl.patch" + "patches/dafl-extract-file-name.patch")))))) + (define-public afl-dyninst (package (name "afl-dyninst") @@ -75,3 +87,9 @@ (description "Dyninst integration for AFL++") (home-page "https://trong.loang.net/~cnx/afl-dyninst") (license (list license:agpl3+ license:asl2.0)))) + +(define-public dafl-dyninst + (package + (inherit afl-dyninst) + (name "dafl-dyninst") + (inputs (list dafl++ dyninst)))) diff --git a/patches/dafl-extract-file-name.patch b/patches/dafl-extract-file-name.patch new file mode 100644 index 0000000..db6fdd6 --- /dev/null +++ b/patches/dafl-extract-file-name.patch @@ -0,0 +1,36 @@ +commit 50d68fc44948ab3b25a7af2f5fc9918110b1939a +Author: goodtaeeun +Date: 2024-02-13 15:24:01 +0900 + + [DAFL] extract file name from function + +diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc +index af989c2037b2..ecc017e9f3e0 100644 +--- a/llvm_mode/afl-llvm-pass.so.cc ++++ b/llvm_mode/afl-llvm-pass.so.cc +@@ -162,12 +162,21 @@ bool AFLCoverage::runOnModule(Module &M) { + std::string file_name = M.getSourceFileName(); + std::set covered_targets; + +- std::size_t tokloc = file_name.find_last_of('/'); +- if (tokloc != std::string::npos) { +- file_name = file_name.substr(tokloc + 1, std::string::npos); +- } ++ + + for (auto &F : M) { ++ ++ // Get file name from function in case the module is a combined bc file. ++ if (auto *SP = F.getSubprogram()) { ++ file_name = SP->getFilename().str(); ++ } ++ ++ // Keep only the file name. 
++ std::size_t tokloc = file_name.find_last_of('/'); ++ if (tokloc != std::string::npos) { ++ file_name = file_name.substr(tokloc + 1, std::string::npos); ++ } ++ + bool is_inst_targ = false; + const std::string func_name = F.getName().str(); + std::set::iterator it; diff --git a/patches/dafl.patch b/patches/dafl.patch new file mode 100644 index 0000000..fa91885 --- /dev/null +++ b/patches/dafl.patch @@ -0,0 +1,1012 @@ +commit a6fcc56c2d10c4cdef51f64927af8df3d309551b +Author: goodtaeeun +Date: 2023-06-10 00:02:11 +0900 + + [DAFL] init + +diff --git a/Makefile b/Makefile +index 5e800db2627e..4abc4848dfdf 100644 +--- a/Makefile ++++ b/Makefile +@@ -33,7 +33,7 @@ CFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \ + -DBIN_PATH=\"$(BIN_PATH)\" + + ifneq "$(filter Linux GNU%,$(shell uname))" "" +- LDFLAGS += -ldl ++ LDFLAGS += -ldl -lm + endif + + ifeq "$(findstring clang, $(shell $(CC) --version 2>/dev/null))" "" +diff --git a/afl-fuzz.c b/afl-fuzz.c +index 61a3ee97bb78..560b31fe3aec 100644 +--- a/afl-fuzz.c ++++ b/afl-fuzz.c +@@ -67,6 +67,8 @@ + #include + #include + ++#include ++ + #if defined(__APPLE__) || defined(__FreeBSD__) || defined (__OpenBSD__) + # include + #endif /* __APPLE__ || __FreeBSD__ || __OpenBSD__ */ +@@ -145,7 +147,8 @@ static s32 forksrv_pid, /* PID of the fork server */ + child_pid = -1, /* PID of the fuzzed program */ + out_dir_fd = -1; /* FD of the lock file */ + +-EXP_ST u8* trace_bits; /* SHM with instrumentation bitmap */ ++EXP_ST u8* trace_bits; /* SHM with code coverage bitmap */ ++EXP_ST u32* dfg_bits; /* SHM with DFG coverage bitmap */ + + EXP_ST u8 virgin_bits[MAP_SIZE], /* Regions yet untouched by fuzzing */ + virgin_tmout[MAP_SIZE], /* Bits we haven't seen in tmouts */ +@@ -153,7 +156,8 @@ EXP_ST u8 virgin_bits[MAP_SIZE], /* Regions yet untouched by fuzzing */ + + static u8 var_bytes[MAP_SIZE]; /* Bytes that appear to be variable */ + +-static s32 shm_id; /* ID of the SHM region */ ++static s32 shm_id; /* ID of the SHM for code coverage */ ++static s32 shm_id_dfg; /* ID of the SHM for DFG coverage */ + + static volatile u8 stop_soon, /* Ctrl-C pressed? */ + clear_screen = 1, /* Window resized? */ +@@ -243,6 +247,7 @@ struct queue_entry { + u8 cal_failed, /* Calibration failed? */ + trim_done, /* Trimmed? */ + was_fuzzed, /* Had any fuzzing done yet? */ ++ handled_in_cycle, /* Was handled in current cycle? */ + passed_det, /* Deterministic stages passed? */ + has_new_cov, /* Triggers new coverage? */ + var_behavior, /* Variable behavior? 
*/ +@@ -252,6 +257,9 @@ struct queue_entry { + u32 bitmap_size, /* Number of bits set in bitmap */ + exec_cksum; /* Checksum of the execution trace */ + ++ u64 prox_score; /* Proximity score of the test case */ ++ u32 entry_id; /* The ID assigned to the test case */ ++ + u64 exec_us, /* Execution time (us) */ + handicap, /* Number of queue cycles behind */ + depth; /* Path depth */ +@@ -259,15 +267,18 @@ struct queue_entry { + u8* trace_mini; /* Trace bytes, if kept */ + u32 tc_ref; /* Trace bytes ref count */ + +- struct queue_entry *next, /* Next element, if any */ +- *next_100; /* 100 elements ahead */ ++ struct queue_entry *next; /* Next element, if any */ + + }; + + static struct queue_entry *queue, /* Fuzzing queue (linked list) */ + *queue_cur, /* Current offset within the queue */ +- *queue_top, /* Top of the list */ +- *q_prev100; /* Previous 100 marker */ ++ *queue_last;/* Lastly added to the queue */ ++static struct queue_entry* ++ first_unhandled; /* 1st unhandled item in the queue */ ++ ++static struct queue_entry* ++ shortcut_per_100[1024]; /* 100*N entries (replace next_100) */ + + static struct queue_entry* + top_rated[MAP_SIZE]; /* Top entries for bitmap bytes */ +@@ -284,6 +295,13 @@ static u32 extras_cnt; /* Total number of tokens read */ + static struct extra_data* a_extras; /* Automatically selected extras */ + static u32 a_extras_cnt; /* Total number of tokens available */ + ++static u64 max_prox_score = 0; /* Maximum score of the seed queue */ ++static u64 min_prox_score = U64_MAX; /* Minimum score of the seed queue */ ++static u64 total_prox_score = 0; /* Sum of proximity scores */ ++static u64 avg_prox_score = 0; /* Average of proximity scores */ ++static u32 no_dfg_schedule = 0; /* No DFG-based seed scheduling */ ++static u32 t_x = 0; /* To test AFLGo's scheduling */ ++ + static u8* (*post_handler)(u8* buf, u32* len); + + /* Interesting values, as per config.h */ +@@ -783,9 +801,56 @@ static void mark_as_redundant(struct queue_entry* q, u8 state) { + } + + ++/* Insert a test case to the queue, preserving the sorted order based on the ++ * proximity score. Updates global variables 'queue', 'shortcut_per_100', and ++ * 'first_unhandled'. */ ++static void sorted_insert_to_queue(struct queue_entry* q) { ++ ++ if (!queue) shortcut_per_100[0] = queue = q; ++ else { ++ struct queue_entry* q_probe; ++ u32 is_inserted = 0, i = 0; ++ first_unhandled = NULL; ++ ++ // Special case handling when we have to insert at the front. ++ if (queue->prox_score < q->prox_score) { ++ q->next = queue; ++ queue = q; ++ is_inserted = 1; ++ } ++ ++ // Traverse through the list to (1) update 'shortcut_per_100', (2) update ++ // 'first_unhandled', and (3) insert 'q' at the proper position. ++ q_probe = queue; ++ while (q_probe) { ++ ++ if ((i % 100 == 0) && (i / 100 < 1024)) { ++ shortcut_per_100[(i / 100)] = q_probe; ++ } ++ ++ if (!first_unhandled && !q_probe->handled_in_cycle) ++ first_unhandled = q_probe; ++ ++ if (!is_inserted) { ++ // If reached the end or found the proper position, insert there. ++ if (!q_probe->next || q_probe->next->prox_score < q->prox_score) { ++ q->next = q_probe->next; ++ q_probe->next = q; ++ is_inserted = 1; ++ } ++ } ++ ++ q_probe = q_probe->next; ++ i++; ++ ++ } ++ } ++ ++} ++ + /* Append new test case to the queue. 
*/ + +-static void add_to_queue(u8* fname, u32 len, u8 passed_det) { ++static void add_to_queue(u8* fname, u32 len, u8 passed_det, u64 prox_score) { + + struct queue_entry* q = ck_alloc(sizeof(struct queue_entry)); + +@@ -793,29 +858,43 @@ static void add_to_queue(u8* fname, u32 len, u8 passed_det) { + q->len = len; + q->depth = cur_depth + 1; + q->passed_det = passed_det; ++ q->prox_score = prox_score; ++ q->entry_id = queued_paths; + + if (q->depth > max_depth) max_depth = q->depth; + +- if (queue_top) { +- +- queue_top->next = q; +- queue_top = q; +- +- } else q_prev100 = queue = queue_top = q; ++ sorted_insert_to_queue(q); + ++ queue_last = q; + queued_paths++; + pending_not_fuzzed++; + + cycles_wo_finds = 0; + +- if (!(queued_paths % 100)) { +- +- q_prev100->next_100 = q; +- q_prev100 = q; ++ last_path_time = get_cur_time(); + + } + +- last_path_time = get_cur_time(); ++/* Sort the queue based on the proximity score. Needed after the dry-run. */ ++ ++static void sort_queue(void) { ++ ++ struct queue_entry *q_next, *q_cur; ++ u32 i; ++ ++ // First, backup 'queue'. Then, reset 'queue' and 'shortcut_per_100'. ++ q_cur = queue; ++ queue = NULL; ++ for (i = 0; i < 1024; i++) shortcut_per_100[i] = NULL; ++ ++ while (q_cur) { ++ ++ q_next = q_cur->next; ++ q_cur->next = NULL; // To satisfy sorted_insert_to_queue()'s assumption ++ sorted_insert_to_queue(q_cur); ++ q_cur = q_next; ++ ++ } + + } + +@@ -1046,6 +1125,18 @@ static u32 count_non_255_bytes(u8* mem) { + + } + ++static u64 compute_proximity_score(void) { ++ ++ u64 prox_score = 0; ++ u32 i = DFG_MAP_SIZE; ++ ++ while (i--) { ++ prox_score += dfg_bits[i]; ++ } ++ ++ return prox_score; ++ ++} + + /* Destructively simplify trace by eliminating hit count information + and replacing it with 0x80 or 0x01 depending on whether the tuple +@@ -1213,6 +1304,7 @@ static inline void classify_counts(u32* mem) { + static void remove_shm(void) { + + shmctl(shm_id, IPC_RMID, NULL); ++ shmctl(shm_id_dfg, IPC_RMID, NULL); + + } + +@@ -1354,6 +1446,7 @@ static void cull_queue(void) { + EXP_ST void setup_shm(void) { + + u8* shm_str; ++ u8* shm_str_dfg; + + if (!in_bitmap) memset(virgin_bits, 255, MAP_SIZE); + +@@ -1361,12 +1454,15 @@ EXP_ST void setup_shm(void) { + memset(virgin_crash, 255, MAP_SIZE); + + shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600); ++ shm_id_dfg = shmget(IPC_PRIVATE, sizeof(u32) * DFG_MAP_SIZE, ++ IPC_CREAT | IPC_EXCL | 0600); + + if (shm_id < 0) PFATAL("shmget() failed"); + + atexit(remove_shm); + + shm_str = alloc_printf("%d", shm_id); ++ shm_str_dfg = alloc_printf("%d", shm_id_dfg); + + /* If somebody is asking us to fuzz instrumented binaries in dumb mode, + we don't want them to detect instrumentation, since we won't be sending +@@ -1374,12 +1470,16 @@ EXP_ST void setup_shm(void) { + later on, perhaps? */ + + if (!dumb_mode) setenv(SHM_ENV_VAR, shm_str, 1); ++ if (!dumb_mode) setenv(SHM_ENV_VAR_DFG, shm_str_dfg, 1); + + ck_free(shm_str); ++ ck_free(shm_str_dfg); + + trace_bits = shmat(shm_id, NULL, 0); ++ dfg_bits = shmat(shm_id_dfg, NULL, 0); + + if (trace_bits == (void *)-1) PFATAL("shmat() failed"); ++ if (dfg_bits == (void *)-1) PFATAL("shmat() failed"); + + } + +@@ -1491,7 +1591,9 @@ static void read_testcases(void) { + if (!access(dfn, F_OK)) passed_det = 1; + ck_free(dfn); + +- add_to_queue(fn, st.st_size, passed_det); ++ // Provide 0 as the proximity score and update later in calibrate_case(), ++ // and sort later after the dry-run phase. 
++ add_to_queue(fn, st.st_size, passed_det, 0); + + } + +@@ -2286,6 +2388,7 @@ static u8 run_target(char** argv, u32 timeout) { + territory. */ + + memset(trace_bits, 0, MAP_SIZE); ++ memset(dfg_bits, 0, sizeof(u32) * DFG_MAP_SIZE); + MEM_BARRIER(); + + /* If we're running in "dumb" mode, we can't rely on the fork server +@@ -2659,12 +2762,19 @@ static u8 calibrate_case(char** argv, struct queue_entry* q, u8* use_mem, + + q->exec_us = (stop_us - start_us) / stage_max; + q->bitmap_size = count_bytes(trace_bits); ++ q->prox_score = compute_proximity_score(); + q->handicap = handicap; + q->cal_failed = 0; + + total_bitmap_size += q->bitmap_size; + total_bitmap_entries++; + ++ /* Update proximity score information */ ++ total_prox_score += q->prox_score; ++ avg_prox_score = total_prox_score / queued_paths; ++ if (min_prox_score > q->prox_score) min_prox_score = q->prox_score; ++ if (max_prox_score < q->prox_score) max_prox_score = q->prox_score; ++ + update_bitmap_score(q); + + /* If this case didn't result in new output from the instrumentation, tell +@@ -3149,6 +3259,7 @@ static u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) { + u8 hnb; + s32 fd; + u8 keeping = 0, res; ++ u64 prox_score; + + if (fault == crash_mode) { + +@@ -3160,10 +3271,12 @@ static u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) { + return 0; + } + ++ prox_score = compute_proximity_score(); ++ + #ifndef SIMPLE_FILES + +- fn = alloc_printf("%s/queue/id:%06u,%s", out_dir, queued_paths, +- describe_op(hnb)); ++ fn = alloc_printf("%s/queue/id:%06u,%llu,%s", out_dir, queued_paths, ++ prox_score, describe_op(hnb)); + + #else + +@@ -3171,19 +3284,19 @@ static u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) { + + #endif /* ^!SIMPLE_FILES */ + +- add_to_queue(fn, len, 0); ++ add_to_queue(fn, len, 0, prox_score); + + if (hnb == 2) { +- queue_top->has_new_cov = 1; ++ queue_last->has_new_cov = 1; + queued_with_cov++; + } + +- queue_top->exec_cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST); ++ queue_last->exec_cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST); + + /* Try to calibrate inline; this also calls update_bitmap_score() when + successful. */ + +- res = calibrate_case(argv, queue_top, mem, queue_cycle - 1, 0); ++ res = calibrate_case(argv, queue_last, mem, queue_cycle - 1, 0); + + if (res == FAULT_ERROR) + FATAL("Unable to execute target application"); +@@ -3288,10 +3401,14 @@ keep_as_crash: + + if (!unique_crashes) write_crash_readme(); + ++ // Temporary computation for debugging. ++ prox_score = compute_proximity_score(); ++ + #ifndef SIMPLE_FILES + +- fn = alloc_printf("%s/crashes/id:%06llu,sig:%02u,%s", out_dir, +- unique_crashes, kill_signal, describe_op(0)); ++ fn = alloc_printf("%s/crashes/id:%06llu,%llu,sig:%02u,%s", out_dir, ++ unique_crashes, prox_score, kill_signal, ++ describe_op(0)); + + #else + +@@ -3328,40 +3445,6 @@ keep_as_crash: + } + + +-/* When resuming, try to find the queue position to start from. This makes sense +- only when resuming, and when we can find the original fuzzer_stats. */ +- +-static u32 find_start_position(void) { +- +- static u8 tmp[4096]; /* Ought to be enough for anybody. 
*/ +- +- u8 *fn, *off; +- s32 fd, i; +- u32 ret; +- +- if (!resuming_fuzz) return 0; +- +- if (in_place_resume) fn = alloc_printf("%s/fuzzer_stats", out_dir); +- else fn = alloc_printf("%s/../fuzzer_stats", in_dir); +- +- fd = open(fn, O_RDONLY); +- ck_free(fn); +- +- if (fd < 0) return 0; +- +- i = read(fd, tmp, sizeof(tmp) - 1); (void)i; /* Ignore errors */ +- close(fd); +- +- off = strstr(tmp, "cur_path : "); +- if (!off) return 0; +- +- ret = atoi(off + 20); +- if (ret >= queued_paths) ret = 0; +- return ret; +- +-} +- +- + /* The same, but for timeouts. The idea is that when resuming sessions without + -t given, we don't want to keep auto-scaling the timeout over and over + again to prevent it from growing due to random flukes. */ +@@ -4721,6 +4804,31 @@ static u32 choose_block_len(u32 limit) { + + } + ++/* Calculate the factor to multiply to the performance score. This is derived ++ * from the proximity scores of the DFG nodes covered by the test case. */ ++static double calculate_factor(u64 prox_score) { ++ ++ double factor; ++ double normalized_prox_score, progress_to_tx, T, p; ++ u64 cur_ms, t; ++ ++ if (t_x) { // AFLGo's seed scheduling strategy. ++ if (min_prox_score == max_prox_score) normalized_prox_score = 0.5; ++ else normalized_prox_score = (double) (prox_score - min_prox_score) / ++ (double) (max_prox_score - min_prox_score); ++ cur_ms = get_cur_time(); ++ t = (cur_ms - start_time) / 1000; ++ progress_to_tx = ((double) t) / ((double) t_x * 60.0); ++ T = 1.0 / pow(20.0, progress_to_tx); ++ p = normalized_prox_score * (1.0 - T) + 0.5 * T; ++ factor = pow(2.0, 5.0 * 2.0 * (p - 0.5)); // Note log2(MAX_FACTOR) = 5.0 ++ } ++ else if (no_dfg_schedule || !avg_prox_score) factor = 1.0; // No factor. ++ else factor = ((double) prox_score) / ((double) avg_prox_score); // Default. ++ ++ return factor; ++ ++} + + /* Calculate case desirability score to adjust the length of havoc fuzzing. + A helper function for fuzz_one(). Maybe some of these constants should +@@ -4990,6 +5098,8 @@ static u8 fuzz_one(char** argv) { + u64 havoc_queued, orig_hit_cnt, new_hit_cnt; + u32 splice_cycle = 0, perf_score = 100, orig_perf, prev_cksum, eff_cnt = 1; + ++ struct queue_entry* target; // Target test case to splice with. ++ + u8 ret_val = 1, doing_det = 0; + + u8 a_collect[MAX_AUTO_EXTRA]; +@@ -6108,17 +6218,26 @@ havoc_stage: + + if (!splice_cycle) { + ++ /* Adjust perf_score with the factor derived from the proximity score */ ++ u64 prox_score = queue_cur->prox_score; ++ perf_score = (u32) (calculate_factor(prox_score) * (double) perf_score); ++ + stage_name = "havoc"; + stage_short = "havoc"; + stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * + perf_score / havoc_div / 100; + ++ + } else { + + static u8 tmp[32]; + + perf_score = orig_perf; + ++ /* Adjust perf_score with the factor derived from the proximity score */ ++ u64 prox_score = (queue_cur->prox_score + target->prox_score) / 2; ++ perf_score = (u32) (calculate_factor(prox_score) * (double) perf_score); ++ + sprintf(tmp, "splice %u", splice_cycle); + stage_name = tmp; + stage_short = "splice"; +@@ -6569,8 +6688,7 @@ retry_splicing: + if (use_splicing && splice_cycle++ < SPLICE_CYCLES && + queued_paths > 1 && queue_cur->len > 1) { + +- struct queue_entry* target; +- u32 tid, split_at; ++ u32 idx, idx_div, split_at; + u8* new_buf; + s32 f_diff, l_diff; + +@@ -6583,25 +6701,31 @@ retry_splicing: + len = queue_cur->len; + } + +- /* Pick a random queue entry and seek to it. Don't splice with yourself. 
*/ ++ /* Pick a random queue entry and find it. */ + +- do { tid = UR(queued_paths); } while (tid == current_entry); ++ idx = UR(queued_paths); + +- splicing_with = tid; +- target = queue; ++ idx_div = idx / 100; ++ if (idx_div < 1024) { ++ target = shortcut_per_100[idx_div]; ++ idx -= idx_div * 100; ++ } else { ++ target = shortcut_per_100[1023]; ++ idx -= idx_div * 1024; ++ } + +- while (tid >= 100) { target = target->next_100; tid -= 100; } +- while (tid--) target = target->next; ++ while (idx--) target = target->next; + +- /* Make sure that the target has a reasonable length. */ ++ /* Make sure that the target has a reasonable length and isn't yourself. */ + + while (target && (target->len < 2 || target == queue_cur)) { + target = target->next; +- splicing_with++; + } + + if (!target) goto retry_splicing; + ++ splicing_with = target->entry_id; ++ + /* Read the testcase into a new buffer. */ + + fd = open(target->fname, O_RDONLY); +@@ -7760,7 +7884,7 @@ int main(int argc, char** argv) { + + s32 opt; + u64 prev_queued = 0; +- u32 sync_interval_cnt = 0, seek_to; ++ u32 sync_interval_cnt = 0; + u8 *extras_dir = 0; + u8 mem_limit_given = 0; + u8 exit_1 = !!getenv("AFL_BENCH_JUST_ONE"); +@@ -7776,7 +7900,7 @@ int main(int argc, char** argv) { + gettimeofday(&tv, &tz); + srandom(tv.tv_sec ^ tv.tv_usec ^ getpid()); + +- while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:Q")) > 0) ++ while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:QNc:")) > 0) + + switch (opt) { + +@@ -7944,6 +8068,28 @@ int main(int argc, char** argv) { + + break; + ++ case 'N': /* Do not perform DFG-based seed scheduling */ ++ ++ no_dfg_schedule = 1; ++ break; ++ ++ case 'c': { /* Parameter to test AFLGo's scheduling strategy */ ++ ++ u8 suffix = 'm'; ++ if (sscanf(optarg, "%u%c", &t_x, &suffix) < 1 || ++ optarg[0] == '-') FATAL("Bad syntax used for -c"); ++ ++ switch (suffix) { ++ case 's': t_x /= 60; break; ++ case 'm': break; ++ case 'h': t_x *= 60; break; ++ case 'd': t_x *= 60 * 24; break; ++ default: FATAL("Unsupported suffix or bad syntax for -c"); ++ } ++ } ++ ++ break; ++ + default: + + usage(argv[0]); +@@ -8035,9 +8181,9 @@ int main(int argc, char** argv) { + + cull_queue(); + +- show_init_stats(); ++ sort_queue(); + +- seek_to = find_start_position(); ++ show_init_stats(); + + write_stats_file(0, 0, 0); + save_auto(); +@@ -8061,15 +8207,11 @@ int main(int argc, char** argv) { + if (!queue_cur) { + + queue_cycle++; +- current_entry = 0; + cur_skipped_paths = 0; + queue_cur = queue; + +- while (seek_to) { +- current_entry++; +- seek_to--; +- queue_cur = queue_cur->next; +- } ++ for (struct queue_entry* q_tmp = queue; q_tmp; q_tmp = q_tmp->next) ++ q_tmp->handled_in_cycle = 0; + + show_stats(); + +@@ -8094,6 +8236,10 @@ int main(int argc, char** argv) { + + } + ++ /* Note that even if we skip the current item, it's considered "handled". */ ++ queue_cur->handled_in_cycle = 1; ++ current_entry = queue_cur->entry_id; ++ + skipped_fuzz = fuzz_one(use_argv); + + if (!stop_soon && sync_id && !skipped_fuzz) { +@@ -8107,9 +8253,13 @@ int main(int argc, char** argv) { + + if (stop_soon) break; + ++ if (first_unhandled) { // This is set only when a new item was added. ++ queue_cur = first_unhandled; ++ first_unhandled = NULL; ++ } else { // Proceed to the next unhandled item in the queue. 
++ while (queue_cur && queue_cur->handled_in_cycle) + queue_cur = queue_cur->next; +- current_entry++; +- ++ } + } + + if (queue_cur) show_stats(); +diff --git a/config.h b/config.h +index 46dd85733819..2f4af430e625 100644 +--- a/config.h ++++ b/config.h +@@ -277,6 +277,7 @@ + /* Environment variable used to pass SHM ID to the called program. */ + + #define SHM_ENV_VAR "__AFL_SHM_ID" ++#define SHM_ENV_VAR_DFG "__AFL_SHM_ID_DFG" + + /* Other less interesting, internal-only variables. */ + +@@ -327,6 +328,7 @@ + + #define MAP_SIZE_POW2 16 + #define MAP_SIZE (1 << MAP_SIZE_POW2) ++#define DFG_MAP_SIZE 32568 + + /* Maximum allocator request size (keep well under INT_MAX): */ + +diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile +index 7617f914de61..93fbd2aae722 100644 +--- a/llvm_mode/Makefile ++++ b/llvm_mode/Makefile +@@ -61,7 +61,7 @@ else + PROGS = ../afl-clang-fast ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o + endif + +-all: test_deps $(PROGS) test_build all_done ++all: test_deps $(PROGS) + + test_deps: + ifndef AFL_TRACE_PC +diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc +index 154a5db75754..af989c2037b2 100644 +--- a/llvm_mode/afl-llvm-pass.so.cc ++++ b/llvm_mode/afl-llvm-pass.so.cc +@@ -34,6 +34,11 @@ + #include "../config.h" + #include "../debug.h" + ++#include ++#include ++#include ++#include ++ + #include + #include + #include +@@ -45,8 +50,17 @@ + #include "llvm/Support/Debug.h" + #include "llvm/Transforms/IPO/PassManagerBuilder.h" + ++#include "llvm/Support/CommandLine.h" ++ + using namespace llvm; + ++bool selective_coverage = false; ++bool dfg_scoring = false; ++bool no_filename_match = false; ++std::set instr_targets; ++std::map> dfg_node_map; ++ ++ + namespace { + + class AFLCoverage : public ModulePass { +@@ -67,38 +81,63 @@ namespace { + } + + +-char AFLCoverage::ID = 0; ++void initCoverageTarget(char* select_file) { ++ std::string line; ++ std::ifstream stream(select_file); + ++ while (std::getline(stream, line)) ++ instr_targets.insert(line); ++} + +-bool AFLCoverage::runOnModule(Module &M) { + +- LLVMContext &C = M.getContext(); ++void initDFGNodeMap(char* dfg_file) { ++ unsigned int idx = 0; ++ std::string line; ++ std::ifstream stream(dfg_file); ++ ++ while (std::getline(stream, line)) { ++ std::size_t space_idx = line.find(" "); ++ std::string score_str = line.substr(0, space_idx); ++ std::string targ_line = line.substr(space_idx + 1, std::string::npos); ++ int score = stoi(score_str); ++ dfg_node_map[targ_line] = std::make_pair(idx++, (unsigned int) score); ++ if (idx >= DFG_MAP_SIZE) { ++ std::cout << "Input DFG is too large (check DFG_MAP_SIZE)" << std::endl; ++ exit(1); ++ } ++ } ++} + +- IntegerType *Int8Ty = IntegerType::getInt8Ty(C); +- IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + +- /* Show a banner */ ++void initialize(void) { ++ char* select_file = getenv("DAFL_SELECTIVE_COV"); ++ char* dfg_file = getenv("DAFL_DFG_SCORE"); ++ ++ if (select_file) { ++ selective_coverage = true; ++ initCoverageTarget(select_file); ++ } + +- char be_quiet = 0; ++ if (dfg_file) { ++ dfg_scoring = true; ++ initDFGNodeMap(dfg_file); ++ } + +- if (isatty(2) && !getenv("AFL_QUIET")) { ++ if (getenv("DAFL_NO_FILENAME_MATCH")) no_filename_match = true; ++} + +- SAYF(cCYA "afl-llvm-pass " cBRI VERSION cRST " by \n"); + +- } else be_quiet = 1; ++char AFLCoverage::ID = 0; + +- /* Decide instrumentation ratio */ + +- char* inst_ratio_str = getenv("AFL_INST_RATIO"); +- unsigned int inst_ratio = 100; ++bool AFLCoverage::runOnModule(Module 
&M) { + +- if (inst_ratio_str) { ++ LLVMContext &C = M.getContext(); + +- if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || +- inst_ratio > 100) +- FATAL("Bad value of AFL_INST_RATIO (must be between 1 and 100)"); ++ IntegerType *Int8Ty = IntegerType::getInt8Ty(C); ++ IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + +- } ++ initialize(); + + /* Get globals for the SHM region and the previous location. Note that + __afl_prev_loc is thread-local. */ +@@ -107,6 +146,10 @@ bool AFLCoverage::runOnModule(Module &M) { + new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, + GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); + ++ GlobalVariable *AFLMapDFGPtr = ++ new GlobalVariable(M, PointerType::get(Int32Ty, 0), false, ++ GlobalValue::ExternalLinkage, 0, "__afl_area_dfg_ptr"); ++ + GlobalVariable *AFLPrevLoc = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", + 0, GlobalVariable::GeneralDynamicTLSModel, 0, false); +@@ -114,15 +157,80 @@ bool AFLCoverage::runOnModule(Module &M) { + /* Instrument all the things! */ + + int inst_blocks = 0; ++ int skip_blocks = 0; ++ int inst_dfg_nodes = 0; ++ std::string file_name = M.getSourceFileName(); ++ std::set covered_targets; ++ ++ std::size_t tokloc = file_name.find_last_of('/'); ++ if (tokloc != std::string::npos) { ++ file_name = file_name.substr(tokloc + 1, std::string::npos); ++ } ++ ++ for (auto &F : M) { ++ bool is_inst_targ = false; ++ const std::string func_name = F.getName().str(); ++ std::set::iterator it; ++ ++ /* Check if this function is our instrumentation target. */ ++ if (selective_coverage) { ++ for (it = instr_targets.begin(); it != instr_targets.end(); ++it) { ++ std::size_t colon = (*it).find(":"); ++ std::string target_file = (*it).substr(0, colon); ++ std::string target_func = (*it).substr(colon + 1, std::string::npos); ++ ++ if (no_filename_match || file_name.compare(target_file) == 0) { ++ if (func_name.compare(target_func) == 0) { ++ is_inst_targ = true; ++ covered_targets.insert(*it); ++ break; ++ } ++ } ++ } ++ } else is_inst_targ = true; // If disabled, instrument all the blocks. ++ ++ /* Now iterate through the basic blocks of the function. */ + +- for (auto &F : M) + for (auto &BB : F) { ++ bool is_dfg_node = false; ++ unsigned int node_idx = 0; ++ unsigned int node_score = 0; ++ ++ if (is_inst_targ) { ++ inst_blocks++; ++ } ++ else { ++ skip_blocks++; ++ continue; ++ } ++ ++ /* Iterate through the instructions in the basic block to check if this ++ * block is a DFG node. If so, retrieve its proximity score. */ ++ ++ if (dfg_scoring) { ++ for (auto &inst : BB) { ++ DebugLoc dbg = inst.getDebugLoc(); ++ DILocation* DILoc = dbg.get(); ++ if (DILoc && DILoc->getLine()) { ++ int line_no = DILoc->getLine(); ++ std::ostringstream stream; ++ stream << file_name << ":" << line_no; ++ std::string targ_str = stream.str(); ++ if (dfg_node_map.count(targ_str) > 0) { ++ is_dfg_node = true; ++ auto node_info = dfg_node_map[targ_str]; ++ node_idx = node_info.first; ++ node_score = node_info.second; ++ inst_dfg_nodes++; ++ break; ++ } ++ } ++ } ++ } // If disabled, we don't have to do anything here. 
+ + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + +- if (AFL_R(100) >= inst_ratio) continue; +- + /* Make up cur_loc */ + + unsigned int cur_loc = AFL_R(MAP_SIZE); +@@ -156,21 +264,24 @@ bool AFLCoverage::runOnModule(Module &M) { + IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc); + Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + +- inst_blocks++; +- ++ if (is_dfg_node) { ++ /* Update DFG coverage map. */ ++ LoadInst *DFGMap = IRB.CreateLoad(AFLMapDFGPtr); ++ DFGMap->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); ++ ConstantInt * Idx = ConstantInt::get(Int32Ty, node_idx); ++ ConstantInt * Score = ConstantInt::get(Int32Ty, node_score); ++ Value *DFGMapPtrIdx = IRB.CreateGEP(DFGMap, Idx); ++ IRB.CreateStore(Score, DFGMapPtrIdx) ++ ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); ++ } ++ } + } + + /* Say something nice. */ +- +- if (!be_quiet) { +- +- if (!inst_blocks) WARNF("No instrumentation targets found."); +- else OKF("Instrumented %u locations (%s mode, ratio %u%%).", +- inst_blocks, getenv("AFL_HARDEN") ? "hardened" : +- ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) ? +- "ASAN/MSAN" : "non-hardened"), inst_ratio); +- +- } ++ for (auto it = covered_targets.begin(); it != covered_targets.end(); ++it) ++ std::cout << "Covered " << (*it) << std::endl; ++ OKF("Selected blocks: %u, skipped blocks: %u. instrumented DFG nodes: %u", ++ inst_blocks, skip_blocks, inst_dfg_nodes); + + return true; + +diff --git a/llvm_mode/afl-llvm-rt.o.c b/llvm_mode/afl-llvm-rt.o.c +index 60475c933073..6df88f7b462d 100644 +--- a/llvm_mode/afl-llvm-rt.o.c ++++ b/llvm_mode/afl-llvm-rt.o.c +@@ -60,6 +60,9 @@ + u8 __afl_area_initial[MAP_SIZE]; + u8* __afl_area_ptr = __afl_area_initial; + ++u32 __afl_area_initial_dfg[DFG_MAP_SIZE]; ++u32* __afl_area_dfg_ptr = __afl_area_initial_dfg; ++ + __thread u32 __afl_prev_loc; + + +@@ -73,6 +76,7 @@ static u8 is_persistent; + static void __afl_map_shm(void) { + + u8 *id_str = getenv(SHM_ENV_VAR); ++ u8 *id_str_dfg = getenv(SHM_ENV_VAR_DFG); + + /* If we're running under AFL, attach to the appropriate region, replacing the + early-stage __afl_area_initial region that is needed to allow some really +@@ -81,12 +85,15 @@ static void __afl_map_shm(void) { + if (id_str) { + + u32 shm_id = atoi(id_str); ++ u32 shm_id_dfg = atoi(id_str_dfg); + + __afl_area_ptr = shmat(shm_id, NULL, 0); ++ __afl_area_dfg_ptr = shmat(shm_id_dfg, NULL, 0); + + /* Whooooops. */ + + if (__afl_area_ptr == (void *)-1) _exit(1); ++ if (__afl_area_dfg_ptr == (void *)-1) _exit(1); + + /* Write something into the bitmap so that even with low AFL_INST_RATIO, + our parent doesn't give up on us. */ +@@ -196,6 +203,7 @@ int __afl_persistent_loop(unsigned int max_cnt) { + if (is_persistent) { + + memset(__afl_area_ptr, 0, MAP_SIZE); ++ memset(__afl_area_dfg_ptr, 0, sizeof(u32) * DFG_MAP_SIZE); + __afl_area_ptr[0] = 1; + __afl_prev_loc = 0; + } +@@ -224,6 +232,7 @@ int __afl_persistent_loop(unsigned int max_cnt) { + dummy output region. 
*/ + + __afl_area_ptr = __afl_area_initial; ++ __afl_area_dfg_ptr = __afl_area_initial_dfg; + + } + +diff --git a/types.h b/types.h +index f4a5716bfc05..e2a9058291a0 100644 +--- a/types.h ++++ b/types.h +@@ -27,6 +27,7 @@ + + #include + #include ++#include + + typedef uint8_t u8; + typedef uint16_t u16; +@@ -50,8 +51,10 @@ typedef uint32_t u32; + + #ifdef __x86_64__ + typedef unsigned long long u64; ++#define U64_MAX ULLONG_MAX + #else + typedef uint64_t u64; ++#define U64_MAX UINT64_MAX + #endif /* ^__x86_64__ */ + + typedef int8_t s8; -- cgit 1.4.1
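
The bulk of dafl.patch above implements DAFL's proximity-guided power schedule: the LLVM pass writes each covered DFG node's static score into a dedicated shared-memory map (dfg_bits), compute_proximity_score() sums that map after every execution, and calculate_factor() scales AFL's havoc energy (perf_score) by the ratio of a seed's proximity score to the queue-wide average kept in calibrate_case(). The stand-alone C sketch below only mirrors that arithmetic so the schedule can be followed outside the fuzzer; it is not part of the patch, and the sample values (avg_prox_score = 50, the two node scores 30 and 90) are made up for illustration.

#include <stdint.h>
#include <stdio.h>

#define DFG_MAP_SIZE 32568                 /* same value dafl.patch adds to config.h */

static uint32_t dfg_bits[DFG_MAP_SIZE];    /* per-node scores, filled by the target  */
static uint64_t avg_prox_score = 50;       /* running average kept by calibrate_case */
                                           /* (50 is a made-up example value)        */

/* Sum the scores of every DFG node this execution touched
   (mirrors compute_proximity_score() in the patched afl-fuzz.c). */
static uint64_t compute_proximity_score(void) {
  uint64_t prox_score = 0;
  for (uint32_t i = 0; i < DFG_MAP_SIZE; i++)
    prox_score += dfg_bits[i];
  return prox_score;
}

/* Default DAFL schedule: scale havoc energy by the seed's proximity score
   relative to the queue average; the -N option added by the patch disables
   this, and -c switches to AFLGo-style annealing instead. */
static double calculate_factor(uint64_t prox_score) {
  if (!avg_prox_score) return 1.0;
  return (double)prox_score / (double)avg_prox_score;
}

int main(void) {
  /* Pretend the instrumented target hit two DFG nodes whose static scores,
     assigned by the LLVM pass from the DAFL_DFG_SCORE file, are 30 and 90. */
  dfg_bits[7]  = 30;
  dfg_bits[42] = 90;

  uint64_t prox = compute_proximity_score();              /* 120           */
  uint32_t perf_score = 100;                              /* AFL baseline  */
  perf_score = (uint32_t)(calculate_factor(prox) * (double)perf_score);

  printf("proximity score: %llu, havoc energy: %u\n",
         (unsigned long long)prox, perf_score);           /* 120, 240      */
  return 0;
}

Note the design choice visible in the pass: it stores each node's score at a fixed index (IRB.CreateStore(Score, DFGMapPtrIdx)) rather than incrementing a counter, so the summed proximity score counts every distinct DFG node once regardless of how many times it executes; the schedule rewards which def-use-chain nodes a seed reaches, not how often it reaches them.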