author     van Hauser <vh@thc.org>        2019-07-14 20:02:20 +0200
committer  GitHub <noreply@github.com>    2019-07-14 20:02:20 +0200
commit     da8e03e18a1d01cb4ea26fc8efb25c4e7708a0b5 (patch)
tree       21e2f2e0bbe93341c3813d96576c5e83f62195d0
parent     5c0830f62857bc00d1da386e3d204932f544a6ba (diff)
parent     4a80dbdd10aedd3a3e70a0631aeb4e01438b634c (diff)
Merge branch 'master' into shared_memory_mmap_refactor
 Makefile                                |    6
 afl-as.h                                |    2
 afl-fuzz.c                              | 3915
 alloc-inl.h                             |   12
 config.h                                |    1
 docs/ChangeLog                          |    8
 docs/PATCHES                            |    6
 docs/README                             |   30
 docs/README.MOpt                        |   43
 docs/binaryonly_fuzzing.txt             |  115
 docs/env_variables.txt                  |   23
 llvm_mode/LLVMInsTrim.so.cc             |  350
 llvm_mode/Makefile                      |   17
 llvm_mode/MarkNodes.cc                  |  355
 llvm_mode/MarkNodes.h                   |   11
 llvm_mode/README.laf-intel              |    8
 llvm_mode/README.llvm                   |   37
 llvm_mode/README.neverzero              |   22
 llvm_mode/afl-clang-fast.c              |   14
 llvm_mode/afl-llvm-pass.so.cc           |   64
 llvm_mode/compare-transform-pass.so.cc  |    3
 llvm_mode/split-compares-pass.so.cc     |    5
 llvm_mode/split-switches-pass.so.cc     |    6
 python_mutators/README                  |    4
 python_mutators/XmlMutatorMin.py        |  331
 python_mutators/wrapper_afl_min.py      |  117
 26 files changed, 5412 insertions(+), 93 deletions(-)
diff --git a/Makefile b/Makefile index 2b860b66..60dfde18 100644 --- a/Makefile +++ b/Makefile @@ -181,8 +181,8 @@ all_done: test_build .NOTPARALLEL: clean clean: - rm -f $(PROGS) afl-as as afl-g++ afl-clang afl-clang++ *.o *~ a.out core core.[1-9][0-9]* *.stackdump test .test .test1 .test2 test-instr .test-instr0 .test-instr1 qemu_mode/qemu-2.10.0.tar.bz2 afl-qemu-trace - rm -rf out_dir qemu_mode/qemu-2.10.0 + rm -f $(PROGS) afl-as as afl-g++ afl-clang afl-clang++ *.o *~ a.out core core.[1-9][0-9]* *.stackdump test .test .test1 .test2 test-instr .test-instr0 .test-instr1 qemu_mode/qemu-3.1.0.tar.xz afl-qemu-trace + rm -rf out_dir qemu_mode/qemu-3.1.0 $(MAKE) -C llvm_mode clean $(MAKE) -C libdislocator clean $(MAKE) -C libtokencap clean @@ -194,7 +194,7 @@ install: all rm -f $${DESTDIR}$(BIN_PATH)/afl-as if [ -f afl-qemu-trace ]; then install -m 755 afl-qemu-trace $${DESTDIR}$(BIN_PATH); fi ifndef AFL_TRACE_PC - if [ -f afl-clang-fast -a -f afl-llvm-pass.so -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 afl-llvm-pass.so afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi + if [ -f afl-clang-fast -a -f libLLVMInsTrim.so -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 libLLVMInsTrim.so afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi else if [ -f afl-clang-fast -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi endif diff --git a/afl-as.h b/afl-as.h index 90944614..4748eda7 100644 --- a/afl-as.h +++ b/afl-as.h @@ -189,6 +189,7 @@ static const u8* main_payload_32 = " orb $1, (%edx, %edi, 1)\n" #else " incb (%edx, %edi, 1)\n" + " adcb $0, (%edx, %edi, 1)\n" // never zero counter implementation. slightly better path discovery and little performance impact #endif /* ^SKIP_COUNTS */ "\n" "__afl_return:\n" @@ -441,6 +442,7 @@ static const u8* main_payload_64 = " orb $1, (%rdx, %rcx, 1)\n" #else " incb (%rdx, %rcx, 1)\n" + " adcb $0, (%rdx, %rcx, 1)\n" // never zero counter implementation. slightly better path discovery and little performance impact #endif /* ^SKIP_COUNTS */ "\n" "__afl_return:\n" diff --git a/afl-fuzz.c b/afl-fuzz.c index 2e4b28e0..b6645c0f 100644 --- a/afl-fuzz.c +++ b/afl-fuzz.c @@ -78,9 +78,68 @@ # define EXP_ST static #endif /* ^AFL_LIB */ -/* Lots of globals, but mostly for the status UI and other things where it +/* MOpt: + Lots of globals, but mostly for the status UI and other things where it really makes no sense to haul them around as function parameters. 
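For context on the "incb ... / adcb $0, ..." pair added to afl-as.h above: a minimal C sketch of the NeverZero counter idea. This is not part of the patch; the function name is illustrative.

/* An 8-bit edge counter that wraps from 255 to 0 would make a hot edge look
   unvisited. Folding the carry of the increment back in - which is what the
   adcb instruction does after incb - makes the counter skip 0. */

#include <stdint.h>

static inline void never_zero_inc(uint8_t *counter) {

  uint16_t wide = (uint16_t)*counter + 1;    /* increment in a wider type      */
  *counter = (uint8_t)(wide + (wide >> 8));  /* 255 + 1 -> 256 -> 0 + carry = 1 */

}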
*/ - +EXP_ST u64 limit_time_puppet = 0; +u64 orig_hit_cnt_puppet = 0; +u64 last_limit_time_start = 0; +u64 tmp_pilot_time = 0; +u64 total_pacemaker_time = 0; +u64 total_puppet_find = 0; +u64 temp_puppet_find = 0; +u64 most_time_key = 0; +u64 most_time = 0; +u64 most_execs_key = 0; +u64 most_execs = 0; +u64 old_hit_count = 0; +int SPLICE_CYCLES_puppet; +int limit_time_sig = 0; +int key_puppet = 0; +int key_module = 0; +double w_init = 0.9; +double w_end = 0.3; +double w_now; +int g_now = 0; +int g_max = 5000; +#define operator_num 16 +#define swarm_num 5 +#define period_core 500000 +u64 tmp_core_time = 0; +int swarm_now = 0 ; +double x_now[swarm_num][operator_num], + L_best[swarm_num][operator_num], + eff_best[swarm_num][operator_num], + G_best[operator_num], + v_now[swarm_num][operator_num], + probability_now[swarm_num][operator_num], + swarm_fitness[swarm_num]; + + static u64 stage_finds_puppet[swarm_num][operator_num], /* Patterns found per fuzz stage */ + stage_finds_puppet_v2[swarm_num][operator_num], + stage_cycles_puppet_v2[swarm_num][operator_num], + stage_cycles_puppet_v3[swarm_num][operator_num], + stage_cycles_puppet[swarm_num][operator_num], + operator_finds_puppet[operator_num], + core_operator_finds_puppet[operator_num], + core_operator_finds_puppet_v2[operator_num], + core_operator_cycles_puppet[operator_num], + core_operator_cycles_puppet_v2[operator_num], + core_operator_cycles_puppet_v3[operator_num]; /* Execs per fuzz stage */ + +#define RAND_C (rand()%1000*0.001) +#define v_max 1 +#define v_min 0.05 +#define limit_time_bound 1.1 +#define SPLICE_CYCLES_puppet_up 25 +#define SPLICE_CYCLES_puppet_low 5 +#define STAGE_RANDOMBYTE 12 +#define STAGE_DELETEBYTE 13 +#define STAGE_Clone75 14 +#define STAGE_OverWrite75 15 +#define period_pilot 50000 +double period_pilot_tmp = 5000.0; +int key_lv = 0; EXP_ST u8 *in_dir, /* Input directory with test cases */ *out_file, /* File to fuzz, if any */ @@ -127,6 +186,7 @@ char *power_names[] = { }; static u8 schedule = EXPLORE; /* Power schedule (default: EXPLORE)*/ +static u8 havoc_max_mult = HAVOC_MAX_MULT; EXP_ST u8 skip_deterministic, /* Skip deterministic stages? */ force_deterministic, /* Force deterministic stages? */ @@ -259,6 +319,7 @@ struct queue_entry { u8 cal_failed, /* Calibration failed? */ trim_done, /* Trimmed? */ + was_fuzzed, /* historical, but needed for MOpt */ passed_det, /* Deterministic stages passed? */ has_new_cov, /* Triggers new coverage? */ var_behavior, /* Variable behavior? 
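The x_now / v_now / L_best / G_best / probability_now arrays declared above are the particle-swarm state MOpt keeps per swarm and per mutation operator. A minimal sketch of the standard PSO update these arrays are laid out for follows; the real update lives elsewhere in the patch, so treat the function below as an assumption about its shape, with illustrative names.

#include <stdlib.h>

#define SWARM_NUM    5
#define OPERATOR_NUM 16
#define V_MAX        1.0
#define V_MIN        0.05
#define RAND01()     (rand() % 1000 * 0.001)

double x_now[SWARM_NUM][OPERATOR_NUM], v_now[SWARM_NUM][OPERATOR_NUM],
       L_best[SWARM_NUM][OPERATOR_NUM], G_best[OPERATOR_NUM],
       probability_now[SWARM_NUM][OPERATOR_NUM];

/* w_now is the inertia weight, interpolated from w_init down to w_end. */
void pso_update_sketch(int s, double w_now) {

  double sum = 0.0;

  for (int j = 0; j < OPERATOR_NUM; j++) {

    /* inertia plus a pull toward the local and global best positions */
    v_now[s][j] = w_now * v_now[s][j]
                + RAND01() * (L_best[s][j] - x_now[s][j])
                + RAND01() * (G_best[j]    - x_now[s][j]);
    x_now[s][j] += v_now[s][j];

    if (x_now[s][j] > V_MAX) x_now[s][j] = V_MAX;
    if (x_now[s][j] < V_MIN) x_now[s][j] = V_MIN;
    sum += x_now[s][j];

  }

  /* turn the row into a cumulative distribution for roulette selection */
  double acc = 0.0;
  for (int j = 0; j < OPERATOR_NUM; j++) {
    acc += x_now[s][j] / sum;
    probability_now[s][j] = acc;
  }

}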
*/ @@ -568,6 +629,35 @@ static void trim_py(char** ret, size_t* retlen) { #endif /* USE_PYTHON */ +int select_algorithm(void) { + + int i_puppet, j_puppet; + u32 seed[2]; + + if (!fixed_seed) { + ck_read(dev_urandom_fd, &seed, sizeof(seed), "/dev/urandom"); + srandom(seed[0]); + } + + double sele = ((double)(random()%10000)*0.0001); + //SAYF("select : %f\n",sele); + j_puppet = 0; + for (i_puppet = 0; i_puppet < operator_num; i_puppet++) { + if (unlikely(i_puppet == 0)) { + if (sele < probability_now[swarm_now][i_puppet]) + break; + } else { + if (sele < probability_now[swarm_now][i_puppet]) { + j_puppet =1; + break; + } + } + } + if (j_puppet ==1 && sele < probability_now[swarm_now][i_puppet-1]) + FATAL("error select_algorithm"); + return i_puppet; +} + /* Get unix time in milliseconds */ @@ -603,18 +693,14 @@ static u64 get_cur_time_us(void) { static inline u32 UR(u32 limit) { if (!fixed_seed && unlikely(!rand_cnt--)) { - u32 seed[2]; ck_read(dev_urandom_fd, &seed, sizeof(seed), "/dev/urandom"); - srandom(seed[0]); rand_cnt = (RESEED_RNG / 2) + (seed[1] % RESEED_RNG); - } return random() % limit; - } @@ -1583,7 +1669,7 @@ static void cull_queue(void) { top_rated[i]->favored = 1; queued_favored++; - if (top_rated[i]->fuzz_level == 0) pending_favored++; + if (top_rated[i]->fuzz_level == 0 || !top_rated[i]->was_fuzzed) pending_favored++; } @@ -2387,7 +2473,7 @@ EXP_ST void init_forkserver(char** argv) { #endif /* __APPLE__ */ " - Less likely, there is a horrible bug in the fuzzer. If other options\n" - " fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n"); + " fail, poke <afl-users@googlegroups.com> for troubleshooting tips.\n"); } else { @@ -2421,7 +2507,7 @@ EXP_ST void init_forkserver(char** argv) { #endif /* __APPLE__ */ " - Less likely, there is a horrible bug in the fuzzer. If other options\n" - " fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n", + " fail, poke <afl-users@googlegroups.com> for troubleshooting tips.\n", DMS(mem_limit << 20), mem_limit - 1); } @@ -2446,7 +2532,7 @@ EXP_ST void init_forkserver(char** argv) { SAYF("\n" cLRD "[-] " cRST "Hmm, looks like the target binary terminated before we could complete a\n" " handshake with the injected code. Perhaps there is a horrible bug in the\n" - " fuzzer. Poke <lcamtuf@coredump.cx> for troubleshooting tips.\n"); + " fuzzer. Poke <afl-users@googlegroups.com> for troubleshooting tips.\n"); } else { @@ -2469,7 +2555,7 @@ EXP_ST void init_forkserver(char** argv) { " estimate the required amount of virtual memory for the binary.\n\n" " - Less likely, there is a horrible bug in the fuzzer. If other options\n" - " fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n", + " fail, poke <afl-users@googlegroups.com> for troubleshooting tips.\n", getenv(DEFER_ENV_VAR) ? "three" : "two", getenv(DEFER_ENV_VAR) ? " - You are using deferred forkserver, but __AFL_INIT() is never\n" @@ -3051,7 +3137,7 @@ static void perform_dry_run(char** argv) { #endif /* __APPLE__ */ " - Least likely, there is a horrible bug in the fuzzer. If other options\n" - " fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n", + " fail, poke <afl-users@googlegroups.com> for troubleshooting tips.\n", DMS(mem_limit << 20), mem_limit - 1, doc_path); } else { @@ -3073,7 +3159,7 @@ static void perform_dry_run(char** argv) { #endif /* __APPLE__ */ " - Least likely, there is a horrible bug in the fuzzer. 
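select_algorithm() above is a roulette-wheel pick over the cumulative per-operator probabilities; stripped of the sanity checking it reduces to the sketch below (not part of the diff, names illustrative).

#include <stdlib.h>

#define OPERATOR_NUM 16

/* cumulative[] is a non-decreasing row of probability_now ending near 1.0 */
int roulette_pick(const double cumulative[OPERATOR_NUM]) {

  double draw = (random() % 10000) * 0.0001;   /* uniform in [0, 1) */

  for (int op = 0; op < OPERATOR_NUM; op++)
    if (draw < cumulative[op]) return op;

  return OPERATOR_NUM - 1;                     /* guard against rounding */

}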
If other options\n" - " fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n"); + " fail, poke <afl-users@googlegroups.com> for troubleshooting tips.\n"); } @@ -3324,12 +3410,9 @@ static void write_crash_readme(void) { "them to a vendor? Check out the afl-tmin that comes with the fuzzer!\n\n" "Found any cool bugs in open-source tools using afl-fuzz? If yes, please drop\n" - "me a mail at <lcamtuf@coredump.cx> once the issues are fixed - I'd love to\n" - "add your finds to the gallery at:\n\n" - - " http://lcamtuf.coredump.cx/afl/\n\n" + "an mail at <afl-users@googlegroups.com> once the issues are fixed\n\n" - "Thanks :-)\n", + " https://github.com/vanhauser-thc/AFLplusplus\n\n", orig_cmdline, DMS(mem_limit << 20)); /* ignore errors */ @@ -5149,10 +5232,12 @@ static u32 calculate_score(struct queue_entry* q) { perf_score *= factor / POWER_BETA; + // MOpt mode + if (limit_time_sig != 0 && max_depth - q->depth < 3) perf_score *= 2; /* Make sure that we don't go over limit. */ - if (perf_score > HAVOC_MAX_MULT * 100) perf_score = HAVOC_MAX_MULT * 100; + if (perf_score > havoc_max_mult * 100) perf_score = havoc_max_mult * 100; return perf_score; @@ -5349,7 +5434,7 @@ static u8 could_be_interest(u32 old_val, u32 new_val, u8 blen, u8 check_le) { function is a tad too long... returns 0 if fuzzed successfully, 1 if skipped or bailed out. */ -static u8 fuzz_one(char** argv) { +static u8 fuzz_one_original(char** argv) { s32 len, fd, temp_len, i, j; u8 *in_buf, *out_buf, *orig_in, *ex_tmp, *eff_map = 0; @@ -5376,7 +5461,7 @@ static u8 fuzz_one(char** argv) { possibly skip to them at the expense of already-fuzzed or non-favored cases. */ - if ((queue_cur->fuzz_level > 0 || !queue_cur->favored) && + if (((queue_cur->was_fuzzed > 0 || queue_cur->fuzz_level > 0) || !queue_cur->favored) && UR(100) < SKIP_TO_NEW_PROB) return 1; } else if (!dumb_mode && !queue_cur->favored && queued_paths > 10) { @@ -5385,7 +5470,7 @@ static u8 fuzz_one(char** argv) { The odds of skipping stuff are higher for already-fuzzed inputs and lower for never-fuzzed entries. */ - if (queue_cycle > 1 && queue_cur->fuzz_level == 0) { + if (queue_cycle > 1 && (queue_cur->fuzz_level == 0 || queue_cur->was_fuzzed)) { if (UR(100) < SKIP_NFAV_NEW_PROB) return 1; @@ -5495,9 +5580,9 @@ static u8 fuzz_one(char** argv) { if (skip_deterministic || ((!queue_cur->passed_det) && perf_score < ( - queue_cur->depth * 30 <= HAVOC_MAX_MULT * 100 + queue_cur->depth * 30 <= havoc_max_mult * 100 ? queue_cur->depth * 30 - : HAVOC_MAX_MULT * 100)) + : havoc_max_mult * 100)) || queue_cur->passed_det) #ifdef USE_PYTHON goto python_stage; @@ -6543,7 +6628,7 @@ retry_external_pick: permitting. */ if (queued_paths != havoc_queued) { - if (perf_score <= HAVOC_MAX_MULT * 100) { + if (perf_score <= havoc_max_mult * 100) { stage_max *= 2; perf_score *= 2; } @@ -6954,7 +7039,7 @@ havoc_stage: /* Inserted part */ memcpy(new_buf + insert_at, a_extras[use_extra].data, extra_len); - } else { + } else { use_extra = UR(extras_cnt); extra_len = extras[use_extra].len; @@ -7002,7 +7087,7 @@ havoc_stage: if (queued_paths != havoc_queued) { - if (perf_score <= HAVOC_MAX_MULT * 100) { + if (perf_score <= havoc_max_mult * 100) { stage_max *= 2; perf_score *= 2; } @@ -7128,8 +7213,9 @@ abandon_entry: /* Update pending_not_fuzzed count if we made it through the calibration cycle and have not seen this entry before. 
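The calculate_score() hunk above adds two MOpt-related twists: queue entries close to the deepest known path get a 2x bonus when -L is active, and the cap now uses the run-time havoc_max_mult instead of the compile-time HAVOC_MAX_MULT. In isolation (sketch, illustrative names):

#include <stdint.h>

static uint32_t cap_perf_score(uint32_t perf_score, uint32_t q_depth,
                               uint32_t max_depth, int limit_time_sig,
                               uint8_t havoc_max_mult) {

  /* MOpt bonus for entries near the deepest discovered path */
  if (limit_time_sig != 0 && max_depth - q_depth < 3) perf_score *= 2;

  /* run-time cap replacing the old compile-time HAVOC_MAX_MULT cap */
  if (perf_score > (uint32_t)havoc_max_mult * 100)
    perf_score = (uint32_t)havoc_max_mult * 100;

  return perf_score;

}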
*/ - if (!stop_soon && !queue_cur->cal_failed && queue_cur->fuzz_level == 0) { + if (!stop_soon && !queue_cur->cal_failed && (queue_cur->was_fuzzed == 0 || queue_cur->fuzz_level == 0)) { pending_not_fuzzed--; + queue_cur->was_fuzzed = 1; if (queue_cur->favored) pending_favored--; } @@ -7147,6 +7233,3627 @@ abandon_entry: } +/* MOpt mode */ +static u8 pilot_fuzzing(char** argv) { + + s32 len, fd, temp_len, i, j; + u8 *in_buf, *out_buf, *orig_in, *ex_tmp, *eff_map = 0; + u64 havoc_queued, orig_hit_cnt, new_hit_cnt, cur_ms_lv; + u32 splice_cycle = 0, perf_score = 100, orig_perf, prev_cksum, eff_cnt = 1; + + u8 ret_val = 1, doing_det = 0; + + u8 a_collect[MAX_AUTO_EXTRA]; + u32 a_len = 0; + +#ifdef IGNORE_FINDS + + /* In IGNORE_FINDS mode, skip any entries that weren't in the + initial data set. */ + + if (queue_cur->depth > 1) return 1; + +#else + + if (pending_favored) { + + /* If we have any favored, non-fuzzed new arrivals in the queue, + possibly skip to them at the expense of already-fuzzed or non-favored + cases. */ + + if ((queue_cur->was_fuzzed || !queue_cur->favored) && + UR(100) < SKIP_TO_NEW_PROB) return 1; + + } + else if (!dumb_mode && !queue_cur->favored && queued_paths > 10) { + + /* Otherwise, still possibly skip non-favored cases, albeit less often. + The odds of skipping stuff are higher for already-fuzzed inputs and + lower for never-fuzzed entries. */ + + if (queue_cycle > 1 && !queue_cur->was_fuzzed) { + + if (UR(100) < SKIP_NFAV_NEW_PROB) return 1; + + } + else { + + if (UR(100) < SKIP_NFAV_OLD_PROB) return 1; + + } + + } + +#endif /* ^IGNORE_FINDS */ + + if (not_on_tty) { + ACTF("Fuzzing test case #%u (%u total, %llu uniq crashes found)...", + current_entry, queued_paths, unique_crashes); + fflush(stdout); + } + + /* Map the test case into memory. */ + + fd = open(queue_cur->fname, O_RDONLY); + + if (fd < 0) PFATAL("Unable to open '%s'", queue_cur->fname); + + len = queue_cur->len; + + orig_in = in_buf = mmap(0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + + if (orig_in == MAP_FAILED) PFATAL("Unable to mmap '%s'", queue_cur->fname); + + close(fd); + + /* We could mmap() out_buf as MAP_PRIVATE, but we end up clobbering every + single byte anyway, so it wouldn't give us any performance or memory usage + benefits. */ + + out_buf = ck_alloc_nozero(len); + + subseq_tmouts = 0; + + cur_depth = queue_cur->depth; + + /******************************************* + * CALIBRATION (only if failed earlier on) * + *******************************************/ + + if (queue_cur->cal_failed) { + + u8 res = FAULT_TMOUT; + + if (queue_cur->cal_failed < CAL_CHANCES) { + + res = calibrate_case(argv, queue_cur, in_buf, queue_cycle - 1, 0); + + if (res == FAULT_ERROR) + FATAL("Unable to execute target application"); + + } + + if (stop_soon || res != crash_mode) { + cur_skipped_paths++; + goto abandon_entry; + } + + } + + /************ + * TRIMMING * + ************/ + + if (!dumb_mode && !queue_cur->trim_done) { + + u8 res = trim_case(argv, queue_cur, in_buf); + + if (res == FAULT_ERROR) + FATAL("Unable to execute target application"); + + if (stop_soon) { + cur_skipped_paths++; + goto abandon_entry; + } + + /* Don't retry trimming, even if it failed. 
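pilot_fuzzing() above starts with the same queue-skipping policy as fuzz_one_original(): favored, never-fuzzed arrivals are preferred, everything else is skipped probabilistically. A self-contained sketch, assuming the stock config.h percentages; the helper name and the ur() callback are illustrative.

#include <stdint.h>

/* stock AFL skip probabilities from config.h (percent), assumed unchanged */
#define SKIP_TO_NEW_PROB   99
#define SKIP_NFAV_NEW_PROB 75
#define SKIP_NFAV_OLD_PROB 95

/* returns 1 if the current queue entry should be skipped this cycle;
   ur(n) is a uniform random draw in [0, n), i.e. AFL's UR() */
int should_skip(int pending_favored, int favored, int was_fuzzed,
                int queue_cycle, uint32_t (*ur)(uint32_t)) {

  if (pending_favored) {

    /* favored, never-fuzzed arrivals exist: usually skip everything else */
    if ((was_fuzzed || !favored) && ur(100) < SKIP_TO_NEW_PROB) return 1;

  } else if (!favored) {

    /* no favorites pending: still deprioritize non-favored entries */
    if (queue_cycle > 1 && !was_fuzzed) {
      if (ur(100) < SKIP_NFAV_NEW_PROB) return 1;
    } else {
      if (ur(100) < SKIP_NFAV_OLD_PROB) return 1;
    }

  }

  return 0;

}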
*/ + + queue_cur->trim_done = 1; + + if (len != queue_cur->len) len = queue_cur->len; + + } + + memcpy(out_buf, in_buf, len); + + /********************* + * PERFORMANCE SCORE * + *********************/ + + orig_perf = perf_score = calculate_score(queue_cur); + + /* Skip right away if -d is given, if we have done deterministic fuzzing on + this entry ourselves (was_fuzzed), or if it has gone through deterministic + testing in earlier, resumed runs (passed_det). */ + + if (skip_deterministic || queue_cur->was_fuzzed || queue_cur->passed_det) + goto havoc_stage; + + /* Skip deterministic fuzzing if exec path checksum puts this out of scope + for this master instance. */ + + if (master_max && (queue_cur->exec_cksum % master_max) != master_id - 1) + goto havoc_stage; + + + cur_ms_lv = get_cur_time(); + if (!(key_puppet == 0 && ((cur_ms_lv - last_path_time < limit_time_puppet) || + (last_crash_time != 0 && cur_ms_lv - last_crash_time < limit_time_puppet) || last_path_time == 0))) + { + key_puppet = 1; + goto pacemaker_fuzzing; + } + + doing_det = 1; + + /********************************************* + * SIMPLE BITFLIP (+dictionary construction) * + *********************************************/ + +#define FLIP_BIT(_ar, _b) do { \ + u8* _arf = (u8*)(_ar); \ + u32 _bf = (_b); \ + _arf[(_bf) >> 3] ^= (128 >> ((_bf) & 7)); \ + } while (0) + + /* Single walking bit. */ + + stage_short = "flip1"; + stage_max = len << 3; + stage_name = "bitflip 1/1"; + + + + + stage_val_type = STAGE_VAL_NONE; + + orig_hit_cnt = queued_paths + unique_crashes; + + prev_cksum = queue_cur->exec_cksum; + + for (stage_cur = 0; stage_cur < stage_max; stage_cur++) { + + stage_cur_byte = stage_cur >> 3; + + FLIP_BIT(out_buf, stage_cur); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + FLIP_BIT(out_buf, stage_cur); + + /* While flipping the least significant bit in every byte, pull of an extra + trick to detect possible syntax tokens. In essence, the idea is that if + you have a binary blob like this: + + xxxxxxxxIHDRxxxxxxxx + + ...and changing the leading and trailing bytes causes variable or no + changes in program flow, but touching any character in the "IHDR" string + always produces the same, distinctive path, it's highly likely that + "IHDR" is an atomically-checked magic value of special significance to + the fuzzed format. + + We do this here, rather than as a separate stage, because it's a nice + way to keep the operation approximately "free" (i.e., no extra execs). + + Empirically, performing the check when flipping the least significant bit + is advantageous, compared to doing it at the time of more disruptive + changes, where the program flow may be affected in more violent ways. + + The caveat is that we won't generate dictionaries in the -d mode or -S + mode - but that's probably a fair trade-off. + + This won't work particularly well with paths that exhibit variable + behavior, but fails gracefully, so we'll carry out the checks anyway. + + */ + + if (!dumb_mode && (stage_cur & 7) == 7) { + + u32 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST); + + if (stage_cur == stage_max - 1 && cksum == prev_cksum) { + + /* If at end of file and we are still collecting a string, grab the + final character and force output. 
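The key_puppet check above is the -L gate that moves MOpt from the deterministic stages into pacemaker fuzzing: if neither a new path nor a crash has shown up within limit_time_puppet milliseconds, deterministic work is abandoned for this and later entries. Sketch of the gate (not part of the diff, illustrative name):

#include <stdint.h>

/* returns 1 when the fuzzer should jump to pacemaker_fuzzing */
int should_enter_pacemaker(uint64_t now_ms, uint64_t last_path_time,
                           uint64_t last_crash_time,
                           uint64_t limit_time_puppet) {

  int recent_path  = last_path_time == 0 ||
                     now_ms - last_path_time  < limit_time_puppet;
  int recent_crash = last_crash_time != 0 &&
                     now_ms - last_crash_time < limit_time_puppet;

  return !(recent_path || recent_crash);

}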
*/ + + if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3]; + a_len++; + + if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA) + maybe_add_auto(a_collect, a_len); + + } + else if (cksum != prev_cksum) { + + /* Otherwise, if the checksum has changed, see if we have something + worthwhile queued up, and collect that if the answer is yes. */ + + if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA) + maybe_add_auto(a_collect, a_len); + + a_len = 0; + prev_cksum = cksum; + + } + + /* Continue collecting string, but only if the bit flip actually made + any difference - we don't want no-op tokens. */ + + if (cksum != queue_cur->exec_cksum) { + + if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3]; + a_len++; + + } + + } + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP1] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP1] += stage_max; + + /* Two walking bits. */ + + stage_name = "bitflip 2/1"; + stage_short = "flip2"; + stage_max = (len << 3) - 1; + +#if !defined(__arm__) && !defined(__arm64__) + if (f_data[0] != 0xCF || f_data[1] != 0xFA || f_data[2] != 0xED) + FATAL("Program '%s' is not a 64-bit Mach-O binary", target_path); +#endif + + orig_hit_cnt = new_hit_cnt; + + for (stage_cur = 0; stage_cur < stage_max; stage_cur++) { + + stage_cur_byte = stage_cur >> 3; + + FLIP_BIT(out_buf, stage_cur); + FLIP_BIT(out_buf, stage_cur + 1); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + FLIP_BIT(out_buf, stage_cur); + FLIP_BIT(out_buf, stage_cur + 1); + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP2] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP2] += stage_max; + + + + /* Four walking bits. */ + + stage_name = "bitflip 4/1"; + stage_short = "flip4"; + stage_max = (len << 3) - 3; + + + + + + orig_hit_cnt = new_hit_cnt; + + for (stage_cur = 0; stage_cur < stage_max; stage_cur++) { + + stage_cur_byte = stage_cur >> 3; + + FLIP_BIT(out_buf, stage_cur); + FLIP_BIT(out_buf, stage_cur + 1); + FLIP_BIT(out_buf, stage_cur + 2); + FLIP_BIT(out_buf, stage_cur + 3); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + FLIP_BIT(out_buf, stage_cur); + FLIP_BIT(out_buf, stage_cur + 1); + FLIP_BIT(out_buf, stage_cur + 2); + FLIP_BIT(out_buf, stage_cur + 3); + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP4] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP4] += stage_max; + + + + + /* Effector map setup. These macros calculate: + + EFF_APOS - position of a particular file offset in the map. + EFF_ALEN - length of a map with a particular number of bytes. + EFF_SPAN_ALEN - map span for a sequence of bytes. + + */ + +#define EFF_APOS(_p) ((_p) >> EFF_MAP_SCALE2) +#define EFF_REM(_x) ((_x) & ((1 << EFF_MAP_SCALE2) - 1)) +#define EFF_ALEN(_l) (EFF_APOS(_l) + !!EFF_REM(_l)) +#define EFF_SPAN_ALEN(_p, _l) (EFF_APOS((_p) + (_l) - 1) - EFF_APOS(_p) + 1) + + /* Initialize effector map for the next step (see comments below). Always + flag first and last byte as doing something. */ + + eff_map = ck_alloc(EFF_ALEN(len)); + eff_map[0] = 1; + + if (EFF_APOS(len - 1) != 0) { + eff_map[EFF_APOS(len - 1)] = 1; + eff_cnt++; + } + + /* Walking byte. 
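The EFF_* macros defined above divide the input into buckets of 2^EFF_MAP_SCALE2 bytes (3, i.e. 8 bytes per bucket, in the stock config.h). A few worked values, written as compile-time checks so the arithmetic can be verified; this block is illustrative and not part of the diff.

#include <assert.h>

#define EFF_MAP_SCALE2 3                 /* stock config.h: 8 file bytes per map bucket */
#define EFF_APOS(_p)   ((_p) >> EFF_MAP_SCALE2)
#define EFF_REM(_x)    ((_x) & ((1 << EFF_MAP_SCALE2) - 1))
#define EFF_ALEN(_l)   (EFF_APOS(_l) + !!EFF_REM(_l))
#define EFF_SPAN_ALEN(_p, _l) (EFF_APOS((_p) + (_l) - 1) - EFF_APOS(_p) + 1)

static_assert(EFF_APOS(17) == 2,        "byte 17 of the file falls into map bucket 2");
static_assert(EFF_ALEN(17) == 3,        "a 17-byte file needs 3 effector-map buckets");
static_assert(EFF_SPAN_ALEN(6, 4) == 2, "bytes 6..9 straddle buckets 0 and 1");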
*/ + + stage_name = "bitflip 8/8"; + stage_short = "flip8"; + stage_max = len; + + + + orig_hit_cnt = new_hit_cnt; + + for (stage_cur = 0; stage_cur < stage_max; stage_cur++) { + + stage_cur_byte = stage_cur; + + out_buf[stage_cur] ^= 0xFF; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + /* We also use this stage to pull off a simple trick: we identify + bytes that seem to have no effect on the current execution path + even when fully flipped - and we skip them during more expensive + deterministic stages, such as arithmetics or known ints. */ + + if (!eff_map[EFF_APOS(stage_cur)]) { + + u32 cksum; + + /* If in dumb mode or if the file is very short, just flag everything + without wasting time on checksums. */ + + if (!dumb_mode && len >= EFF_MIN_LEN) + cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST); + else + cksum = ~queue_cur->exec_cksum; + + if (cksum != queue_cur->exec_cksum) { + eff_map[EFF_APOS(stage_cur)] = 1; + eff_cnt++; + } + + } + + out_buf[stage_cur] ^= 0xFF; + + } + + /* If the effector map is more than EFF_MAX_PERC dense, just flag the + whole thing as worth fuzzing, since we wouldn't be saving much time + anyway. */ + + if (eff_cnt != EFF_ALEN(len) && + eff_cnt * 100 / EFF_ALEN(len) > EFF_MAX_PERC) { + + memset(eff_map, 1, EFF_ALEN(len)); + + blocks_eff_select += EFF_ALEN(len); + + } + else { + + blocks_eff_select += eff_cnt; + + } + + blocks_eff_total += EFF_ALEN(len); + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP8] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP8] += stage_max; + + + + + + /* Two walking bytes. */ + + if (len < 2) goto skip_bitflip; + + stage_name = "bitflip 16/8"; + stage_short = "flip16"; + stage_cur = 0; + stage_max = len - 1; + + + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 1; i++) { + + /* Let's consult the effector map... */ + + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) { + stage_max--; + continue; + } + + stage_cur_byte = i; + + *(u16*)(out_buf + i) ^= 0xFFFF; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + *(u16*)(out_buf + i) ^= 0xFFFF; + + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP16] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP16] += stage_max; + + + + + if (len < 4) goto skip_bitflip; + + /* Four walking bytes. */ + + stage_name = "bitflip 32/8"; + stage_short = "flip32"; + stage_cur = 0; + stage_max = len - 3; + + + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 3; i++) { + + /* Let's consult the effector map... */ + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] && + !eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) { + stage_max--; + continue; + } + + stage_cur_byte = i; + + *(u32*)(out_buf + i) ^= 0xFFFFFFFF; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + *(u32*)(out_buf + i) ^= 0xFFFFFFFF; + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP32] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP32] += stage_max; + + + + + + + skip_bitflip: + + if (no_arith) goto skip_arith; + + /********************** + * ARITHMETIC INC/DEC * + **********************/ + + /* 8-bit arithmetics. */ + + stage_name = "arith 8/8"; + stage_short = "arith8"; + stage_cur = 0; + stage_max = 2 * len * ARITH_MAX; + + + + + stage_val_type = STAGE_VAL_LE; + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len; i++) { + + u8 orig = out_buf[i]; + + /* Let's consult the effector map... 
*/ + + if (!eff_map[EFF_APOS(i)]) { + stage_max -= 2 * ARITH_MAX; + continue; + } + + stage_cur_byte = i; + + for (j = 1; j <= ARITH_MAX; j++) { + + u8 r = orig ^ (orig + j); + + /* Do arithmetic operations only if the result couldn't be a product + of a bitflip. */ + + if (!could_be_bitflip(r)) { + + stage_cur_val = j; + out_buf[i] = orig + j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + r = orig ^ (orig - j); + + if (!could_be_bitflip(r)) { + + stage_cur_val = -j; + out_buf[i] = orig - j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + out_buf[i] = orig; + + } + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_ARITH8] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_ARITH8] += stage_max; + + + + + + /* 16-bit arithmetics, both endians. */ + + if (len < 2) goto skip_arith; + + stage_name = "arith 16/8"; + stage_short = "arith16"; + stage_cur = 0; + stage_max = 4 * (len - 1) * ARITH_MAX; + + + + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 1; i++) { + + u16 orig = *(u16*)(out_buf + i); + + /* Let's consult the effector map... */ + + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) { + stage_max -= 4 * ARITH_MAX; + continue; + } + + stage_cur_byte = i; + + for (j = 1; j <= ARITH_MAX; j++) { + + u16 r1 = orig ^ (orig + j), + r2 = orig ^ (orig - j), + r3 = orig ^ SWAP16(SWAP16(orig) + j), + r4 = orig ^ SWAP16(SWAP16(orig) - j); + + /* Try little endian addition and subtraction first. Do it only + if the operation would affect more than one byte (hence the + & 0xff overflow checks) and if it couldn't be a product of + a bitflip. */ + + stage_val_type = STAGE_VAL_LE; + + if ((orig & 0xff) + j > 0xff && !could_be_bitflip(r1)) { + + stage_cur_val = j; + *(u16*)(out_buf + i) = orig + j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((orig & 0xff) < j && !could_be_bitflip(r2)) { + + stage_cur_val = -j; + *(u16*)(out_buf + i) = orig - j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + /* Big endian comes next. Same deal. */ + + stage_val_type = STAGE_VAL_BE; + + + if ((orig >> 8) + j > 0xff && !could_be_bitflip(r3)) { + + stage_cur_val = j; + *(u16*)(out_buf + i) = SWAP16(SWAP16(orig) + j); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((orig >> 8) < j && !could_be_bitflip(r4)) { + + stage_cur_val = -j; + *(u16*)(out_buf + i) = SWAP16(SWAP16(orig) - j); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + *(u16*)(out_buf + i) = orig; + + } + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_ARITH16] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_ARITH16] += stage_max; + + + + + /* 32-bit arithmetics, both endians. */ + + if (len < 4) goto skip_arith; + + stage_name = "arith 32/8"; + stage_short = "arith32"; + stage_cur = 0; + stage_max = 4 * (len - 3) * ARITH_MAX; + + + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 3; i++) { + + u32 orig = *(u32*)(out_buf + i); + + /* Let's consult the effector map... 
*/ + + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] && + !eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) { + stage_max -= 4 * ARITH_MAX; + continue; + } + + stage_cur_byte = i; + + for (j = 1; j <= ARITH_MAX; j++) { + + u32 r1 = orig ^ (orig + j), + r2 = orig ^ (orig - j), + r3 = orig ^ SWAP32(SWAP32(orig) + j), + r4 = orig ^ SWAP32(SWAP32(orig) - j); + + /* Little endian first. Same deal as with 16-bit: we only want to + try if the operation would have effect on more than two bytes. */ + + stage_val_type = STAGE_VAL_LE; + + if ((orig & 0xffff) + j > 0xffff && !could_be_bitflip(r1)) { + + stage_cur_val = j; + *(u32*)(out_buf + i) = orig + j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((orig & 0xffff) < j && !could_be_bitflip(r2)) { + + stage_cur_val = -j; + *(u32*)(out_buf + i) = orig - j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + /* Big endian next. */ + + stage_val_type = STAGE_VAL_BE; + + if ((SWAP32(orig) & 0xffff) + j > 0xffff && !could_be_bitflip(r3)) { + + stage_cur_val = j; + *(u32*)(out_buf + i) = SWAP32(SWAP32(orig) + j); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((SWAP32(orig) & 0xffff) < j && !could_be_bitflip(r4)) { + + stage_cur_val = -j; + *(u32*)(out_buf + i) = SWAP32(SWAP32(orig) - j); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + *(u32*)(out_buf + i) = orig; + + } + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_ARITH32] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_ARITH32] += stage_max; + + + + + skip_arith: + + /********************** + * INTERESTING VALUES * + **********************/ + + stage_name = "interest 8/8"; + stage_short = "int8"; + stage_cur = 0; + stage_max = len * sizeof(interesting_8); + + + + stage_val_type = STAGE_VAL_LE; + + orig_hit_cnt = new_hit_cnt; + + /* Setting 8-bit integers. */ + + for (i = 0; i < len; i++) { + + u8 orig = out_buf[i]; + + /* Let's consult the effector map... */ + + if (!eff_map[EFF_APOS(i)]) { + stage_max -= sizeof(interesting_8); + continue; + } + + stage_cur_byte = i; + + for (j = 0; j < sizeof(interesting_8); j++) { + + /* Skip if the value could be a product of bitflips or arithmetics. */ + + if (could_be_bitflip(orig ^ (u8)interesting_8[j]) || + could_be_arith(orig, (u8)interesting_8[j], 1)) { + stage_max--; + continue; + } + + stage_cur_val = interesting_8[j]; + out_buf[i] = interesting_8[j]; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + out_buf[i] = orig; + stage_cur++; + + } + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_INTEREST8] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_INTEREST8] += stage_max; + + + + + /* Setting 16-bit integers, both endians. */ + + if (no_arith || len < 2) goto skip_interest; + + stage_name = "interest 16/8"; + stage_short = "int16"; + stage_cur = 0; + stage_max = 2 * (len - 1) * (sizeof(interesting_16) >> 1); + + + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 1; i++) { + + u16 orig = *(u16*)(out_buf + i); + + /* Let's consult the effector map... 
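The big-endian arithmetic and interesting-value variants above lean on SWAP16/SWAP32 byte swaps. A minimal sketch with worked values (not taken from the diff; lower-case names used to avoid clashing with the real macros):

#include <stdint.h>

static inline uint16_t swap16(uint16_t x) {
  return (uint16_t)((x << 8) | (x >> 8));
}

static inline uint32_t swap32(uint32_t x) {
  return (x << 24) | ((x << 8) & 0x00ff0000) |
         ((x >> 8) & 0x0000ff00) | (x >> 24);
}

/* swap16(0x1234)     == 0x3412
   swap32(0x11223344) == 0x44332211

   So "SWAP16(SWAP16(orig) + j)" above performs the +j on the byte-swapped
   interpretation of the value and then restores the original byte order
   before writing it back into the buffer. */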
*/ + + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) { + stage_max -= sizeof(interesting_16); + continue; + } + + stage_cur_byte = i; + + for (j = 0; j < sizeof(interesting_16) / 2; j++) { + + stage_cur_val = interesting_16[j]; + + /* Skip if this could be a product of a bitflip, arithmetics, + or single-byte interesting value insertion. */ + + if (!could_be_bitflip(orig ^ (u16)interesting_16[j]) && + !could_be_arith(orig, (u16)interesting_16[j], 2) && + !could_be_interest(orig, (u16)interesting_16[j], 2, 0)) { + + stage_val_type = STAGE_VAL_LE; + + *(u16*)(out_buf + i) = interesting_16[j]; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((u16)interesting_16[j] != SWAP16(interesting_16[j]) && + !could_be_bitflip(orig ^ SWAP16(interesting_16[j])) && + !could_be_arith(orig, SWAP16(interesting_16[j]), 2) && + !could_be_interest(orig, SWAP16(interesting_16[j]), 2, 1)) { + + stage_val_type = STAGE_VAL_BE; + + *(u16*)(out_buf + i) = SWAP16(interesting_16[j]); + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + } + + *(u16*)(out_buf + i) = orig; + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_INTEREST16] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_INTEREST16] += stage_max; + + + + + + if (len < 4) goto skip_interest; + + /* Setting 32-bit integers, both endians. */ + + stage_name = "interest 32/8"; + stage_short = "int32"; + stage_cur = 0; + stage_max = 2 * (len - 3) * (sizeof(interesting_32) >> 2); + + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 3; i++) { + + u32 orig = *(u32*)(out_buf + i); + + /* Let's consult the effector map... */ + + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] && + !eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) { + stage_max -= sizeof(interesting_32) >> 1; + continue; + } + + stage_cur_byte = i; + + for (j = 0; j < sizeof(interesting_32) / 4; j++) { + + stage_cur_val = interesting_32[j]; + + /* Skip if this could be a product of a bitflip, arithmetics, + or word interesting value insertion. */ + + if (!could_be_bitflip(orig ^ (u32)interesting_32[j]) && + !could_be_arith(orig, interesting_32[j], 4) && + !could_be_interest(orig, interesting_32[j], 4, 0)) { + + stage_val_type = STAGE_VAL_LE; + + *(u32*)(out_buf + i) = interesting_32[j]; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((u32)interesting_32[j] != SWAP32(interesting_32[j]) && + !could_be_bitflip(orig ^ SWAP32(interesting_32[j])) && + !could_be_arith(orig, SWAP32(interesting_32[j]), 4) && + !could_be_interest(orig, SWAP32(interesting_32[j]), 4, 1)) { + + stage_val_type = STAGE_VAL_BE; + + *(u32*)(out_buf + i) = SWAP32(interesting_32[j]); + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + } + + *(u32*)(out_buf + i) = orig; + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_INTEREST32] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_INTEREST32] += stage_max; + + + + + + skip_interest: + + /******************** + * DICTIONARY STUFF * + ********************/ + + if (!extras_cnt) goto skip_user_extras; + + /* Overwrite with user-supplied extras. 
*/ + + stage_name = "user extras (over)"; + stage_short = "ext_UO"; + stage_cur = 0; + stage_max = extras_cnt * len; + + + + + stage_val_type = STAGE_VAL_NONE; + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len; i++) { + + u32 last_len = 0; + + stage_cur_byte = i; + + /* Extras are sorted by size, from smallest to largest. This means + that we don't have to worry about restoring the buffer in + between writes at a particular offset determined by the outer + loop. */ + + for (j = 0; j < extras_cnt; j++) { + + /* Skip extras probabilistically if extras_cnt > MAX_DET_EXTRAS. Also + skip them if there's no room to insert the payload, if the token + is redundant, or if its entire span has no bytes set in the effector + map. */ + + if ((extras_cnt > MAX_DET_EXTRAS && UR(extras_cnt) >= MAX_DET_EXTRAS) || + extras[j].len > len - i || + !memcmp(extras[j].data, out_buf + i, extras[j].len) || + !memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, extras[j].len))) { + + stage_max--; + continue; + + } + + last_len = extras[j].len; + memcpy(out_buf + i, extras[j].data, last_len); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + stage_cur++; + + } + + /* Restore all the clobbered memory. */ + memcpy(out_buf + i, in_buf + i, last_len); + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_EXTRAS_UO] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_EXTRAS_UO] += stage_max; + + /* Insertion of user-supplied extras. */ + + stage_name = "user extras (insert)"; + stage_short = "ext_UI"; + stage_cur = 0; + stage_max = extras_cnt * len; + + + + + orig_hit_cnt = new_hit_cnt; + + ex_tmp = ck_alloc(len + MAX_DICT_FILE); + + for (i = 0; i <= len; i++) { + + stage_cur_byte = i; + + for (j = 0; j < extras_cnt; j++) { + + if (len + extras[j].len > MAX_FILE) { + stage_max--; + continue; + } + + /* Insert token */ + memcpy(ex_tmp + i, extras[j].data, extras[j].len); + + /* Copy tail */ + memcpy(ex_tmp + i + extras[j].len, out_buf + i, len - i); + + if (common_fuzz_stuff(argv, ex_tmp, len + extras[j].len)) { + ck_free(ex_tmp); + goto abandon_entry; + } + + stage_cur++; + + } + + /* Copy head */ + ex_tmp[i] = out_buf[i]; + + } + + ck_free(ex_tmp); + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_EXTRAS_UI] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_EXTRAS_UI] += stage_max; + + skip_user_extras: + + if (!a_extras_cnt) goto skip_extras; + + stage_name = "auto extras (over)"; + stage_short = "ext_AO"; + stage_cur = 0; + stage_max = MIN(a_extras_cnt, USE_AUTO_EXTRAS) * len; + + + stage_val_type = STAGE_VAL_NONE; + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len; i++) { + + u32 last_len = 0; + + stage_cur_byte = i; + + for (j = 0; j < MIN(a_extras_cnt, USE_AUTO_EXTRAS); j++) { + + /* See the comment in the earlier code; extras are sorted by size. */ + + if (a_extras[j].len > len - i || + !memcmp(a_extras[j].data, out_buf + i, a_extras[j].len) || + !memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, a_extras[j].len))) { + + stage_max--; + continue; + + } + + last_len = a_extras[j].len; + memcpy(out_buf + i, a_extras[j].data, last_len); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + stage_cur++; + + } + + /* Restore all the clobbered memory. 
*/ + memcpy(out_buf + i, in_buf + i, last_len); + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_EXTRAS_AO] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_EXTRAS_AO] += stage_max; + + skip_extras: + + /* If we made this to here without jumping to havoc_stage or abandon_entry, + we're properly done with deterministic steps and can mark it as such + in the .state/ directory. */ + + if (!queue_cur->passed_det) mark_as_det_done(queue_cur); + + /**************** + * RANDOM HAVOC * + ****************/ + + havoc_stage: + pacemaker_fuzzing: + + + stage_cur_byte = -1; + + /* The havoc stage mutation code is also invoked when splicing files; if the + splice_cycle variable is set, generate different descriptions and such. */ + + if (!splice_cycle) { + + stage_name = "MOpt-havoc"; + stage_short = "MOpt-havoc"; + stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * + perf_score / havoc_div / 100; + + } + else { + + static u8 tmp[32]; + + perf_score = orig_perf; + + sprintf(tmp, "MOpt-splice %u", splice_cycle); + stage_name = tmp; + stage_short = "MOpt-splice"; + stage_max = SPLICE_HAVOC * perf_score / havoc_div / 100; + + } + + s32 temp_len_puppet; + cur_ms_lv = get_cur_time(); + + + { + + + if (key_puppet == 1) + { + if (unlikely(orig_hit_cnt_puppet == 0)) + { + orig_hit_cnt_puppet = queued_paths + unique_crashes; + last_limit_time_start = get_cur_time(); + SPLICE_CYCLES_puppet = (UR(SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) + SPLICE_CYCLES_puppet_low); + } + } + + + { + havoc_stage_puppet: + + stage_cur_byte = -1; + + /* The havoc stage mutation code is also invoked when splicing files; if the + splice_cycle variable is set, generate different descriptions and such. */ + + if (!splice_cycle) { + + stage_name = "MOpt avoc"; + stage_short = "MOpt havoc"; + stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * + perf_score / havoc_div / 100; + + } + else { + static u8 tmp[32]; + perf_score = orig_perf; + sprintf(tmp, "MOpt splice %u", splice_cycle); + stage_name = tmp; + stage_short = "MOpt splice"; + stage_max = SPLICE_HAVOC * perf_score / havoc_div / 100; + } + + + + if (stage_max < HAVOC_MIN) stage_max = HAVOC_MIN; + + temp_len = len; + + orig_hit_cnt = queued_paths + unique_crashes; + + havoc_queued = queued_paths; + + + + for (stage_cur = 0; stage_cur < stage_max; stage_cur++) { + + u32 use_stacking = 1 << (1 + UR(HAVOC_STACK_POW2)); + + stage_cur_val = use_stacking; + + + for (i = 0; i < operator_num; i++) + { + stage_cycles_puppet_v3[swarm_now][i] = stage_cycles_puppet_v2[swarm_now][i]; + } + + + for (i = 0; i < use_stacking; i++) { + + switch (select_algorithm()) { + + case 0: + /* Flip a single bit somewhere. Spooky! 
*/ + FLIP_BIT(out_buf, UR(temp_len << 3)); + stage_cycles_puppet_v2[swarm_now][STAGE_FLIP1] += 1; + break; + + + case 1: + if (temp_len < 2) break; + temp_len_puppet = UR(temp_len << 3); + FLIP_BIT(out_buf, temp_len_puppet); + FLIP_BIT(out_buf, temp_len_puppet + 1); + stage_cycles_puppet_v2[swarm_now][STAGE_FLIP2] += 1; + break; + + case 2: + if (temp_len < 2) break; + temp_len_puppet = UR(temp_len << 3); + FLIP_BIT(out_buf, temp_len_puppet); + FLIP_BIT(out_buf, temp_len_puppet + 1); + FLIP_BIT(out_buf, temp_len_puppet + 2); + FLIP_BIT(out_buf, temp_len_puppet + 3); + stage_cycles_puppet_v2[swarm_now][STAGE_FLIP4] += 1; + break; + + case 3: + if (temp_len < 4) break; + out_buf[UR(temp_len)] ^= 0xFF; + stage_cycles_puppet_v2[swarm_now][STAGE_FLIP8] += 1; + break; + + case 4: + if (temp_len < 8) break; + *(u16*)(out_buf + UR(temp_len - 1)) ^= 0xFFFF; + stage_cycles_puppet_v2[swarm_now][STAGE_FLIP16] += 1; + break; + + case 5: + if (temp_len < 8) break; + *(u32*)(out_buf + UR(temp_len - 3)) ^= 0xFFFFFFFF; + stage_cycles_puppet_v2[swarm_now][STAGE_FLIP32] += 1; + break; + + case 6: + out_buf[UR(temp_len)] -= 1 + UR(ARITH_MAX); + out_buf[UR(temp_len)] += 1 + UR(ARITH_MAX); + stage_cycles_puppet_v2[swarm_now][STAGE_ARITH8] += 1; + break; + + case 7: + /* Randomly subtract from word, random endian. */ + if (temp_len < 8) break; + if (UR(2)) { + u32 pos = UR(temp_len - 1); + *(u16*)(out_buf + pos) -= 1 + UR(ARITH_MAX); + } + else { + u32 pos = UR(temp_len - 1); + u16 num = 1 + UR(ARITH_MAX); + *(u16*)(out_buf + pos) = + SWAP16(SWAP16(*(u16*)(out_buf + pos)) - num); + } + /* Randomly add to word, random endian. */ + if (UR(2)) { + u32 pos = UR(temp_len - 1); + *(u16*)(out_buf + pos) += 1 + UR(ARITH_MAX); + } + else { + u32 pos = UR(temp_len - 1); + u16 num = 1 + UR(ARITH_MAX); + *(u16*)(out_buf + pos) = + SWAP16(SWAP16(*(u16*)(out_buf + pos)) + num); + } + stage_cycles_puppet_v2[swarm_now][STAGE_ARITH16] += 1; + break; + + + case 8: + /* Randomly subtract from dword, random endian. */ + if (temp_len < 8) break; + if (UR(2)) { + u32 pos = UR(temp_len - 3); + *(u32*)(out_buf + pos) -= 1 + UR(ARITH_MAX); + } + else { + u32 pos = UR(temp_len - 3); + u32 num = 1 + UR(ARITH_MAX); + *(u32*)(out_buf + pos) = + SWAP32(SWAP32(*(u32*)(out_buf + pos)) - num); + } + /* Randomly add to dword, random endian. */ + //if (temp_len < 4) break; + if (UR(2)) { + u32 pos = UR(temp_len - 3); + *(u32*)(out_buf + pos) += 1 + UR(ARITH_MAX); + } + else { + u32 pos = UR(temp_len - 3); + u32 num = 1 + UR(ARITH_MAX); + *(u32*)(out_buf + pos) = + SWAP32(SWAP32(*(u32*)(out_buf + pos)) + num); + } + stage_cycles_puppet_v2[swarm_now][STAGE_ARITH32] += 1; + break; + + + case 9: + /* Set byte to interesting value. */ + if (temp_len < 4) break; + out_buf[UR(temp_len)] = interesting_8[UR(sizeof(interesting_8))]; + stage_cycles_puppet_v2[swarm_now][STAGE_INTEREST8] += 1; + break; + + case 10: + /* Set word to interesting value, randomly choosing endian. */ + if (temp_len < 8) break; + if (UR(2)) { + *(u16*)(out_buf + UR(temp_len - 1)) = + interesting_16[UR(sizeof(interesting_16) >> 1)]; + } + else { + *(u16*)(out_buf + UR(temp_len - 1)) = SWAP16( + interesting_16[UR(sizeof(interesting_16) >> 1)]); + } + stage_cycles_puppet_v2[swarm_now][STAGE_INTEREST16] += 1; + break; + + + case 11: + /* Set dword to interesting value, randomly choosing endian. 
*/ + + if (temp_len < 8) break; + + if (UR(2)) { + *(u32*)(out_buf + UR(temp_len - 3)) = + interesting_32[UR(sizeof(interesting_32) >> 2)]; + } + else { + *(u32*)(out_buf + UR(temp_len - 3)) = SWAP32( + interesting_32[UR(sizeof(interesting_32) >> 2)]); + } + stage_cycles_puppet_v2[swarm_now][STAGE_INTEREST32] += 1; + break; + + + case 12: + + /* Just set a random byte to a random value. Because, + why not. We use XOR with 1-255 to eliminate the + possibility of a no-op. */ + + out_buf[UR(temp_len)] ^= 1 + UR(255); + stage_cycles_puppet_v2[swarm_now][STAGE_RANDOMBYTE] += 1; + break; + + + + case 13: { + + /* Delete bytes. We're making this a bit more likely + than insertion (the next option) in hopes of keeping + files reasonably small. */ + + u32 del_from, del_len; + + if (temp_len < 2) break; + + /* Don't delete too much. */ + + del_len = choose_block_len(temp_len - 1); + + del_from = UR(temp_len - del_len + 1); + + memmove(out_buf + del_from, out_buf + del_from + del_len, + temp_len - del_from - del_len); + + temp_len -= del_len; + stage_cycles_puppet_v2[swarm_now][STAGE_DELETEBYTE] += 1; + break; + + } + + case 14: + + if (temp_len + HAVOC_BLK_XL < MAX_FILE) { + + /* Clone bytes (75%) or insert a block of constant bytes (25%). */ + + u8 actually_clone = UR(4); + u32 clone_from, clone_to, clone_len; + u8* new_buf; + + if (actually_clone) { + + clone_len = choose_block_len(temp_len); + clone_from = UR(temp_len - clone_len + 1); + + } + else { + + clone_len = choose_block_len(HAVOC_BLK_XL); + clone_from = 0; + + } + + clone_to = UR(temp_len); + + new_buf = ck_alloc_nozero(temp_len + clone_len); + + /* Head */ + + memcpy(new_buf, out_buf, clone_to); + + /* Inserted part */ + + if (actually_clone) + memcpy(new_buf + clone_to, out_buf + clone_from, clone_len); + else + memset(new_buf + clone_to, + UR(2) ? UR(256) : out_buf[UR(temp_len)], clone_len); + + /* Tail */ + memcpy(new_buf + clone_to + clone_len, out_buf + clone_to, + temp_len - clone_to); + + ck_free(out_buf); + out_buf = new_buf; + temp_len += clone_len; + stage_cycles_puppet_v2[swarm_now][STAGE_Clone75] += 1; + } + + break; + + case 15: { + + /* Overwrite bytes with a randomly selected chunk (75%) or fixed + bytes (25%). */ + + u32 copy_from, copy_to, copy_len; + + if (temp_len < 2) break; + + copy_len = choose_block_len(temp_len - 1); + + copy_from = UR(temp_len - copy_len + 1); + copy_to = UR(temp_len - copy_len + 1); + + if (UR(4)) { + + if (copy_from != copy_to) + memmove(out_buf + copy_to, out_buf + copy_from, copy_len); + + } + else memset(out_buf + copy_to, + UR(2) ? UR(256) : out_buf[UR(temp_len)], copy_len); + stage_cycles_puppet_v2[swarm_now][STAGE_OverWrite75] += 1; + break; + + } + + + } + + } + + + tmp_pilot_time += 1; + + + + + u64 temp_total_found = queued_paths + unique_crashes; + + + + + if (common_fuzz_stuff(argv, out_buf, temp_len)) + goto abandon_entry_puppet; + + /* out_buf might have been mangled a bit, so let's restore it to its + original size and shape. */ + + if (temp_len < len) out_buf = ck_realloc(out_buf, len); + temp_len = len; + memcpy(out_buf, in_buf, len); + + /* If we're finding new stuff, let's run for a bit longer, limits + permitting. 
*/ + + if (queued_paths != havoc_queued) { + + if (perf_score <= havoc_max_mult * 100) { + stage_max *= 2; + perf_score *= 2; + } + + havoc_queued = queued_paths; + + } + + if (unlikely(queued_paths + unique_crashes > temp_total_found)) + { + u64 temp_temp_puppet = queued_paths + unique_crashes - temp_total_found; + total_puppet_find = total_puppet_find + temp_temp_puppet; + for (i = 0; i < 16; i++) + { + if (stage_cycles_puppet_v2[swarm_now][i] > stage_cycles_puppet_v3[swarm_now][i]) + stage_finds_puppet_v2[swarm_now][i] += temp_temp_puppet; + } + } + + } + new_hit_cnt = queued_paths + unique_crashes; + +#ifndef IGNORE_FINDS + + /************ + * SPLICING * + ************/ + + + retry_splicing_puppet: + + if (use_splicing && splice_cycle++ < SPLICE_CYCLES_puppet && + queued_paths > 1 && queue_cur->len > 1) { + + struct queue_entry* target; + u32 tid, split_at; + u8* new_buf; + s32 f_diff, l_diff; + + /* First of all, if we've modified in_buf for havoc, let's clean that + up... */ + + if (in_buf != orig_in) { + ck_free(in_buf); + in_buf = orig_in; + len = queue_cur->len; + } + + /* Pick a random queue entry and seek to it. Don't splice with yourself. */ + + do { tid = UR(queued_paths); } while (tid == current_entry); + + splicing_with = tid; + target = queue; + + while (tid >= 100) { target = target->next_100; tid -= 100; } + while (tid--) target = target->next; + + /* Make sure that the target has a reasonable length. */ + + while (target && (target->len < 2 || target == queue_cur)) { + target = target->next; + splicing_with++; + } + + if (!target) goto retry_splicing_puppet; + + /* Read the testcase into a new buffer. */ + + fd = open(target->fname, O_RDONLY); + + if (fd < 0) PFATAL("Unable to open '%s'", target->fname); + + new_buf = ck_alloc_nozero(target->len); + + ck_read(fd, new_buf, target->len, target->fname); + + close(fd); + + /* Find a suitable splicin g location, somewhere between the first and + the last differing byte. Bail out if the difference is just a single + byte or so. */ + + locate_diffs(in_buf, new_buf, MIN(len, target->len), &f_diff, &l_diff); + + if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) { + ck_free(new_buf); + goto retry_splicing_puppet; + } + + /* Split somewhere between the first and last differing byte. */ + + split_at = f_diff + UR(l_diff - f_diff); + + /* Do the thing. */ + + len = target->len; + memcpy(new_buf, in_buf, split_at); + in_buf = new_buf; + ck_free(out_buf); + out_buf = ck_alloc_nozero(len); + memcpy(out_buf, in_buf, len); + goto havoc_stage_puppet; + + } + +#endif /* !IGNORE_FINDS */ + + ret_val = 0; + + abandon_entry: + abandon_entry_puppet: + + if (splice_cycle >= SPLICE_CYCLES_puppet) + SPLICE_CYCLES_puppet = (UR(SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) + SPLICE_CYCLES_puppet_low); + + + splicing_with = -1; + + /* Update pending_not_fuzzed count if we made it through the calibration + cycle and have not seen this entry before. 
*/ + + // if (!stop_soon && !queue_cur->cal_failed && !queue_cur->was_fuzzed) { + // queue_cur->was_fuzzed = 1; + // pending_not_fuzzed--; + // if (queue_cur->favored) pending_favored--; + // } + + munmap(orig_in, queue_cur->len); + + if (in_buf != orig_in) ck_free(in_buf); + ck_free(out_buf); + ck_free(eff_map); + + + if (key_puppet == 1) + { + if (unlikely(queued_paths + unique_crashes > ((queued_paths + unique_crashes)*limit_time_bound + orig_hit_cnt_puppet))) + { + key_puppet = 0; + cur_ms_lv = get_cur_time(); + new_hit_cnt = queued_paths + unique_crashes; + orig_hit_cnt_puppet = 0; + last_limit_time_start = 0; + } + } + + + if (unlikely(tmp_pilot_time > period_pilot)) + { + total_pacemaker_time += tmp_pilot_time; + new_hit_cnt = queued_paths + unique_crashes; + swarm_fitness[swarm_now] = (double)(total_puppet_find - temp_puppet_find) / ((double)(tmp_pilot_time)/ period_pilot_tmp); + tmp_pilot_time = 0; + temp_puppet_find = total_puppet_find; + + u64 temp_stage_finds_puppet = 0; + for (i = 0; i < operator_num; i++) + { + double temp_eff = 0.0; + + if (stage_cycles_puppet_v2[swarm_now][i] > stage_cycles_puppet[swarm_now][i]) + temp_eff = (double)(stage_finds_puppet_v2[swarm_now][i] - stage_finds_puppet[swarm_now][i]) / + (double)(stage_cycles_puppet_v2[swarm_now][i] - stage_cycles_puppet[swarm_now][i]); + + if (eff_best[swarm_now][i] < temp_eff) + { + eff_best[swarm_now][i] = temp_eff; + L_best[swarm_now][i] = x_now[swarm_now][i]; + } + + stage_finds_puppet[swarm_now][i] = stage_finds_puppet_v2[swarm_now][i]; + stage_cycles_puppet[swarm_now][i] = stage_cycles_puppet_v2[swarm_now][i]; + temp_stage_finds_puppet += stage_finds_puppet[swarm_now][i]; + } + + swarm_now = swarm_now + 1; + if (swarm_now == swarm_num) + { + key_module = 1; + for (i = 0; i < operator_num; i++) + { + core_operator_cycles_puppet_v2[i] = core_operator_cycles_puppet[i]; + core_operator_cycles_puppet_v3[i] = core_operator_cycles_puppet[i]; + core_operator_finds_puppet_v2[i] = core_operator_finds_puppet[i]; + } + + double swarm_eff = 0.0; + swarm_now = 0; + for (i = 0; i < swarm_num; i++) + { + if (swarm_fitness[i] > swarm_eff) + { + swarm_eff = swarm_fitness[i]; + swarm_now = i; + } + } + if (swarm_now <0 || swarm_now > swarm_num - 1) + PFATAL("swarm_now error number %d", swarm_now); + + } + + + } + return ret_val; + } + } + + +#undef FLIP_BIT + +} + + + +static u8 core_fuzzing(char** argv) { + int i; + + if (swarm_num == 1) + { + key_module = 2; + return 0; + } + + + s32 len, fd, temp_len, j; + u8 *in_buf, *out_buf, *orig_in, *ex_tmp, *eff_map = 0; + u64 havoc_queued, orig_hit_cnt, new_hit_cnt, cur_ms_lv; + u32 splice_cycle = 0, perf_score = 100, orig_perf, prev_cksum, eff_cnt = 1; + + u8 ret_val = 1, doing_det = 0; + + u8 a_collect[MAX_AUTO_EXTRA]; + u32 a_len = 0; + +#ifdef IGNORE_FINDS + + /* In IGNORE_FINDS mode, skip any entries that weren't in the + initial data set. */ + + if (queue_cur->depth > 1) return 1; + +#else + + if (pending_favored) { + + /* If we have any favored, non-fuzzed new arrivals in the queue, + possibly skip to them at the expense of already-fuzzed or non-favored + cases. */ + + if ((queue_cur->was_fuzzed || !queue_cur->favored) && + UR(100) < SKIP_TO_NEW_PROB) return 1; + + } + else if (!dumb_mode && !queue_cur->favored && queued_paths > 10) { + + /* Otherwise, still possibly skip non-favored cases, albeit less often. + The odds of skipping stuff are higher for already-fuzzed inputs and + lower for never-fuzzed entries. 
*/ + + if (queue_cycle > 1 && !queue_cur->was_fuzzed) { + + if (UR(100) < SKIP_NFAV_NEW_PROB) return 1; + + } + else { + + if (UR(100) < SKIP_NFAV_OLD_PROB) return 1; + + } + + } + +#endif /* ^IGNORE_FINDS */ + + if (not_on_tty) { + ACTF("Fuzzing test case #%u (%u total, %llu uniq crashes found)...", + current_entry, queued_paths, unique_crashes); + fflush(stdout); + } + + /* Map the test case into memory. */ + + fd = open(queue_cur->fname, O_RDONLY); + + if (fd < 0) PFATAL("Unable to open '%s'", queue_cur->fname); + + len = queue_cur->len; + + orig_in = in_buf = mmap(0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + + if (orig_in == MAP_FAILED) PFATAL("Unable to mmap '%s'", queue_cur->fname); + + close(fd); + + /* We could mmap() out_buf as MAP_PRIVATE, but we end up clobbering every + single byte anyway, so it wouldn't give us any performance or memory usage + benefits. */ + + out_buf = ck_alloc_nozero(len); + + subseq_tmouts = 0; + + cur_depth = queue_cur->depth; + + /******************************************* + * CALIBRATION (only if failed earlier on) * + *******************************************/ + + if (queue_cur->cal_failed) { + + u8 res = FAULT_TMOUT; + + if (queue_cur->cal_failed < CAL_CHANCES) { + + res = calibrate_case(argv, queue_cur, in_buf, queue_cycle - 1, 0); + + if (res == FAULT_ERROR) + FATAL("Unable to execute target application"); + + } + + if (stop_soon || res != crash_mode) { + cur_skipped_paths++; + goto abandon_entry; + } + + } + + /************ + * TRIMMING * + ************/ + + if (!dumb_mode && !queue_cur->trim_done) { + + u8 res = trim_case(argv, queue_cur, in_buf); + + if (res == FAULT_ERROR) + FATAL("Unable to execute target application"); + + if (stop_soon) { + cur_skipped_paths++; + goto abandon_entry; + } + + /* Don't retry trimming, even if it failed. */ + + queue_cur->trim_done = 1; + + if (len != queue_cur->len) len = queue_cur->len; + + } + + memcpy(out_buf, in_buf, len); + + /********************* + * PERFORMANCE SCORE * + *********************/ + + orig_perf = perf_score = calculate_score(queue_cur); + + /* Skip right away if -d is given, if we have done deterministic fuzzing on + this entry ourselves (was_fuzzed), or if it has gone through deterministic + testing in earlier, resumed runs (passed_det). */ + + if (skip_deterministic || queue_cur->was_fuzzed || queue_cur->passed_det) + goto havoc_stage; + + /* Skip deterministic fuzzing if exec path checksum puts this out of scope + for this master instance. */ + + if (master_max && (queue_cur->exec_cksum % master_max) != master_id - 1) + goto havoc_stage; + + + cur_ms_lv = get_cur_time(); + if (!(key_puppet == 0 && ((cur_ms_lv - last_path_time < limit_time_puppet) || + (last_crash_time != 0 && cur_ms_lv - last_crash_time < limit_time_puppet) || last_path_time == 0))) + { + key_puppet = 1; + goto pacemaker_fuzzing; + } + + doing_det = 1; + + /********************************************* + * SIMPLE BITFLIP (+dictionary construction) * + *********************************************/ + +#define FLIP_BIT(_ar, _b) do { \ + u8* _arf = (u8*)(_ar); \ + u32 _bf = (_b); \ + _arf[(_bf) >> 3] ^= (128 >> ((_bf) & 7)); \ + } while (0) + + /* Single walking bit. 
*/ + + stage_short = "flip1"; + stage_max = len << 3; + stage_name = "bitflip 1/1"; + + stage_val_type = STAGE_VAL_NONE; + + orig_hit_cnt = queued_paths + unique_crashes; + + prev_cksum = queue_cur->exec_cksum; + + for (stage_cur = 0; stage_cur < stage_max; stage_cur++) { + + stage_cur_byte = stage_cur >> 3; + + FLIP_BIT(out_buf, stage_cur); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + FLIP_BIT(out_buf, stage_cur); + + /* While flipping the least significant bit in every byte, pull of an extra + trick to detect possible syntax tokens. In essence, the idea is that if + you have a binary blob like this: + + xxxxxxxxIHDRxxxxxxxx + + ...and changing the leading and trailing bytes causes variable or no + changes in program flow, but touching any character in the "IHDR" string + always produces the same, distinctive path, it's highly likely that + "IHDR" is an atomically-checked magic value of special significance to + the fuzzed format. + + We do this here, rather than as a separate stage, because it's a nice + way to keep the operation approximately "free" (i.e., no extra execs). + + Empirically, performing the check when flipping the least significant bit + is advantageous, compared to doing it at the time of more disruptive + changes, where the program flow may be affected in more violent ways. + + The caveat is that we won't generate dictionaries in the -d mode or -S + mode - but that's probably a fair trade-off. + + This won't work particularly well with paths that exhibit variable + behavior, but fails gracefully, so we'll carry out the checks anyway. + + */ + + if (!dumb_mode && (stage_cur & 7) == 7) { + + u32 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST); + + if (stage_cur == stage_max - 1 && cksum == prev_cksum) { + + /* If at end of file and we are still collecting a string, grab the + final character and force output. */ + + if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3]; + a_len++; + + if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA) + maybe_add_auto(a_collect, a_len); + + } + else if (cksum != prev_cksum) { + + /* Otherwise, if the checksum has changed, see if we have something + worthwhile queued up, and collect that if the answer is yes. */ + + if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA) + maybe_add_auto(a_collect, a_len); + + a_len = 0; + prev_cksum = cksum; + + } + + /* Continue collecting string, but only if the bit flip actually made + any difference - we don't want no-op tokens. */ + + if (cksum != queue_cur->exec_cksum) { + + if (a_len < MAX_AUTO_EXTRA) a_collect[a_len] = out_buf[stage_cur >> 3]; + a_len++; + + } + + } + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP1] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP1] += stage_max; + + + + /* Two walking bits. */ + + stage_name = "bitflip 2/1"; + stage_short = "flip2"; + stage_max = (len << 3) - 1; + + orig_hit_cnt = new_hit_cnt; + + for (stage_cur = 0; stage_cur < stage_max; stage_cur++) { + + stage_cur_byte = stage_cur >> 3; + + FLIP_BIT(out_buf, stage_cur); + FLIP_BIT(out_buf, stage_cur + 1); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + FLIP_BIT(out_buf, stage_cur); + FLIP_BIT(out_buf, stage_cur + 1); + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP2] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP2] += stage_max; + + + /* Four walking bits. 
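     (Same walking pattern as before, but four adjacent bits at a time; with
     stage_max = (len << 3) - 3, a 4-byte input yields 29 positions.)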
*/ + + stage_name = "bitflip 4/1"; + stage_short = "flip4"; + stage_max = (len << 3) - 3; + + + orig_hit_cnt = new_hit_cnt; + + for (stage_cur = 0; stage_cur < stage_max; stage_cur++) { + + stage_cur_byte = stage_cur >> 3; + + FLIP_BIT(out_buf, stage_cur); + FLIP_BIT(out_buf, stage_cur + 1); + FLIP_BIT(out_buf, stage_cur + 2); + FLIP_BIT(out_buf, stage_cur + 3); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + FLIP_BIT(out_buf, stage_cur); + FLIP_BIT(out_buf, stage_cur + 1); + FLIP_BIT(out_buf, stage_cur + 2); + FLIP_BIT(out_buf, stage_cur + 3); + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP4] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP4] += stage_max; + + + /* Effector map setup. These macros calculate: + + EFF_APOS - position of a particular file offset in the map. + EFF_ALEN - length of a map with a particular number of bytes. + EFF_SPAN_ALEN - map span for a sequence of bytes. + + */ + +#define EFF_APOS(_p) ((_p) >> EFF_MAP_SCALE2) +#define EFF_REM(_x) ((_x) & ((1 << EFF_MAP_SCALE2) - 1)) +#define EFF_ALEN(_l) (EFF_APOS(_l) + !!EFF_REM(_l)) +#define EFF_SPAN_ALEN(_p, _l) (EFF_APOS((_p) + (_l) - 1) - EFF_APOS(_p) + 1) + + /* Initialize effector map for the next step (see comments below). Always + flag first and last byte as doing something. */ + + eff_map = ck_alloc(EFF_ALEN(len)); + eff_map[0] = 1; + + if (EFF_APOS(len - 1) != 0) { + eff_map[EFF_APOS(len - 1)] = 1; + eff_cnt++; + } + + /* Walking byte. */ + + stage_name = "bitflip 8/8"; + stage_short = "flip8"; + stage_max = len; + + + orig_hit_cnt = new_hit_cnt; + + for (stage_cur = 0; stage_cur < stage_max; stage_cur++) { + + stage_cur_byte = stage_cur; + + out_buf[stage_cur] ^= 0xFF; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + /* We also use this stage to pull off a simple trick: we identify + bytes that seem to have no effect on the current execution path + even when fully flipped - and we skip them during more expensive + deterministic stages, such as arithmetics or known ints. */ + + if (!eff_map[EFF_APOS(stage_cur)]) { + + u32 cksum; + + /* If in dumb mode or if the file is very short, just flag everything + without wasting time on checksums. */ + + if (!dumb_mode && len >= EFF_MIN_LEN) + cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST); + else + cksum = ~queue_cur->exec_cksum; + + if (cksum != queue_cur->exec_cksum) { + eff_map[EFF_APOS(stage_cur)] = 1; + eff_cnt++; + } + + } + + out_buf[stage_cur] ^= 0xFF; + + } + + /* If the effector map is more than EFF_MAX_PERC dense, just flag the + whole thing as worth fuzzing, since we wouldn't be saving much time + anyway. */ + + if (eff_cnt != EFF_ALEN(len) && + eff_cnt * 100 / EFF_ALEN(len) > EFF_MAX_PERC) { + + memset(eff_map, 1, EFF_ALEN(len)); + + blocks_eff_select += EFF_ALEN(len); + + } + else { + + blocks_eff_select += eff_cnt; + + } + + blocks_eff_total += EFF_ALEN(len); + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP8] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP8] += stage_max; + + + + /* Two walking bytes. */ + + if (len < 2) goto skip_bitflip; + + stage_name = "bitflip 16/8"; + stage_short = "flip16"; + stage_cur = 0; + stage_max = len - 1; + + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 1; i++) { + + /* Let's consult the effector map... 
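       (eff_map is bucketed by EFF_APOS(): assuming the stock EFF_MAP_SCALE2
       value of 3 from config.h, one entry covers an 8-byte block, so the word
       at offset i is only skipped when every block it touches stayed inert
       during the 8/8 flip pass above.)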
*/ + + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) { + stage_max--; + continue; + } + + stage_cur_byte = i; + + *(u16*)(out_buf + i) ^= 0xFFFF; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + *(u16*)(out_buf + i) ^= 0xFFFF; + + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP16] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP16] += stage_max; + + + + if (len < 4) goto skip_bitflip; + + /* Four walking bytes. */ + + stage_name = "bitflip 32/8"; + stage_short = "flip32"; + stage_cur = 0; + stage_max = len - 3; + + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 3; i++) { + + /* Let's consult the effector map... */ + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] && + !eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) { + stage_max--; + continue; + } + + stage_cur_byte = i; + + *(u32*)(out_buf + i) ^= 0xFFFFFFFF; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + *(u32*)(out_buf + i) ^= 0xFFFFFFFF; + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_FLIP32] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_FLIP32] += stage_max; + + + + + skip_bitflip: + + if (no_arith) goto skip_arith; + + /********************** + * ARITHMETIC INC/DEC * + **********************/ + + /* 8-bit arithmetics. */ + + stage_name = "arith 8/8"; + stage_short = "arith8"; + stage_cur = 0; + stage_max = 2 * len * ARITH_MAX; + + + stage_val_type = STAGE_VAL_LE; + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len; i++) { + + u8 orig = out_buf[i]; + + /* Let's consult the effector map... */ + + if (!eff_map[EFF_APOS(i)]) { + stage_max -= 2 * ARITH_MAX; + continue; + } + + stage_cur_byte = i; + + for (j = 1; j <= ARITH_MAX; j++) { + + u8 r = orig ^ (orig + j); + + /* Do arithmetic operations only if the result couldn't be a product + of a bitflip. */ + + if (!could_be_bitflip(r)) { + + stage_cur_val = j; + out_buf[i] = orig + j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + r = orig ^ (orig - j); + + if (!could_be_bitflip(r)) { + + stage_cur_val = -j; + out_buf[i] = orig - j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + out_buf[i] = orig; + + } + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_ARITH8] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_ARITH8] += stage_max; + + + + + /* 16-bit arithmetics, both endians. */ + + if (len < 2) goto skip_arith; + + stage_name = "arith 16/8"; + stage_short = "arith16"; + stage_cur = 0; + stage_max = 4 * (len - 1) * ARITH_MAX; + + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 1; i++) { + + u16 orig = *(u16*)(out_buf + i); + + /* Let's consult the effector map... */ + + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) { + stage_max -= 4 * ARITH_MAX; + continue; + } + + stage_cur_byte = i; + + for (j = 1; j <= ARITH_MAX; j++) { + + u16 r1 = orig ^ (orig + j), + r2 = orig ^ (orig - j), + r3 = orig ^ SWAP16(SWAP16(orig) + j), + r4 = orig ^ SWAP16(SWAP16(orig) - j); + + /* Try little endian addition and subtraction first. Do it only + if the operation would affect more than one byte (hence the + & 0xff overflow checks) and if it couldn't be a product of + a bitflip. 
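         (Example: with orig = 0x01fe and j = 5, (orig & 0xff) + j = 0x103
         carries into the upper byte, so the result is something the 8-bit
         pass could not have produced and is worth an exec; with orig = 0x0110
         the sum stays below 0x100 and the case is skipped here.)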
*/ + + stage_val_type = STAGE_VAL_LE; + + if ((orig & 0xff) + j > 0xff && !could_be_bitflip(r1)) { + + stage_cur_val = j; + *(u16*)(out_buf + i) = orig + j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((orig & 0xff) < j && !could_be_bitflip(r2)) { + + stage_cur_val = -j; + *(u16*)(out_buf + i) = orig - j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + /* Big endian comes next. Same deal. */ + + stage_val_type = STAGE_VAL_BE; + + + if ((orig >> 8) + j > 0xff && !could_be_bitflip(r3)) { + + stage_cur_val = j; + *(u16*)(out_buf + i) = SWAP16(SWAP16(orig) + j); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((orig >> 8) < j && !could_be_bitflip(r4)) { + + stage_cur_val = -j; + *(u16*)(out_buf + i) = SWAP16(SWAP16(orig) - j); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + *(u16*)(out_buf + i) = orig; + + } + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_ARITH16] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_ARITH16] += stage_max; + + + + /* 32-bit arithmetics, both endians. */ + + if (len < 4) goto skip_arith; + + stage_name = "arith 32/8"; + stage_short = "arith32"; + stage_cur = 0; + stage_max = 4 * (len - 3) * ARITH_MAX; + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 3; i++) { + + u32 orig = *(u32*)(out_buf + i); + + /* Let's consult the effector map... */ + + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] && + !eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) { + stage_max -= 4 * ARITH_MAX; + continue; + } + + stage_cur_byte = i; + + for (j = 1; j <= ARITH_MAX; j++) { + + u32 r1 = orig ^ (orig + j), + r2 = orig ^ (orig - j), + r3 = orig ^ SWAP32(SWAP32(orig) + j), + r4 = orig ^ SWAP32(SWAP32(orig) - j); + + /* Little endian first. Same deal as with 16-bit: we only want to + try if the operation would have effect on more than two bytes. */ + + stage_val_type = STAGE_VAL_LE; + + if ((orig & 0xffff) + j > 0xffff && !could_be_bitflip(r1)) { + + stage_cur_val = j; + *(u32*)(out_buf + i) = orig + j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((orig & 0xffff) < j && !could_be_bitflip(r2)) { + + stage_cur_val = -j; + *(u32*)(out_buf + i) = orig - j; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + /* Big endian next. 
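         (Here the same overflow test runs on the byte-swapped view: e.g. for
         in-memory bytes 00 00 ff ff, orig reads as 0xffff0000, SWAP32(orig)
         is 0x0000ffff, and adding 1 carries past the low 16 bits, so the
         mutation is not redundant with the 16-bit stage.)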
*/ + + stage_val_type = STAGE_VAL_BE; + + if ((SWAP32(orig) & 0xffff) + j > 0xffff && !could_be_bitflip(r3)) { + + stage_cur_val = j; + *(u32*)(out_buf + i) = SWAP32(SWAP32(orig) + j); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((SWAP32(orig) & 0xffff) < j && !could_be_bitflip(r4)) { + + stage_cur_val = -j; + *(u32*)(out_buf + i) = SWAP32(SWAP32(orig) - j); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + *(u32*)(out_buf + i) = orig; + + } + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_ARITH32] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_ARITH32] += stage_max; + + + + skip_arith: + + /********************** + * INTERESTING VALUES * + **********************/ + + stage_name = "interest 8/8"; + stage_short = "int8"; + stage_cur = 0; + stage_max = len * sizeof(interesting_8); + + + + stage_val_type = STAGE_VAL_LE; + + orig_hit_cnt = new_hit_cnt; + + /* Setting 8-bit integers. */ + + for (i = 0; i < len; i++) { + + u8 orig = out_buf[i]; + + /* Let's consult the effector map... */ + + if (!eff_map[EFF_APOS(i)]) { + stage_max -= sizeof(interesting_8); + continue; + } + + stage_cur_byte = i; + + for (j = 0; j < sizeof(interesting_8); j++) { + + /* Skip if the value could be a product of bitflips or arithmetics. */ + + if (could_be_bitflip(orig ^ (u8)interesting_8[j]) || + could_be_arith(orig, (u8)interesting_8[j], 1)) { + stage_max--; + continue; + } + + stage_cur_val = interesting_8[j]; + out_buf[i] = interesting_8[j]; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + out_buf[i] = orig; + stage_cur++; + + } + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_INTEREST8] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_INTEREST8] += stage_max; + + + + /* Setting 16-bit integers, both endians. */ + + if (no_arith || len < 2) goto skip_interest; + + stage_name = "interest 16/8"; + stage_short = "int16"; + stage_cur = 0; + stage_max = 2 * (len - 1) * (sizeof(interesting_16) >> 1); + + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 1; i++) { + + u16 orig = *(u16*)(out_buf + i); + + /* Let's consult the effector map... */ + + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) { + stage_max -= sizeof(interesting_16); + continue; + } + + stage_cur_byte = i; + + for (j = 0; j < sizeof(interesting_16) / 2; j++) { + + stage_cur_val = interesting_16[j]; + + /* Skip if this could be a product of a bitflip, arithmetics, + or single-byte interesting value insertion. 
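         (Example: if the current word is 0x0000 and the candidate value is
         0x00ff, the change equals a single byte flip from the 8/8 stage, so
         could_be_bitflip() rejects it and the exec is saved.)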
*/ + + if (!could_be_bitflip(orig ^ (u16)interesting_16[j]) && + !could_be_arith(orig, (u16)interesting_16[j], 2) && + !could_be_interest(orig, (u16)interesting_16[j], 2, 0)) { + + stage_val_type = STAGE_VAL_LE; + + *(u16*)(out_buf + i) = interesting_16[j]; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((u16)interesting_16[j] != SWAP16(interesting_16[j]) && + !could_be_bitflip(orig ^ SWAP16(interesting_16[j])) && + !could_be_arith(orig, SWAP16(interesting_16[j]), 2) && + !could_be_interest(orig, SWAP16(interesting_16[j]), 2, 1)) { + + stage_val_type = STAGE_VAL_BE; + + *(u16*)(out_buf + i) = SWAP16(interesting_16[j]); + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + } + + *(u16*)(out_buf + i) = orig; + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_INTEREST16] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_INTEREST16] += stage_max; + + + + + if (len < 4) goto skip_interest; + + /* Setting 32-bit integers, both endians. */ + + stage_name = "interest 32/8"; + stage_short = "int32"; + stage_cur = 0; + stage_max = 2 * (len - 3) * (sizeof(interesting_32) >> 2); + + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len - 3; i++) { + + u32 orig = *(u32*)(out_buf + i); + + /* Let's consult the effector map... */ + + if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] && + !eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) { + stage_max -= sizeof(interesting_32) >> 1; + continue; + } + + stage_cur_byte = i; + + for (j = 0; j < sizeof(interesting_32) / 4; j++) { + + stage_cur_val = interesting_32[j]; + + /* Skip if this could be a product of a bitflip, arithmetics, + or word interesting value insertion. */ + + if (!could_be_bitflip(orig ^ (u32)interesting_32[j]) && + !could_be_arith(orig, interesting_32[j], 4) && + !could_be_interest(orig, interesting_32[j], 4, 0)) { + + stage_val_type = STAGE_VAL_LE; + + *(u32*)(out_buf + i) = interesting_32[j]; + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + if ((u32)interesting_32[j] != SWAP32(interesting_32[j]) && + !could_be_bitflip(orig ^ SWAP32(interesting_32[j])) && + !could_be_arith(orig, SWAP32(interesting_32[j]), 4) && + !could_be_interest(orig, SWAP32(interesting_32[j]), 4, 1)) { + + stage_val_type = STAGE_VAL_BE; + + *(u32*)(out_buf + i) = SWAP32(interesting_32[j]); + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + stage_cur++; + + } else stage_max--; + + } + + *(u32*)(out_buf + i) = orig; + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_INTEREST32] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_INTEREST32] += stage_max; + + + + skip_interest: + + /******************** + * DICTIONARY STUFF * + ********************/ + + if (!extras_cnt) goto skip_user_extras; + + /* Overwrite with user-supplied extras. */ + + stage_name = "user extras (over)"; + stage_short = "ext_UO"; + stage_cur = 0; + stage_max = extras_cnt * len; + + + stage_val_type = STAGE_VAL_NONE; + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len; i++) { + + u32 last_len = 0; + + stage_cur_byte = i; + + /* Extras are sorted by size, from smallest to largest. This means + that we don't have to worry about restoring the buffer in + between writes at a particular offset determined by the outer + loop. */ + + for (j = 0; j < extras_cnt; j++) { + + /* Skip extras probabilistically if extras_cnt > MAX_DET_EXTRAS. 
Also + skip them if there's no room to insert the payload, if the token + is redundant, or if its entire span has no bytes set in the effector + map. */ + + if ((extras_cnt > MAX_DET_EXTRAS && UR(extras_cnt) >= MAX_DET_EXTRAS) || + extras[j].len > len - i || + !memcmp(extras[j].data, out_buf + i, extras[j].len) || + !memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, extras[j].len))) { + + stage_max--; + continue; + + } + + last_len = extras[j].len; + memcpy(out_buf + i, extras[j].data, last_len); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + stage_cur++; + + } + + /* Restore all the clobbered memory. */ + memcpy(out_buf + i, in_buf + i, last_len); + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_EXTRAS_UO] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_EXTRAS_UO] += stage_max; + + /* Insertion of user-supplied extras. */ + + stage_name = "user extras (insert)"; + stage_short = "ext_UI"; + stage_cur = 0; + stage_max = extras_cnt * len; + + + + + orig_hit_cnt = new_hit_cnt; + + ex_tmp = ck_alloc(len + MAX_DICT_FILE); + + for (i = 0; i <= len; i++) { + + stage_cur_byte = i; + + for (j = 0; j < extras_cnt; j++) { + + if (len + extras[j].len > MAX_FILE) { + stage_max--; + continue; + } + + /* Insert token */ + memcpy(ex_tmp + i, extras[j].data, extras[j].len); + + /* Copy tail */ + memcpy(ex_tmp + i + extras[j].len, out_buf + i, len - i); + + if (common_fuzz_stuff(argv, ex_tmp, len + extras[j].len)) { + ck_free(ex_tmp); + goto abandon_entry; + } + + stage_cur++; + + } + + /* Copy head */ + ex_tmp[i] = out_buf[i]; + + } + + ck_free(ex_tmp); + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_EXTRAS_UI] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_EXTRAS_UI] += stage_max; + + skip_user_extras: + + if (!a_extras_cnt) goto skip_extras; + + stage_name = "auto extras (over)"; + stage_short = "ext_AO"; + stage_cur = 0; + stage_max = MIN(a_extras_cnt, USE_AUTO_EXTRAS) * len; + + + stage_val_type = STAGE_VAL_NONE; + + orig_hit_cnt = new_hit_cnt; + + for (i = 0; i < len; i++) { + + u32 last_len = 0; + + stage_cur_byte = i; + + for (j = 0; j < MIN(a_extras_cnt, USE_AUTO_EXTRAS); j++) { + + /* See the comment in the earlier code; extras are sorted by size. */ + + if (a_extras[j].len > len - i || + !memcmp(a_extras[j].data, out_buf + i, a_extras[j].len) || + !memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, a_extras[j].len))) { + + stage_max--; + continue; + + } + + last_len = a_extras[j].len; + memcpy(out_buf + i, a_extras[j].data, last_len); + + if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry; + + stage_cur++; + + } + + /* Restore all the clobbered memory. */ + memcpy(out_buf + i, in_buf + i, last_len); + + } + + new_hit_cnt = queued_paths + unique_crashes; + + stage_finds[STAGE_EXTRAS_AO] += new_hit_cnt - orig_hit_cnt; + stage_cycles[STAGE_EXTRAS_AO] += stage_max; + + skip_extras: + + /* If we made this to here without jumping to havoc_stage or abandon_entry, + we're properly done with deterministic steps and can mark it as such + in the .state/ directory. */ + + if (!queue_cur->passed_det) mark_as_det_done(queue_cur); + + /**************** + * RANDOM HAVOC * + ****************/ + + havoc_stage: + pacemaker_fuzzing: + + + stage_cur_byte = -1; + + /* The havoc stage mutation code is also invoked when splicing files; if the + splice_cycle variable is set, generate different descriptions and such. 
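     (When we arrive here from the splicing loop further down, splice_cycle is
     non-zero, perf_score is reset to orig_perf, and the stage banner plus the
     stage_max budget are derived from SPLICE_HAVOC rather than HAVOC_CYCLES.)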
*/ + + if (!splice_cycle) { + + stage_name = "MOpt-havoc"; + stage_short = "MOpt-havoc"; + stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * + perf_score / havoc_div / 100; + + } + else { + + static u8 tmp[32]; + + perf_score = orig_perf; + + sprintf(tmp, "MOpt-core-splice %u", splice_cycle); + stage_name = tmp; + stage_short = "MOpt-core-splice"; + stage_max = SPLICE_HAVOC * perf_score / havoc_div / 100; + + } + + s32 temp_len_puppet; + cur_ms_lv = get_cur_time(); + + //for (; swarm_now < swarm_num; swarm_now++) + { + + + if (key_puppet == 1) + { + if (unlikely(orig_hit_cnt_puppet == 0)) + { + orig_hit_cnt_puppet = queued_paths + unique_crashes; + last_limit_time_start = get_cur_time(); + + SPLICE_CYCLES_puppet = (UR(SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) + SPLICE_CYCLES_puppet_low); + } + } + + + { + havoc_stage_puppet: + + stage_cur_byte = -1; + + /* The havoc stage mutation code is also invoked when splicing files; if the + splice_cycle variable is set, generate different descriptions and such. */ + + if (!splice_cycle) { + + stage_name = "MOpt core avoc"; + stage_short = "MOpt core havoc"; + stage_max = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) * + perf_score / havoc_div / 100; + + } + else { + static u8 tmp[32]; + perf_score = orig_perf; + sprintf(tmp, "MOpt core splice %u", splice_cycle); + stage_name = tmp; + stage_short = "MOpt core splice"; + stage_max = SPLICE_HAVOC * perf_score / havoc_div / 100; + } + + + + if (stage_max < HAVOC_MIN) stage_max = HAVOC_MIN; + + temp_len = len; + + orig_hit_cnt = queued_paths + unique_crashes; + + havoc_queued = queued_paths; + + + + for (stage_cur = 0; stage_cur < stage_max; stage_cur++) { + + u32 use_stacking = 1 << (1 + UR(HAVOC_STACK_POW2)); + + stage_cur_val = use_stacking; + + + for (i = 0; i < operator_num; i++) + { + core_operator_cycles_puppet_v3[i] = core_operator_cycles_puppet_v2[i]; + } + + + for (i = 0; i < use_stacking; i++) { + + switch (select_algorithm()) { + + case 0: + /* Flip a single bit somewhere. Spooky! */ + FLIP_BIT(out_buf, UR(temp_len << 3)); + core_operator_cycles_puppet_v2[STAGE_FLIP1] += 1; + break; + + + case 1: + if (temp_len < 2) break; + temp_len_puppet = UR(temp_len << 3); + FLIP_BIT(out_buf, temp_len_puppet); + FLIP_BIT(out_buf, temp_len_puppet + 1); + core_operator_cycles_puppet_v2[STAGE_FLIP2] += 1; + break; + + case 2: + if (temp_len < 2) break; + temp_len_puppet = UR(temp_len << 3); + FLIP_BIT(out_buf, temp_len_puppet); + FLIP_BIT(out_buf, temp_len_puppet + 1); + FLIP_BIT(out_buf, temp_len_puppet + 2); + FLIP_BIT(out_buf, temp_len_puppet + 3); + core_operator_cycles_puppet_v2[STAGE_FLIP4] += 1; + break; + + case 3: + if (temp_len < 4) break; + out_buf[UR(temp_len)] ^= 0xFF; + core_operator_cycles_puppet_v2[STAGE_FLIP8] += 1; + break; + + case 4: + if (temp_len < 8) break; + *(u16*)(out_buf + UR(temp_len - 1)) ^= 0xFFFF; + core_operator_cycles_puppet_v2[STAGE_FLIP16] += 1; + break; + + case 5: + if (temp_len < 8) break; + *(u32*)(out_buf + UR(temp_len - 3)) ^= 0xFFFFFFFF; + core_operator_cycles_puppet_v2[STAGE_FLIP32] += 1; + break; + + case 6: + out_buf[UR(temp_len)] -= 1 + UR(ARITH_MAX); + out_buf[UR(temp_len)] += 1 + UR(ARITH_MAX); + core_operator_cycles_puppet_v2[STAGE_ARITH8] += 1; + break; + + case 7: + /* Randomly subtract from word, random endian. 
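           (The big-endian branch works on the byte-swapped view: bytes 12 34
           read as the little-endian word 0x3412, SWAP16() turns that into
           0x1234, the subtraction happens there, and SWAP16() stores the
           result back in the original byte order.)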
*/ + if (temp_len < 8) break; + if (UR(2)) { + u32 pos = UR(temp_len - 1); + *(u16*)(out_buf + pos) -= 1 + UR(ARITH_MAX); + } + else { + u32 pos = UR(temp_len - 1); + u16 num = 1 + UR(ARITH_MAX); + *(u16*)(out_buf + pos) = + SWAP16(SWAP16(*(u16*)(out_buf + pos)) - num); + } + /* Randomly add to word, random endian. */ + if (UR(2)) { + u32 pos = UR(temp_len - 1); + *(u16*)(out_buf + pos) += 1 + UR(ARITH_MAX); + } + else { + u32 pos = UR(temp_len - 1); + u16 num = 1 + UR(ARITH_MAX); + *(u16*)(out_buf + pos) = + SWAP16(SWAP16(*(u16*)(out_buf + pos)) + num); + } + core_operator_cycles_puppet_v2[STAGE_ARITH16] += 1; + break; + + + case 8: + /* Randomly subtract from dword, random endian. */ + if (temp_len < 8) break; + if (UR(2)) { + u32 pos = UR(temp_len - 3); + *(u32*)(out_buf + pos) -= 1 + UR(ARITH_MAX); + } + else { + u32 pos = UR(temp_len - 3); + u32 num = 1 + UR(ARITH_MAX); + *(u32*)(out_buf + pos) = + SWAP32(SWAP32(*(u32*)(out_buf + pos)) - num); + } + /* Randomly add to dword, random endian. */ + if (UR(2)) { + u32 pos = UR(temp_len - 3); + *(u32*)(out_buf + pos) += 1 + UR(ARITH_MAX); + } + else { + u32 pos = UR(temp_len - 3); + u32 num = 1 + UR(ARITH_MAX); + *(u32*)(out_buf + pos) = + SWAP32(SWAP32(*(u32*)(out_buf + pos)) + num); + } + core_operator_cycles_puppet_v2[STAGE_ARITH32] += 1; + break; + + + case 9: + /* Set byte to interesting value. */ + if (temp_len < 4) break; + out_buf[UR(temp_len)] = interesting_8[UR(sizeof(interesting_8))]; + core_operator_cycles_puppet_v2[STAGE_INTEREST8] += 1; + break; + + case 10: + /* Set word to interesting value, randomly choosing endian. */ + if (temp_len < 8) break; + if (UR(2)) { + *(u16*)(out_buf + UR(temp_len - 1)) = + interesting_16[UR(sizeof(interesting_16) >> 1)]; + } + else { + *(u16*)(out_buf + UR(temp_len - 1)) = SWAP16( + interesting_16[UR(sizeof(interesting_16) >> 1)]); + } + core_operator_cycles_puppet_v2[STAGE_INTEREST16] += 1; + break; + + + case 11: + /* Set dword to interesting value, randomly choosing endian. */ + + if (temp_len < 8) break; + + if (UR(2)) { + *(u32*)(out_buf + UR(temp_len - 3)) = + interesting_32[UR(sizeof(interesting_32) >> 2)]; + } + else { + *(u32*)(out_buf + UR(temp_len - 3)) = SWAP32( + interesting_32[UR(sizeof(interesting_32) >> 2)]); + } + core_operator_cycles_puppet_v2[STAGE_INTEREST32] += 1; + break; + + + case 12: + + /* Just set a random byte to a random value. Because, + why not. We use XOR with 1-255 to eliminate the + possibility of a no-op. */ + + out_buf[UR(temp_len)] ^= 1 + UR(255); + core_operator_cycles_puppet_v2[STAGE_RANDOMBYTE] += 1; + break; + + + + case 13: { + + /* Delete bytes. We're making this a bit more likely + than insertion (the next option) in hopes of keeping + files reasonably small. */ + + u32 del_from, del_len; + + if (temp_len < 2) break; + + /* Don't delete too much. */ + + del_len = choose_block_len(temp_len - 1); + + del_from = UR(temp_len - del_len + 1); + + memmove(out_buf + del_from, out_buf + del_from + del_len, + temp_len - del_from - del_len); + + temp_len -= del_len; + core_operator_cycles_puppet_v2[STAGE_DELETEBYTE] += 1; + break; + + } + + case 14: + + if (temp_len + HAVOC_BLK_XL < MAX_FILE) { + + /* Clone bytes (75%) or insert a block of constant bytes (25%). 
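           (actually_clone = UR(4) is zero once in four tries; that zero
           selects the constant-byte insert below, while any non-zero value
           clones an existing block, hence the 75/25 split.)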
*/ + + u8 actually_clone = UR(4); + u32 clone_from, clone_to, clone_len; + u8* new_buf; + + if (actually_clone) { + + clone_len = choose_block_len(temp_len); + clone_from = UR(temp_len - clone_len + 1); + + } + else { + + clone_len = choose_block_len(HAVOC_BLK_XL); + clone_from = 0; + + } + + clone_to = UR(temp_len); + + new_buf = ck_alloc_nozero(temp_len + clone_len); + + /* Head */ + + memcpy(new_buf, out_buf, clone_to); + + /* Inserted part */ + + if (actually_clone) + memcpy(new_buf + clone_to, out_buf + clone_from, clone_len); + else + memset(new_buf + clone_to, + UR(2) ? UR(256) : out_buf[UR(temp_len)], clone_len); + + /* Tail */ + memcpy(new_buf + clone_to + clone_len, out_buf + clone_to, + temp_len - clone_to); + + ck_free(out_buf); + out_buf = new_buf; + temp_len += clone_len; + core_operator_cycles_puppet_v2[STAGE_Clone75] += 1; + } + + break; + + case 15: { + + /* Overwrite bytes with a randomly selected chunk (75%) or fixed + bytes (25%). */ + + u32 copy_from, copy_to, copy_len; + + if (temp_len < 2) break; + + copy_len = choose_block_len(temp_len - 1); + + copy_from = UR(temp_len - copy_len + 1); + copy_to = UR(temp_len - copy_len + 1); + + if (UR(4)) { + + if (copy_from != copy_to) + memmove(out_buf + copy_to, out_buf + copy_from, copy_len); + + } + else memset(out_buf + copy_to, + UR(2) ? UR(256) : out_buf[UR(temp_len)], copy_len); + core_operator_cycles_puppet_v2[STAGE_OverWrite75] += 1; + break; + + } + + + } + + } + + + tmp_core_time += 1; + + + + + u64 temp_total_found = queued_paths + unique_crashes; + + + + + if (common_fuzz_stuff(argv, out_buf, temp_len)) + goto abandon_entry_puppet; + + /* out_buf might have been mangled a bit, so let's restore it to its + original size and shape. */ + + if (temp_len < len) out_buf = ck_realloc(out_buf, len); + temp_len = len; + memcpy(out_buf, in_buf, len); + + /* If we're finding new stuff, let's run for a bit longer, limits + permitting. */ + + if (queued_paths != havoc_queued) { + + if (perf_score <= havoc_max_mult * 100) { + stage_max *= 2; + perf_score *= 2; + } + + havoc_queued = queued_paths; + + } + + if (unlikely(queued_paths + unique_crashes > temp_total_found)) + { + u64 temp_temp_puppet = queued_paths + unique_crashes - temp_total_found; + total_puppet_find = total_puppet_find + temp_temp_puppet; + for (i = 0; i < 16; i++) + { + if (core_operator_cycles_puppet_v2[i] > core_operator_cycles_puppet_v3[i]) + core_operator_finds_puppet_v2[i] += temp_temp_puppet; + } + } + + } + + new_hit_cnt = queued_paths + unique_crashes; + + +#ifndef IGNORE_FINDS + + /************ + * SPLICING * + ************/ + + + retry_splicing_puppet: + + + + if (use_splicing && splice_cycle++ < SPLICE_CYCLES_puppet && + queued_paths > 1 && queue_cur->len > 1) { + + struct queue_entry* target; + u32 tid, split_at; + u8* new_buf; + s32 f_diff, l_diff; + + /* First of all, if we've modified in_buf for havoc, let's clean that + up... */ + + if (in_buf != orig_in) { + ck_free(in_buf); + in_buf = orig_in; + len = queue_cur->len; + } + + /* Pick a random queue entry and seek to it. Don't splice with yourself. */ + + do { tid = UR(queued_paths); } while (tid == current_entry); + + splicing_with = tid; + target = queue; + + while (tid >= 100) { target = target->next_100; tid -= 100; } + while (tid--) target = target->next; + + /* Make sure that the target has a reasonable length. 
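       (Concretely: at least 2 bytes long and not the entry currently being
       fuzzed; otherwise keep walking the queue, bumping splicing_with so the
       stats still point at the right index.)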
*/ + + while (target && (target->len < 2 || target == queue_cur)) { + target = target->next; + splicing_with++; + } + + if (!target) goto retry_splicing_puppet; + + /* Read the testcase into a new buffer. */ + + fd = open(target->fname, O_RDONLY); + + if (fd < 0) PFATAL("Unable to open '%s'", target->fname); + + new_buf = ck_alloc_nozero(target->len); + + ck_read(fd, new_buf, target->len, target->fname); + + close(fd); + + /* Find a suitable splicin g location, somewhere between the first and + the last differing byte. Bail out if the difference is just a single + byte or so. */ + + locate_diffs(in_buf, new_buf, MIN(len, target->len), &f_diff, &l_diff); + + if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) { + ck_free(new_buf); + goto retry_splicing_puppet; + } + + /* Split somewhere between the first and last differing byte. */ + + split_at = f_diff + UR(l_diff - f_diff); + + /* Do the thing. */ + + len = target->len; + memcpy(new_buf, in_buf, split_at); + in_buf = new_buf; + ck_free(out_buf); + out_buf = ck_alloc_nozero(len); + memcpy(out_buf, in_buf, len); + + goto havoc_stage_puppet; + + } + +#endif /* !IGNORE_FINDS */ + + ret_val = 0; + abandon_entry: + abandon_entry_puppet: + + if (splice_cycle >= SPLICE_CYCLES_puppet) + SPLICE_CYCLES_puppet = (UR(SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) + SPLICE_CYCLES_puppet_low); + + + splicing_with = -1; + + + munmap(orig_in, queue_cur->len); + + if (in_buf != orig_in) ck_free(in_buf); + ck_free(out_buf); + ck_free(eff_map); + + + if (key_puppet == 1) + { + if (unlikely(queued_paths + unique_crashes > ((queued_paths + unique_crashes)*limit_time_bound + orig_hit_cnt_puppet))) + { + key_puppet = 0; + cur_ms_lv = get_cur_time(); + new_hit_cnt = queued_paths + unique_crashes; + orig_hit_cnt_puppet = 0; + last_limit_time_start = 0; + } + } + + + if (unlikely(tmp_core_time > period_core)) + { + total_pacemaker_time += tmp_core_time; + tmp_core_time = 0; + temp_puppet_find = total_puppet_find; + new_hit_cnt = queued_paths + unique_crashes; + + u64 temp_stage_finds_puppet = 0; + for (i = 0; i < operator_num; i++) + { + + core_operator_finds_puppet[i] = core_operator_finds_puppet_v2[i]; + core_operator_cycles_puppet[i] = core_operator_cycles_puppet_v2[i]; + temp_stage_finds_puppet += core_operator_finds_puppet[i]; + } + + key_module = 2; + + old_hit_count = new_hit_cnt; + } + return ret_val; + } + } + + +#undef FLIP_BIT + +} + + +void pso_updating(void) { + + g_now += 1; + if (g_now > g_max) g_now = 0; + w_now = (w_init - w_end)*(g_max - g_now) / (g_max)+w_end; + int tmp_swarm, i, j; + u64 temp_operator_finds_puppet = 0; + for (i = 0; i < operator_num; i++) + { + operator_finds_puppet[i] = core_operator_finds_puppet[i]; + + for (j = 0; j < swarm_num; j++) + { + operator_finds_puppet[i] = operator_finds_puppet[i] + stage_finds_puppet[j][i]; + } + temp_operator_finds_puppet = temp_operator_finds_puppet + operator_finds_puppet[i]; + } + + for (i = 0; i < operator_num; i++) + { + if (operator_finds_puppet[i]) + G_best[i] = (double)((double)(operator_finds_puppet[i]) / (double)(temp_operator_finds_puppet)); + } + + for (tmp_swarm = 0; tmp_swarm < swarm_num; tmp_swarm++) + { + double x_temp = 0.0; + for (i = 0; i < operator_num; i++) + { + probability_now[tmp_swarm][i] = 0.0; + v_now[tmp_swarm][i] = w_now * v_now[tmp_swarm][i] + RAND_C * (L_best[tmp_swarm][i] - x_now[tmp_swarm][i]) + RAND_C * (G_best[i] - x_now[tmp_swarm][i]); + x_now[tmp_swarm][i] += v_now[tmp_swarm][i]; + if (x_now[tmp_swarm][i] > v_max) + x_now[tmp_swarm][i] = v_max; + else if 
(x_now[tmp_swarm][i] < v_min) + x_now[tmp_swarm][i] = v_min; + x_temp += x_now[tmp_swarm][i]; + } + + for (i = 0; i < operator_num; i++) + { + x_now[tmp_swarm][i] = x_now[tmp_swarm][i] / x_temp; + if (likely(i != 0)) + probability_now[tmp_swarm][i] = probability_now[tmp_swarm][i - 1] + x_now[tmp_swarm][i]; + else + probability_now[tmp_swarm][i] = x_now[tmp_swarm][i]; + } + if (probability_now[tmp_swarm][operator_num - 1] < 0.99 || probability_now[tmp_swarm][operator_num - 1] > 1.01) FATAL("ERROR probability"); + } + swarm_now = 0; + key_module = 0; +} + + +/* larger change for MOpt implementation: the original fuzz_one was renamed + to fuzz_one_original. All documentation references to fuzz_one therefore + mean fuzz_one_original */ +static u8 fuzz_one(char** argv) { + int key_val_lv = 0; + if (limit_time_sig == 0) { + key_val_lv = fuzz_one_original(argv); + } else { + if (key_module == 0) + key_val_lv = pilot_fuzzing(argv); + else if (key_module == 1) + key_val_lv = core_fuzzing(argv); + else if (key_module == 2) + pso_updating(); + } + + return key_val_lv; +} + /* Grab interesting test cases from other fuzzers. */ @@ -7423,10 +11130,8 @@ EXP_ST void check_binary(u8* fname) { #else -#if !defined(__arm__) && !defined(__arm64__) if (f_data[0] != 0xCF || f_data[1] != 0xFA || f_data[2] != 0xED) FATAL("Program '%s' is not a 64-bit Mach-O binary", target_path); -#endif #endif /* ^!__APPLE__ */ @@ -7584,24 +11289,31 @@ static void usage(u8* argv0) { " -f file - location read by the fuzzed program (stdin)\n" " -t msec - timeout for each run (auto-scaled, 50-%u ms)\n" " -m megs - memory limit for child process (%u MB)\n" - " -Q - use binary-only instrumentation (QEMU mode)\n\n" + " -Q - use binary-only instrumentation (QEMU mode)\n" + " -L minutes - use MOpt(imize) mode and set the limit time for entering the\n" + " pacemaker mode (minutes of no new paths, 0 = immediately).\n" + " see docs/README.MOpt\n\n" "Fuzzing behavior settings:\n" " -d - quick & dirty mode (skips deterministic steps)\n" " -n - fuzz without instrumentation (dumb mode)\n" " -x dir - optional fuzzer dictionary (see README)\n\n" + "Testing settings:\n" + " -s seed - use a fixed seed for the RNG\n" + " -V seconds - fuzz for a maximum total time of seconds then terminate\n" + " -E execs - fuzz for a maximum number of total executions then terminate\n\n" + "Other stuff:\n" " -T text - text banner to show on the screen\n" " -M / -S id - distributed mode (see parallel_fuzzing.txt)\n" " -C - crash exploration mode (the peruvian rabbit thing)\n" - " -s seed - use a fixed seed for the rng - important to testing\n" " -e ext - File extension for the temporarily generated test case\n\n" #ifdef USE_PYTHON "Compiled with Python 2.7 module support, see docs/python_mutators.txt\n" #endif - "For additional tips, please consult %s/README.\n\n", + "For additional tips, please consult %s/README\n\n", argv0, EXEC_TIMEOUT, MEM_LIMIT, doc_path); @@ -8237,14 +11949,14 @@ int main(int argc, char** argv) { struct timeval tv; struct timezone tz; - SAYF(cCYA "afl-fuzz" VERSION cRST " by <lcamtuf@google.com>, schedules by <marcel.boehme@acm.org>\n"); + SAYF(cCYA "afl-fuzz" VERSION cRST " based on afl by <lcamtuf@google.com> and a big online community\n"); doc_path = access(DOC_PATH, F_OK) ? 
"docs" : DOC_PATH; gettimeofday(&tv, &tz); init_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); - while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:Qe:p:s:")) > 0) + while ((opt = getopt(argc, argv, "+i:o:f:m:t:T:dnCB:S:M:x:Qe:p:s:V:E:L:")) > 0) switch (opt) { @@ -8445,6 +12157,110 @@ int main(int argc, char** argv) { break; + case 'V': { + most_time_key = 1; + if (sscanf(optarg, "%llu", &most_time) < 1 || optarg[0] == '-') + FATAL("Bad syntax used for -V"); + } + break; + + case 'E': { + most_execs_key = 1; + if (sscanf(optarg, "%llu", &most_execs) < 1 || optarg[0] == '-') + FATAL("Bad syntax used for -E"); + } + break; + + case 'L': { /* MOpt mode */ + + if (limit_time_sig) FATAL("Multiple -L options not supported"); + limit_time_sig = 1; + havoc_max_mult = HAVOC_MAX_MULT_MOPT; + + if (sscanf(optarg, "%llu", &limit_time_puppet) < 1 || + optarg[0] == '-') FATAL("Bad syntax used for -L"); + + u64 limit_time_puppet2 = limit_time_puppet * 60 * 1000; + + if (limit_time_puppet2 < limit_time_puppet ) FATAL("limit_time overflow"); + limit_time_puppet = limit_time_puppet2; + + SAYF("limit_time_puppet %llu\n",limit_time_puppet); + swarm_now = 0; + + if (limit_time_puppet == 0 ) + key_puppet = 1; + + int i; + int tmp_swarm = 0; + + if (g_now > g_max) g_now = 0; + w_now = (w_init - w_end)*(g_max - g_now) / (g_max)+w_end; + + for (tmp_swarm = 0; tmp_swarm < swarm_num; tmp_swarm++) + { + double total_puppet_temp = 0.0; + swarm_fitness[tmp_swarm] = 0.0; + + for (i = 0; i < operator_num; i++) + { + stage_finds_puppet[tmp_swarm][i] = 0; + probability_now[tmp_swarm][i] = 0.0; + x_now[tmp_swarm][i] = ((double)(random() % 7000)*0.0001 + 0.1); + total_puppet_temp += x_now[tmp_swarm][i]; + v_now[tmp_swarm][i] = 0.1; + L_best[tmp_swarm][i] = 0.5; + G_best[i] = 0.5; + eff_best[tmp_swarm][i] = 0.0; + + } + + for (i = 0; i < operator_num; i++) { + stage_cycles_puppet_v2[tmp_swarm][i] = stage_cycles_puppet[tmp_swarm][i]; + stage_finds_puppet_v2[tmp_swarm][i] = stage_finds_puppet[tmp_swarm][i]; + x_now[tmp_swarm][i] = x_now[tmp_swarm][i] / total_puppet_temp; + } + + double x_temp = 0.0; + + for (i = 0; i < operator_num; i++) + { + probability_now[tmp_swarm][i] = 0.0; + v_now[tmp_swarm][i] = w_now * v_now[tmp_swarm][i] + RAND_C * (L_best[tmp_swarm][i] - x_now[tmp_swarm][i]) + RAND_C * (G_best[i] - x_now[tmp_swarm][i]); + + x_now[tmp_swarm][i] += v_now[tmp_swarm][i]; + + if (x_now[tmp_swarm][i] > v_max) + x_now[tmp_swarm][i] = v_max; + else if (x_now[tmp_swarm][i] < v_min) + x_now[tmp_swarm][i] = v_min; + + x_temp += x_now[tmp_swarm][i]; + } + + for (i = 0; i < operator_num; i++) + { + x_now[tmp_swarm][i] = x_now[tmp_swarm][i] / x_temp; + if (likely(i != 0)) + probability_now[tmp_swarm][i] = probability_now[tmp_swarm][i - 1] + x_now[tmp_swarm][i]; + else + probability_now[tmp_swarm][i] = x_now[tmp_swarm][i]; + } + if (probability_now[tmp_swarm][operator_num - 1] < 0.99 || probability_now[tmp_swarm][operator_num - 1] > 1.01) + FATAL("ERROR probability"); + } + + for (i = 0; i < operator_num; i++) { + core_operator_finds_puppet[i] = 0; + core_operator_finds_puppet_v2[i] = 0; + core_operator_cycles_puppet[i] = 0; + core_operator_cycles_puppet_v2[i] = 0; + core_operator_cycles_puppet_v3[i] = 0; + } + + } + break; + default: usage(argv[0]); @@ -8481,6 +12297,14 @@ int main(int argc, char** argv) { } + OKF("afl++ is maintained by Marc \"van Hauser\" Heuse, Heiko \"hexcoder\" Eissfeldt and Andrea Fioraldi"); + OKF("afl++ is open source, get it at https://github.com/vanhauser-thc/AFLplusplus"); + OKF("Power schedules from 
github.com/mboehme/aflfast"); + OKF("Python Mutator and llvm_mode whitelisting from github.com/choller/afl"); + OKF("afl-tmin fork server patch from github.com/nccgroup/TriforceAFL"); + OKF("MOpt Mutator from github.com/puppet-meteor/MOpt-AFL"); + ACTF("Getting to work..."); + switch (schedule) { case FAST: OKF ("Using exponential power schedule (FAST)"); break; case COE: OKF ("Using cut-off exponential power schedule (COE)"); break; @@ -8632,6 +12456,9 @@ int main(int argc, char** argv) { if (stop_soon) goto stop_fuzzing; } + // real start time, we reset, so this works correctly with -V + start_time = get_cur_time(); + while (1) { u8 skipped_fuzz; @@ -8690,6 +12517,19 @@ int main(int argc, char** argv) { queue_cur = queue_cur->next; current_entry++; + if (most_time_key == 1) { + u64 cur_ms_lv = get_cur_time(); + if (most_time * 1000 < cur_ms_lv - start_time) { + most_time_key = 2; + break; + } + } + if (most_execs_key == 1) { + if (most_execs <= total_execs) { + most_execs_key = 2; + break; + } + } } if (queue_cur) show_stats(); @@ -8703,6 +12543,11 @@ stop_fuzzing: SAYF(CURSOR_SHOW cLRD "\n\n+++ Testing aborted %s +++\n" cRST, stop_soon == 2 ? "programmatically" : "by user"); + if (most_time_key == 2) + SAYF(cYEL "[!] " cRST "Time limit was reached\n"); + if (most_execs_key == 2) + SAYF(cYEL "[!] " cRST "Execution limit was reached\n"); + /* Running for more than 30 minutes but still doing first cycle? */ if (queue_cycle == 1 && get_cur_time() - start_time > 30 * 60 * 1000) { diff --git a/alloc-inl.h b/alloc-inl.h index d3c125fb..04f56d0d 100644 --- a/alloc-inl.h +++ b/alloc-inl.h @@ -83,10 +83,22 @@ ABORT("Use after free."); \ else ABORT("Corrupted head alloc canary."); \ } \ + } \ + } while (0) + +/* +#define CHECK_PTR(_p) do { \ + if (_p) { \ + if (ALLOC_C1(_p) ^ ALLOC_MAGIC_C1) {\ + if (ALLOC_C1(_p) == ALLOC_MAGIC_F) \ + ABORT("Use after free."); \ + else ABORT("Corrupted head alloc canary."); \ + } \ if (ALLOC_C2(_p) ^ ALLOC_MAGIC_C2) \ ABORT("Corrupted tail alloc canary."); \ } \ } while (0) +*/ #define CHECK_PTR_EXPR(_p) ({ \ typeof (_p) _tmp = (_p); \ diff --git a/config.h b/config.h index cebf7c39..d4e27e90 100644 --- a/config.h +++ b/config.h @@ -83,6 +83,7 @@ of 32-bit int overflows): */ #define HAVOC_MAX_MULT 16 +#define HAVOC_MAX_MULT_MOPT 32 /* Absolute minimum number of havoc cycles (after all adjustments): */ diff --git a/docs/ChangeLog b/docs/ChangeLog index 0d730118..dca674a2 100644 --- a/docs/ChangeLog +++ b/docs/ChangeLog @@ -17,6 +17,10 @@ sending a mail to <afl-users+subscribe@googlegroups.com>. Version ++2.52d (tbd): ----------------------------- + - added MOpt (github.com/puppet-meteor/MOpt-AFL) mode + - added never zero counters for afl-gcc and optional (because of an + optimization issue in llvm < 9) for llvm_mode (AFL_LLVM_NEVER_ZERO=1) + - added a new doc about binary only fuzzing: docs/binaryonly_fuzzing.txt - more cpu power for afl-system-config - added forkserver patch to afl-tmin, makes it much faster (originally from github.com/nccgroup/TriforceAFL) @@ -27,9 +31,13 @@ Version ++2.52d (tbd): see docs/python_mutators.txt (originally by choller@mozilla) - added AFL_CAL_FAST for slow applications and AFL_DEBUG_CHILD_OUTPUT for debugging + - added -V time and -E execs option to better comparison runs, runs afl-fuzz + for a specific time/executions. - added a -s seed switch to allow afl run with a fixed initial seed that is not updated. this is good for performance and path discovery tests as the random numbers are deterministic then + - llvm_mode LAF_... 
env variables can now be specified as AFL_LLVM_LAF_... + that is longer but in line with other llvm specific env vars - ... your idea or patch? diff --git a/docs/PATCHES b/docs/PATCHES index f61f8d24..f6ca9284 100644 --- a/docs/PATCHES +++ b/docs/PATCHES @@ -17,10 +17,12 @@ afl-qemu-optimize-entrypoint.diff by mh(at)mh-sec(dot)de afl-qemu-speed.diff by abiondo on github afl-qemu-optimize-map.diff by mh(at)mh-sec(dot)de ++ instrim (https://github.com/csienslab/instrim) was integrated ++ MOpt (github.com/puppet-meteor/MOpt-AFL) was imported + AFLfast additions (github.com/mboehme/aflfast) were incorporated. + Qemu 3.1 upgrade with enhancement patches (github.com/andreafioraldi/afl) -+ Python mutator modules support (github.com/choeller/afl) -+ Whitelisting in LLVM mode (github.com/choeller/afl) ++ Python mutator modules support (github.com/choller/afl) ++ Whitelisting in LLVM mode (github.com/choller/afl) + forkserver patch for afl-tmin (github.com/nccgroup/TriforceAFL) diff --git a/docs/README b/docs/README index ca8533f7..54e3e4a4 100644 --- a/docs/README +++ b/docs/README @@ -2,27 +2,31 @@ american fuzzy lop plus plus ============================ - Written by Michal Zalewski <lcamtuf@google.com> + Originally written by Michal Zalewski <lcamtuf@google.com> Repository: https://github.com/vanhauser-thc/AFLplusplus - afl++ is maintained by Marc Heuse <mh@mh-sec.de> and Heiko Eissfeldt - <heiko.eissfeldt@hexco.de> as there have been no updates to afl since - November 2017. + afl++ is maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eissfeldt + <heiko.eissfeldt@hexco.de> and Andrea Fioraldi as there have been no + updates to afl since November 2017. - This version has several bug fixes, new features and speed enhancements - based on community patches from https://github.com/vanhauser-thc/afl-patches - To see the list of which patches have been applied, see the PATCHES file. + Many improvements were made, e.g. more performant llvm_mode, supporting + llvm up to version 8, Qemu 3.1, more speed for Qemu, etc. Additionally AFLfast's power schedules by Marcel Boehme from - github.com/mboehme/aflfast have been incorporated. + https://github.com/mboehme/aflfast have been incorporated. - Plus it was upgraded to qemu 3.1 from 2.1 with the work of - https://github.com/andreafioraldi/afl and got the community patches applied - to it. + C. Hollers afl-fuzz Python mutator module and llvm_mode whitelist support + was added too (https://github.com/choller/afl) - C. Hoellers afl-fuzz Python mutator module and llvm_mode whitelist support - was added too (https://github.com/choeller/afl) + New is the excellent MOpt mutator from + https://github.com/puppet-meteor/MOpt-AFL + + Also newly integrated is instrim, a very effective CFG llvm_mode + instrumentation implementation which replaced the original afl one and is + from https://github.com/csienslab/instrim + + A more thorough list is available in the PATCHES file. So all in all this is the best-of AFL that is currently out there :-) diff --git a/docs/README.MOpt b/docs/README.MOpt new file mode 100644 index 00000000..836f5200 --- /dev/null +++ b/docs/README.MOpt @@ -0,0 +1,43 @@ +# MOpt(imized) AFL by <puppet@zju.edu.cn> + +### 1. Description +MOpt-AFL is a AFL-based fuzzer that utilizes a customized Particle Swarm +Optimization (PSO) algorithm to find the optimal selection probability +distribution of operators with respect to fuzzing effectiveness. +More details can be found in the technical report. + +### 2. 
Cite Information +Chenyang Lv, Shouling Ji, Chao Zhang, Yuwei Li, Wei-Han Lee, Yu Song and +Raheem Beyah, MOPT: Optimized Mutation Scheduling for Fuzzers, +USENIX Security 2019. + +### 3. Seed Sets +We open source all the seed sets used in the paper +"MOPT: Optimized Mutation Scheduling for Fuzzers". + +### 4. Experiment Results +The experiment results can be found in +https://drive.google.com/drive/folders/184GOzkZGls1H2NuLuUfSp9gfqp1E2-lL?usp=sharing. We only open source the crash files since the space is limited. + +### 5. Technical Report +MOpt_TechReport.pdf is the technical report of the paper +"MOPT: Optimized Mutation Scheduling for Fuzzers", which contains more deatails. + +### 6. Parameter Introduction +Most important, you must add the parameter `-L` (e.g., `-L 0`) to launch the +MOpt scheme. +<br>`-L` controls the time to move on to the pacemaker fuzzing mode. +<br>`-L t:` when MOpt-AFL finishes the mutation of one input, if it has not +discovered any new unique crash or path for more than t min, MOpt-AFL will +enter the pacemaker fuzzing mode. +<br>Setting 0 will enter the pacemaker fuzzing mode at first, which is +recommended in a short time-scale evaluation. + +Other important parameters can be found in afl-fuzz.c, for instance, +<br>`swarm_num:` the number of the PSO swarms used in the fuzzing process. +<br>`period_pilot:` how many times MOpt-AFL will execute the target program in the pilot fuzzing module, then it will enter the core fuzzing module. +<br>`period_core:` how many times MOpt-AFL will execute the target program in the core fuzzing module, then it will enter the PSO updating module. +<br>`limit_time_bound:` control how many interesting test cases need to be found before MOpt-AFL quits the pacemaker fuzzing mode and reuses the deterministic stage. +0 < `limit_time_bound` < 1, MOpt-AFL-tmp. `limit_time_bound` >= 1, MOpt-AFL-ever. + +Having fun with MOpt in AFL! diff --git a/docs/binaryonly_fuzzing.txt b/docs/binaryonly_fuzzing.txt new file mode 100644 index 00000000..f370ec74 --- /dev/null +++ b/docs/binaryonly_fuzzing.txt @@ -0,0 +1,115 @@ + +Fuzzing binary-only programs with afl++ +======================================= + +afl++, libfuzzer and others are great if you have the source code, and +it allows for very fast and coverage guided fuzzing. + +However, if there is only the binary program and not source code available, +then standard afl++ (dumb mode) is not effective. + +The following is a description of how these can be fuzzed with afl++ + +!!!!! +DTLR: try DYNINST with afl-dyninst. If it produces too many crashes then + use afl -Q qemu_mode. +!!!!! + + +QEMU +---- +Qemu is the "native" solution to the program. +It is available in the ./qemu_mode/ directory and once compiled it can +be accessed by the afl-fuzz -Q command line option. +The speed decrease is at about 50% +It the easiest to use alternative and even works for cross-platform binaries. + +As it is included in afl++ this needs no URL. + + +DYNINST +------- +Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio +(see far below). Howver whereas Pintool and Dynamorio work at runtime, dyninst +instruments the target at load time, and then let it run. +This is great for some things, e.g. fuzzing, and not so effective for others, +e.g. malware analysis. + +So what we can do with dyninst is taking every basic block, and put afl's +instrumention code in there - and then save the binary. +Afterwards we can just fuzz the newly saved target binary with afl-fuzz. +Sounds great? 
It is. The issue though - this is a non-trivial problem to +insert instructions, which changes addresses in the process space and that +everything still works afterwards. Hence more often than not binaries +crash when they are run. + +The speed decrease is about 15-35%, depending on the optimization options +used with afl-dyninst. + +So if dyninst works, its the best option available. Otherwise it just doesn't +work well. + +https://github.com/vanhauser-thc/afl-dyninst + + +INTEL-PT +-------- +The big issue with Intel's PT is the small buffer size and the complex +encoding of the debug information collected through PT. +This makes the decoding very CPU intensive and hence slow. +As a result, the overall speed decrease is about 70-90% (depending on +the implementation and other factors) + +there are two afl intel-pt implementations: + +1. https://github.com/junxzm1990/afl-pt + => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel. + +2. https://github.com/hunter-ht-2018/ptfuzzer + => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must + be used. This one is faster than the other. + + +CORESIGHT +--------- + +Coresight is the ARM answer to Intel's PT. +There is no implementation so far which handle coresight and getting +it working on an ARM Linux is very difficult due custom kernel building +on embedded systems is difficult. And finding one that has coresight in +the ARM chip is difficult too. +My guess is that it is slower than Qemu, but faster than Intel PT. +If anyone finds any coresight implemention for afl please ping me: +vh@thc.org + + +PIN & DYNAMORIO +--------------- + +Pintool and Dynamorio are dynamic instrumentation engines, and they can be +used for getting basic block information at runtime. +Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows +whereas Dynamorio is additionally available for ARM and AARCH64. +Dynamorio is also 10x faster than Pintool. + +The big issue with Dynamorio (and therefore Pintool too) is speed. +Dynamorio has a speed decrease of 98-99% +Pintool has a speed decrease of 99.5% + +Hence Dynamorio is the option to go for if everything fails, and Pintool +only if Dynamorio fails too. + +Dynamorio solutions: + https://github.com/vanhauser-thc/afl-dynamorio + https://github.com/mxmssh/drAFL + https://github.com/googleprojectzero/winafl/ <= very good but windows only + +Pintool solutions: + https://github.com/vanhauser-thc/afl-pin + https://github.com/mothran/aflpin + https://github.com/spinpx/afl_pin_mode <= only old Pintool version supported + + +That's it! +News, corrections, updates? +Email vh@thc.org diff --git a/docs/env_variables.txt b/docs/env_variables.txt index f5db3b4f..8e2723d7 100644 --- a/docs/env_variables.txt +++ b/docs/env_variables.txt @@ -82,6 +82,9 @@ discussed in section #1, with the exception of: - TMPDIR and AFL_KEEP_ASSEMBLY, since no temporary assembly files are created. + - AFL_INST_RATIO, as we switched for instrim instrumentation which + is more effective but makes not much sense together with this option. + Then there are a few specific features that are only available in llvm_mode: LAF-INTEL @@ -89,11 +92,11 @@ Then there are a few specific features that are only available in llvm_mode: This great feature will split compares to series of single byte comparisons to allow afl-fuzz to find otherwise rather impossible paths. 
- - Setting LAF_SPLIT_SWITCHES will split switch()es + - Setting AFL_LLVM_LAF_SPLIT_SWITCHES will split switch()es - - Setting LAF_TRANSFORM_COMPARES will split string compare functions + - Setting AFL_LLVM_LAF_TRANSFORM_COMPARES will split string compare functions - - Setting LAF_SPLIT_COMPARES will split > 8 bit CMP instructions + - Setting AFL_LLVM_LAF_SPLIT_COMPARES will split > 8 bit CMP instructions See llvm_mode/README.laf-intel for more information. @@ -106,9 +109,17 @@ Then there are a few specific features that are only available in llvm_mode: See llvm_mode/README.whitelist for more information. -Note that AFL_INST_RATIO will behave a bit differently than for afl-gcc, -because functions are *not* instrumented unconditionally - so low values -will have a more striking effect. For this tool, 0 is not a valid choice. + OTHER + ===== + - Setting LOOPHEAD=1 optimized loops. afl-fuzz will only be able to + see the path the loop took, but not how many times it was called + (unless its a complex loop). + + - Setting AFL_LLVM_NOT_ZERO=1 during compilation will use counters + that skip zero on overflow. This is the default for llvm >= 9, + however for llvm versions below that this will increase an unnecessary + slowdown due a performance issue that is only fixed in llvm 9+. + This feature increases path discovery by a little bit. 3) Settings for afl-fuzz ------------------------ diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc new file mode 100644 index 00000000..8e9f7667 --- /dev/null +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -0,0 +1,350 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <unistd.h> + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include <unordered_set> +#include <random> +#include <list> +#include <string> +#include <fstream> + +#include "../config.h" +#include "../debug.h" + +#include "MarkNodes.h" + +using namespace llvm; + +static cl::opt<bool> MarkSetOpt("markset", cl::desc("MarkSet"), + cl::init(false)); +static cl::opt<bool> LoopHeadOpt("loophead", cl::desc("LoopHead"), + cl::init(false)); + +namespace { + struct InsTrim : public ModulePass { + + protected: + std::list<std::string> myWhitelist; + + private: + std::mt19937 generator; + int total_instr = 0; + + unsigned genLabel() { + return generator() % 65536; + } + + public: + static char ID; + InsTrim() : ModulePass(ID), generator(0) {//} + +// AFLCoverage() : ModulePass(ID) { + char* instWhiteListFilename = getenv("AFL_LLVM_WHITELIST"); + if (instWhiteListFilename) { + std::string line; + std::ifstream fileStream; + fileStream.open(instWhiteListFilename); + if (!fileStream) + report_fatal_error("Unable to open AFL_LLVM_WHITELIST"); + getline(fileStream, line); + while (fileStream) { + myWhitelist.push_back(line); + getline(fileStream, line); + } + } + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + } + +#if LLVM_VERSION_MAJOR < 4 + const char * +#else + StringRef +#endif + getPassName() const override { + return "InstTrim Instrumentation"; + } + 
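    // Note: AFL_LLVM_WHITELIST (read in the constructor above) is expected to
    // hold one source-path entry per line; runOnModule() below matches each
    // entry as a suffix of the compilation unit's file name, so a hypothetical
    // entry "src/parser.c" would enable instrumentation for any file whose
    // path ends in src/parser.c.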
+ bool runOnModule(Module &M) override { + char be_quiet = 0; + + if (isatty(2) && !getenv("AFL_QUIET")) { + SAYF(cCYA "LLVMInsTrim" VERSION cRST " by csienslab\n"); + } else be_quiet = 1; + +#if LLVM_VERSION_MAJOR < 9 + char* neverZero_counters_str; + if ((neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO")) != NULL) + OKF("LLVM neverZero activated (by hexcoder)\n"); +#endif + + if (getenv("LOOPHEAD")) { + LoopHeadOpt = true; + } + + // this is our default + MarkSetOpt = true; + +/* // I dont think this makes sense to port into LLVMInsTrim + char* inst_ratio_str = getenv("AFL_INST_RATIO"); + unsigned int inst_ratio = 100; + if (inst_ratio_str) { + if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || inst_ratio > 100) + FATAL("Bad value of AFL_INST_RATIO (must be between 1 and 100)"); + } +*/ + + LLVMContext &C = M.getContext(); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + + GlobalVariable *CovMapPtr = new GlobalVariable( + M, PointerType::getUnqual(Int8Ty), false, GlobalValue::ExternalLinkage, + nullptr, "__afl_area_ptr"); + + GlobalVariable *OldPrev = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", + 0, GlobalVariable::GeneralDynamicTLSModel, 0, false); + + u64 total_rs = 0; + u64 total_hs = 0; + + for (Function &F : M) { + if (!F.size()) { + continue; + } + + if (!myWhitelist.empty()) { + bool instrumentBlock = false; + DebugLoc Loc; + StringRef instFilename; + + for (auto &BB : F) { + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + if (!Loc) + Loc = IP->getDebugLoc(); + } + + if ( Loc ) { + DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode()); + + unsigned int instLine = cDILoc->getLine(); + instFilename = cDILoc->getFilename(); + + if (instFilename.str().empty()) { + /* If the original location is empty, try using the inlined location */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); + } + } + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + for (std::list<std::string>::iterator it = myWhitelist.begin(); it != myWhitelist.end(); ++it) { + if (instFilename.str().length() >= it->length()) { + if (instFilename.str().compare(instFilename.str().length() - it->length(), it->length(), *it) == 0) { + instrumentBlock = true; + break; + } + } + } + } + } + + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. */ + if (!instrumentBlock) { + if (!instFilename.str().empty()) + SAYF(cYEL "[!] " cBRI "Not in whitelist, skipping %s ...\n", instFilename.str().c_str()); + else + SAYF(cYEL "[!] 
" cBRI "No filename information found, skipping it"); + continue; + } + } + + std::unordered_set<BasicBlock *> MS; + if (!MarkSetOpt) { + for (auto &BB : F) { + MS.insert(&BB); + } + total_rs += F.size(); + } else { + auto Result = markNodes(&F); + auto RS = Result.first; + auto HS = Result.second; + + MS.insert(RS.begin(), RS.end()); + if (!LoopHeadOpt) { + MS.insert(HS.begin(), HS.end()); + total_rs += MS.size(); + } else { + DenseSet<std::pair<BasicBlock *, BasicBlock *>> EdgeSet; + DominatorTreeWrapperPass *DTWP = &getAnalysis<DominatorTreeWrapperPass>(F); + auto DT = &DTWP->getDomTree(); + + total_rs += RS.size(); + total_hs += HS.size(); + + for (BasicBlock *BB : HS) { + bool Inserted = false; + for (auto BI = pred_begin(BB), BE = pred_end(BB); + BI != BE; ++BI + ) { + auto Edge = BasicBlockEdge(*BI, BB); + if (Edge.isSingleEdge() && DT->dominates(Edge, BB)) { + EdgeSet.insert({*BI, BB}); + Inserted = true; + break; + } + } + if (!Inserted) { + MS.insert(BB); + total_rs += 1; + total_hs -= 1; + } + } + for (auto I = EdgeSet.begin(), E = EdgeSet.end(); I != E; ++I) { + auto PredBB = I->first; + auto SuccBB = I->second; + auto NewBB = SplitBlockPredecessors(SuccBB, {PredBB}, ".split", + DT, nullptr, +#if LLVM_VERSION_MAJOR >= 8 + nullptr, +#endif + false); + MS.insert(NewBB); + } + } + + auto *EBB = &F.getEntryBlock(); + if (succ_begin(EBB) == succ_end(EBB)) { + MS.insert(EBB); + total_rs += 1; + } + + for (BasicBlock &BB : F) { + if (MS.find(&BB) == MS.end()) { + continue; + } + IRBuilder<> IRB(&*BB.getFirstInsertionPt()); + IRB.CreateStore(ConstantInt::get(Int32Ty, genLabel()), OldPrev); + } + } + + for (BasicBlock &BB : F) { + auto PI = pred_begin(&BB); + auto PE = pred_end(&BB); + if (MarkSetOpt && MS.find(&BB) == MS.end()) { + continue; + } + + IRBuilder<> IRB(&*BB.getFirstInsertionPt()); + Value *L = NULL; + if (PI == PE) { + L = ConstantInt::get(Int32Ty, genLabel()); + } else { + auto *PN = PHINode::Create(Int32Ty, 0, "", &*BB.begin()); + DenseMap<BasicBlock *, unsigned> PredMap; + for (auto PI = pred_begin(&BB), PE = pred_end(&BB); + PI != PE; ++PI + ) { + BasicBlock *PBB = *PI; + auto It = PredMap.insert({PBB, genLabel()}); + unsigned Label = It.first->second; + PN->addIncoming(ConstantInt::get(Int32Ty, Label), PBB); + } + L = PN; + } + + /* Load prev_loc */ + LoadInst *PrevLoc = IRB.CreateLoad(OldPrev); + PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); + + /* Load SHM pointer */ + LoadInst *MapPtr = IRB.CreateLoad(CovMapPtr); + MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, L)); + + /* Update bitmap */ + LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); + Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + Value *Incr = IRB.CreateAdd(Counter, ConstantInt::get(Int8Ty, 1)); + +#if LLVM_VERSION_MAJOR < 9 + if (neverZero_counters_str != NULL) { // with llvm 9 we make this the default as the bug in llvm is then fixed +#else + #warning "neverZero implementation needs to be reviewed!" +#endif + /* hexcoder: Realize a counter that skips zero during overflow. 
+ * Once this counter reaches its maximum value, it next increments to 1 + * + * Instead of + * Counter + 1 -> Counter + * we inject now this + * Counter + 1 -> {Counter, OverflowFlag} + * Counter + OverflowFlag -> Counter + */ + auto cf = IRB.CreateICmpEQ(Incr, ConstantInt::get(Int8Ty, 0)); + auto carry = IRB.CreateZExt(cf, Int8Ty); + Incr = IRB.CreateAdd(Incr, carry); +#if LLVM_VERSION_MAJOR < 9 + } +#endif + + IRB.CreateStore(Incr, MapPtrIdx)->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + /* Set prev_loc to cur_loc >> 1 */ + /* + StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc); + Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + */ + + total_instr++; + } + } + + OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n"/*", ratio %u%%)."*/, + total_instr, total_rs, total_hs, + getenv("AFL_HARDEN") ? "hardened" : + ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) ? + "ASAN/MSAN" : "non-hardened")/*, inst_ratio*/); + return false; + } + }; // end of struct InsTrim +} // end of anonymous namespace + +char InsTrim::ID = 0; + +static void registerAFLPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + PM.add(new InsTrim()); +} + +static RegisterStandardPasses RegisterAFLPass( + PassManagerBuilder::EP_OptimizerLast, registerAFLPass); + +static RegisterStandardPasses RegisterAFLPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass); diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index 815ac59d..d0d4b690 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -27,13 +27,18 @@ VERSION = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2) LLVM_CONFIG ?= llvm-config LLVMVER = $(shell $(LLVM_CONFIG) --version) -#LLVM_OK = $(shell $(LLVM_CONFIG) --version | egrep -q '^[5-6]' && echo 0 || echo 1 ) -LLVM_UNSUPPORTED = $(shell echo $(LLVMVER) | egrep -q '^9|3.0' && echo 1 || echo 1 ) +LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version | egrep -q '^9|3.0' && echo 1 || echo 0 ) +LLVM_MAJOR = ($shell $(LLVM_CONFIG) --version | sed 's/\..*//') ifeq "$(LLVM_UNSUPPORTED)" "1" $(warn llvm_mode only supports versions 3.8.0 up to 8.x ) endif +# this is not visible yet: +ifeq "$(LLVM_MAJOR)" "9" + $(info llvm_mode deteted llvm 9, enabling neverZero implementation) +endif + CFLAGS ?= -O3 -funroll-loops CFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \ -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \ @@ -89,7 +94,7 @@ endif ifndef AFL_TRACE_PC - PROGS = ../afl-clang-fast ../afl-llvm-pass.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so + PROGS = ../afl-clang-fast ../libLLVMInsTrim.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so else PROGS = ../afl-clang-fast ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so endif @@ -140,8 +145,8 @@ endif $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) ln -sf afl-clang-fast ../afl-clang-fast++ -../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps - $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) +../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps + $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=gnu++11 -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) # laf ../split-switches-pass.so: split-switches-pass.so.cc | test_deps @@ -165,7 +170,7 @@ endif 
test_build: $(PROGS) @echo "[*] Testing the CC wrapper and instrumentation output..." - unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_CC=$(CC) LAF_SPLIT_SWITCHES=1 LAF_TRANSFORM_COMPARES=1 LAF_SPLIT_COMPARES=1 ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS) + unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_CC=$(CC) AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS) echo 0 | ../afl-showmap -m none -q -o .test-instr0 ./test-instr echo 1 | ../afl-showmap -m none -q -o .test-instr1 ./test-instr @rm -f test-instr diff --git a/llvm_mode/MarkNodes.cc b/llvm_mode/MarkNodes.cc new file mode 100644 index 00000000..a156fccb --- /dev/null +++ b/llvm_mode/MarkNodes.cc @@ -0,0 +1,355 @@ +#include <algorithm> +#include <map> +#include <queue> +#include <set> +#include <vector> +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +DenseMap<BasicBlock *, uint32_t> LMap; +std::vector<BasicBlock *> Blocks; +std::set<uint32_t> Marked , Markabove; +std::vector< std::vector<uint32_t> > Succs , Preds; + +void reset(){ + LMap.clear(); + Blocks.clear(); + Marked.clear(); + Markabove.clear(); +} + +uint32_t start_point; + +void labelEachBlock(Function *F) { + // Fake single endpoint; + LMap[NULL] = Blocks.size(); + Blocks.push_back(NULL); + + // Assign the unique LabelID to each block; + for (auto I = F->begin(), E = F->end(); I != E; ++I) { + BasicBlock *BB = &*I; + LMap[BB] = Blocks.size(); + Blocks.push_back(BB); + } + + start_point = LMap[&F->getEntryBlock()]; +} + +void buildCFG(Function *F) { + Succs.resize( Blocks.size() ); + Preds.resize( Blocks.size() ); + for( size_t i = 0 ; i < Succs.size() ; i ++ ){ + Succs[ i ].clear(); + Preds[ i ].clear(); + } + + //uint32_t FakeID = 0; + for (auto S = F->begin(), E = F->end(); S != E; ++S) { + BasicBlock *BB = &*S; + uint32_t MyID = LMap[BB]; + //if (succ_begin(BB) == succ_end(BB)) { + //Succs[MyID].push_back(FakeID); + //Marked.insert(MyID); + //} + for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + Succs[MyID].push_back(LMap[*I]); + } + } +} + +std::vector< std::vector<uint32_t> > tSuccs; +std::vector<bool> tag , indfs; + +void DFStree(size_t now_id) { + if(tag[now_id]) return; + tag[now_id]=true; + indfs[now_id]=true; + for (auto succ: tSuccs[now_id]) { + if(tag[succ] and indfs[succ]) { + Marked.insert(succ); + Markabove.insert(succ); + continue; + } + Succs[now_id].push_back(succ); + Preds[succ].push_back(now_id); + DFStree(succ); + } + indfs[now_id]=false; +} +void turnCFGintoDAG(Function *F) { + tSuccs = Succs; + tag.resize(Blocks.size()); + indfs.resize(Blocks.size()); + for (size_t i = 0; i < Blocks.size(); ++ i) { + Succs[i].clear(); + tag[i]=false; + indfs[i]=false; + } + DFStree(start_point); + for (size_t i = 0; i < Blocks.size(); ++ i) + if( Succs[i].empty() ){ + Succs[i].push_back(0); + Preds[0].push_back(i); + } +} + +uint32_t timeStamp; +namespace DominatorTree{ + std::vector< std::vector<uint32_t> > cov; + std::vector<uint32_t> dfn, nfd, par, sdom, idom, mom, mn; + + 
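+  // The routines below implement a Lengauer-Tarjan style dominator
+  // computation over the DAG built above:
+  //   dfn[v]  - DFS discovery time of v (1-based, 0 = not yet visited)
+  //   nfd[i]  - node discovered at time i+1 (inverse of dfn)
+  //   par[v]  - parent of v in the DFS tree
+  //   sdom[v] - semi-dominator of v
+  //   idom[v] - immediate dominator of v
+  //   cov[v]  - bucket of nodes whose semi-dominator is v
+  //   mom/mn  - path-compressed forest used by eval() during the sweep
+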
bool Compare(uint32_t u, uint32_t v) { + return dfn[u] < dfn[v]; + } + uint32_t eval(uint32_t u) { + if( mom[u] == u ) return u; + uint32_t res = eval( mom[u] ); + if(Compare(sdom[mn[mom[u]]] , sdom[mn[u]])) { + mn[u] = mn[mom[u]]; + } + return mom[u] = res; + } + + void DFS(uint32_t now) { + timeStamp += 1; + dfn[now] = timeStamp; + nfd[timeStamp - 1] = now; + for( auto succ : Succs[now] ) { + if( dfn[succ] == 0 ) { + par[succ] = now; + DFS(succ); + } + } + } + + void DominatorTree(Function *F) { + if( Blocks.empty() ) return; + uint32_t s = start_point; + + // Initialization + mn.resize(Blocks.size()); + cov.resize(Blocks.size()); + dfn.resize(Blocks.size()); + nfd.resize(Blocks.size()); + par.resize(Blocks.size()); + mom.resize(Blocks.size()); + sdom.resize(Blocks.size()); + idom.resize(Blocks.size()); + + for( uint32_t i = 0 ; i < Blocks.size() ; i ++ ) { + dfn[i] = 0; + nfd[i] = Blocks.size(); + cov[i].clear(); + idom[i] = mom[i] = mn[i] = sdom[i] = i; + } + + timeStamp = 0; + DFS(s); + + for( uint32_t i = Blocks.size() - 1 ; i >= 1u ; i -- ) { + uint32_t now = nfd[i]; + if( now == Blocks.size() ) { + continue; + } + for( uint32_t pre : Preds[ now ] ) { + if( dfn[ pre ] ) { + eval(pre); + if( Compare(sdom[mn[pre]], sdom[now]) ) { + sdom[now] = sdom[mn[pre]]; + } + } + } + cov[sdom[now]].push_back(now); + mom[now] = par[now]; + for( uint32_t x : cov[par[now]] ) { + eval(x); + if( Compare(sdom[mn[x]], par[now]) ) { + idom[x] = mn[x]; + } else { + idom[x] = par[now]; + } + } + } + + for( uint32_t i = 1 ; i < Blocks.size() ; i += 1 ) { + uint32_t now = nfd[i]; + if( now == Blocks.size() ) { + continue; + } + if(idom[now] != sdom[now]) + idom[now] = idom[idom[now]]; + } + } +}; // End of DominatorTree + +std::vector<uint32_t> Visited, InStack; +std::vector<uint32_t> TopoOrder, InDeg; +std::vector< std::vector<uint32_t> > t_Succ , t_Pred; + +void Go(uint32_t now, uint32_t tt) { + if( now == tt ) return; + Visited[now] = InStack[now] = timeStamp; + + for(uint32_t nxt : Succs[now]) { + if(Visited[nxt] == timeStamp and InStack[nxt] == timeStamp) { + Marked.insert(nxt); + } + t_Succ[now].push_back(nxt); + t_Pred[nxt].push_back(now); + InDeg[nxt] += 1; + if(Visited[nxt] == timeStamp) { + continue; + } + Go(nxt, tt); + } + + InStack[now] = 0; +} + +void TopologicalSort(uint32_t ss, uint32_t tt) { + timeStamp += 1; + + Go(ss, tt); + + TopoOrder.clear(); + std::queue<uint32_t> wait; + wait.push(ss); + while( not wait.empty() ) { + uint32_t now = wait.front(); wait.pop(); + TopoOrder.push_back(now); + for(uint32_t nxt : t_Succ[now]) { + InDeg[nxt] -= 1; + if(InDeg[nxt] == 0u) { + wait.push(nxt); + } + } + } +} + +std::vector< std::set<uint32_t> > NextMarked; +bool Indistinguish(uint32_t node1, uint32_t node2) { + if(NextMarked[node1].size() > NextMarked[node2].size()){ + uint32_t _swap = node1; + node1 = node2; + node2 = _swap; + } + for(uint32_t x : NextMarked[node1]) { + if( NextMarked[node2].find(x) != NextMarked[node2].end() ) { + return true; + } + } + return false; +} + +void MakeUniq(uint32_t now) { + bool StopFlag = false; + if (Marked.find(now) == Marked.end()) { + for(uint32_t pred1 : t_Pred[now]) { + for(uint32_t pred2 : t_Pred[now]) { + if(pred1 == pred2) continue; + if(Indistinguish(pred1, pred2)) { + Marked.insert(now); + StopFlag = true; + break; + } + } + if (StopFlag) { + break; + } + } + } + if(Marked.find(now) != Marked.end()) { + NextMarked[now].insert(now); + } else { + for(uint32_t pred : t_Pred[now]) { + for(uint32_t x : NextMarked[pred]) { + NextMarked[now].insert(x); + } + } 
+ } +} + +void MarkSubGraph(uint32_t ss, uint32_t tt) { + TopologicalSort(ss, tt); + if(TopoOrder.empty()) return; + + for(uint32_t i : TopoOrder) { + NextMarked[i].clear(); + } + + NextMarked[TopoOrder[0]].insert(TopoOrder[0]); + for(uint32_t i = 1 ; i < TopoOrder.size() ; i += 1) { + MakeUniq(TopoOrder[i]); + } +} + +void MarkVertice(Function *F) { + uint32_t s = start_point; + + InDeg.resize(Blocks.size()); + Visited.resize(Blocks.size()); + InStack.resize(Blocks.size()); + t_Succ.resize(Blocks.size()); + t_Pred.resize(Blocks.size()); + NextMarked.resize(Blocks.size()); + + for( uint32_t i = 0 ; i < Blocks.size() ; i += 1 ) { + Visited[i] = InStack[i] = InDeg[i] = 0; + t_Succ[i].clear(); + t_Pred[i].clear(); + } + timeStamp = 0; + uint32_t t = 0; + //MarkSubGraph(s, t); + //return; + + while( s != t ) { + MarkSubGraph(DominatorTree::idom[t], t); + t = DominatorTree::idom[t]; + } + +} + +// return {marked nodes} +std::pair<std::vector<BasicBlock *>, + std::vector<BasicBlock *> >markNodes(Function *F) { + assert(F->size() > 0 && "Function can not be empty"); + + reset(); + labelEachBlock(F); + buildCFG(F); + turnCFGintoDAG(F); + DominatorTree::DominatorTree(F); + MarkVertice(F); + + std::vector<BasicBlock *> Result , ResultAbove; + for( uint32_t x : Markabove ) { + auto it = Marked.find( x ); + if( it != Marked.end() ) + Marked.erase( it ); + if( x ) + ResultAbove.push_back(Blocks[x]); + } + for( uint32_t x : Marked ) { + if (x == 0) { + continue; + } else { + Result.push_back(Blocks[x]); + } + } + + return { Result , ResultAbove }; +} diff --git a/llvm_mode/MarkNodes.h b/llvm_mode/MarkNodes.h new file mode 100644 index 00000000..e3bf3ce5 --- /dev/null +++ b/llvm_mode/MarkNodes.h @@ -0,0 +1,11 @@ +#ifndef __MARK_NODES__ +#define __MARK_NODES__ + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include<vector> + +std::pair<std::vector<llvm::BasicBlock *>, + std::vector<llvm::BasicBlock *>> markNodes(llvm::Function *F); + +#endif diff --git a/llvm_mode/README.laf-intel b/llvm_mode/README.laf-intel index 891ab5fd..340216c3 100644 --- a/llvm_mode/README.laf-intel +++ b/llvm_mode/README.laf-intel @@ -8,13 +8,13 @@ compile the target project. The following options exist: -export LAF_SPLIT_SWITCHES=1 Enables the split-switches pass. +export AFL_LLVM_LAF_SPLIT_SWITCHES=1 Enables the split-switches pass. -export LAF_TRANSFORM_COMPARES=1 Enables the transform-compares pass +export AFL_LLVM_LAF_TRANSFORM_COMPARES=1 Enables the transform-compares pass (strcmp, memcmp, strncmp, strcasecmp, strncasecmp). -export LAF_SPLIT_COMPARES=1 Enables the split-compares pass. +export AFL_LLVM_LAF_SPLIT_COMPARES=1 Enables the split-compares pass. By default it will split all compares with a bit width <= 64 bits. You can change this behaviour by setting - export LAF_SPLIT_COMPARES_BITW=<bit_width>. + export AFL_LLVM_LAF_SPLIT_COMPARES_BITW=<bit_width>. diff --git a/llvm_mode/README.llvm b/llvm_mode/README.llvm index dc860e97..77c406f8 100644 --- a/llvm_mode/README.llvm +++ b/llvm_mode/README.llvm @@ -38,8 +38,8 @@ co-exists with the original code. The idea and much of the implementation comes from Laszlo Szekeres. -2) How to use -------------- +2) How to use this +------------------ In order to leverage this mechanism, you need to have clang installed on your system. You should also make sure that the llvm-config tool is in your path @@ -69,20 +69,41 @@ operating mode of AFL, e.g.: Be sure to also include CXX set to afl-clang-fast++ for C++ code. 
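 For example, a typical autoconf-style target could be built along these lines
 (the configure flags are purely illustrative, and the AFL_LLVM_LAF_* variables
 are optional - see README.laf-intel):

   export AFL_LLVM_LAF_SPLIT_SWITCHES=1
   export AFL_LLVM_LAF_TRANSFORM_COMPARES=1
   export AFL_LLVM_LAF_SPLIT_COMPARES=1
   CC=afl-clang-fast CXX=afl-clang-fast++ ./configure --disable-shared
   make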
 The tool honors roughly the same environmental variables as afl-gcc (see
-../docs/env_variables.txt). This includes AFL_INST_RATIO, AFL_USE_ASAN,
-AFL_HARDEN, and AFL_DONT_OPTIMIZE.
+../docs/env_variables.txt). This includes AFL_USE_ASAN,
+AFL_HARDEN, and AFL_DONT_OPTIMIZE. However, AFL_INST_RATIO is not honored,
+as it does not serve a useful purpose with the more effective InsTrim CFG
+analysis.

 Note: if you want the LLVM helper to be installed on your system for all
 users, you need to build it before issuing 'make install' in the parent
 directory.

-3) Gotchas, feedback, bugs
+3) Options
+
+Several options are available to make llvm_mode faster or to help it rearrange
+the code so that afl-fuzz discovers paths more easily.
+
+If you only need to instrument specific parts of the code, you can whitelist
+which C/C++ files to actually instrument. See README.whitelist.
+
+For splitting memcmp, strncmp, etc., please see README.laf-intel.
+
+As the original afl llvm_mode instrumentation has been replaced with the
+much more effective InsTrim (https://github.com/csienslab/instrim/), there
+is an option for optimizing loops. This optimization shows which part of a
+loop has been taken, but not how many times the loop has been executed in a
+row (unless it is a complex loop and a block inside it was instrumented).
+If you want to enable this, set the environment variable LOOPHEAD=1.
+
+
+4) Gotchas, feedback, bugs
 --------------------------

 This is an early-stage mechanism, so field reports are welcome. You can send bug
 reports to <afl-users@googlegroups.com>.

-4) Bonus feature #1: deferred instrumentation
+5) Bonus feature #1: deferred instrumentation
 ---------------------------------------------

 AFL tries to optimize performance by executing the targeted binary just once,
@@ -129,7 +150,7 @@ will keep working normally when compiled with a tool other than afl-clang-fast.
 Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will
 *not* generate a deferred-initialization binary) - and you should be all set!

-5) Bonus feature #2: persistent mode
+6) Bonus feature #2: persistent mode
 ------------------------------------

 Some libraries provide APIs that are stateless, or whose state can be reset in
@@ -169,7 +190,7 @@ PS. Because there are task switches still involved, the mode isn't as fast as
 faster than the normal fork() model, and compared to in-process fuzzing,
 should be a lot more robust.

-6) Bonus feature #3: new 'trace-pc-guard' mode
+7) Bonus feature #3: new 'trace-pc-guard' mode
 ----------------------------------------------

 Recent versions of LLVM are shipping with a built-in execution tracing feature
diff --git a/llvm_mode/README.neverzero b/llvm_mode/README.neverzero
new file mode 100644
index 00000000..ef873acb
--- /dev/null
+++ b/llvm_mode/README.neverzero
@@ -0,0 +1,22 @@
+Usage
+=====
+
+In larger, complex or highly iterative programs the map that collects the edge
+pairs can easily fill up and wrap.
+This is not much of an issue - unless, by chance, a counter wraps to exactly 0
+when the program execution ends.
+In that case afl-fuzz is not able to see that the edge pair has been accessed
+and will ignore it.
+
+NeverZero prevents this behaviour. If a counter wraps, it jumps over the 0
+directly to a 1. This improves path discovery (by a small amount)
+at very little cost (one instruction per edge).
+
+This is implemented in afl-gcc; for llvm_mode, however, it is optional if the
+llvm version is below 9, as there is a performance bug that is only fixed in
+llvm 9 and onwards.
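+
+A minimal C sketch of what the instrumentation computes for each map update
+(the names map/idx are illustrative only, this is not the actual runtime code):
+
+  unsigned char incr = map[idx] + 1;   /* may wrap around to 0           */
+  incr += (incr == 0);                 /* fold the carry back in: 0 -> 1 */
+  map[idx] = incr;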
+ +If you want to enable this for llvm < 9 then set + +export AFL_LLVM_NOT_ZERO=1 + diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c index 366389b4..249eea7d 100644 --- a/llvm_mode/afl-clang-fast.c +++ b/llvm_mode/afl-clang-fast.c @@ -32,6 +32,7 @@ #include <unistd.h> #include <stdlib.h> #include <string.h> +#include <assert.h> static u8* obj_path; /* Path to runtime libraries */ static u8** cc_params; /* Parameters passed to the real CC */ @@ -87,7 +88,7 @@ static void find_obj(u8* argv0) { return; } - FATAL("Unable to find 'afl-llvm-rt.o' or 'afl-llvm-pass.so'. Please set AFL_PATH"); + FATAL("Unable to find 'afl-llvm-rt.o' or 'libLLVMInsTrim.so'. Please set AFL_PATH"); } @@ -113,28 +114,28 @@ static void edit_params(u32 argc, char** argv) { } /* There are two ways to compile afl-clang-fast. In the traditional mode, we - use afl-llvm-pass.so to inject instrumentation. In the experimental + use libLLVMInsTrim.so to inject instrumentation. In the experimental 'trace-pc-guard' mode, we use native LLVM instrumentation callbacks instead. The latter is a very recent addition - see: http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards */ // laf - if (getenv("LAF_SPLIT_SWITCHES")) { + if (getenv("LAF_SPLIT_SWITCHES")||getenv("AFL_LLVM_LAF_SPLIT_SWITCHES")) { cc_params[cc_par_cnt++] = "-Xclang"; cc_params[cc_par_cnt++] = "-load"; cc_params[cc_par_cnt++] = "-Xclang"; cc_params[cc_par_cnt++] = alloc_printf("%s/split-switches-pass.so", obj_path); } - if (getenv("LAF_TRANSFORM_COMPARES")) { + if (getenv("LAF_TRANSFORM_COMPARES")||getenv("AFL_LLVM_LAF_TRANSFORM_COMPARES")) { cc_params[cc_par_cnt++] = "-Xclang"; cc_params[cc_par_cnt++] = "-load"; cc_params[cc_par_cnt++] = "-Xclang"; cc_params[cc_par_cnt++] = alloc_printf("%s/compare-transform-pass.so", obj_path); } - if (getenv("LAF_SPLIT_COMPARES")) { + if (getenv("LAF_SPLIT_COMPARES")||getenv("AFL_LLVM_LAF_SPLIT_COMPARES")) { cc_params[cc_par_cnt++] = "-Xclang"; cc_params[cc_par_cnt++] = "-load"; cc_params[cc_par_cnt++] = "-Xclang"; @@ -150,7 +151,8 @@ static void edit_params(u32 argc, char** argv) { cc_params[cc_par_cnt++] = "-Xclang"; cc_params[cc_par_cnt++] = "-load"; cc_params[cc_par_cnt++] = "-Xclang"; - cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); + cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path); +// cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); #endif /* ^USE_TRACE_PC */ cc_params[cc_par_cnt++] = "-Qunused-arguments"; diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc index d46db7c0..cfeff968 100644 --- a/llvm_mode/afl-llvm-pass.so.cc +++ b/llvm_mode/afl-llvm-pass.so.cc @@ -118,6 +118,10 @@ bool AFLCoverage::runOnModule(Module &M) { } +#if LLVM_VERSION_MAJOR < 9 + char* neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO"); +#endif + /* Get globals for the SHM region and the previous location. Note that __afl_prev_loc is thread-local. 
*/ @@ -227,21 +231,69 @@ bool AFLCoverage::runOnModule(Module &M) { LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - Value *MapPtrIdx = - IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, CurLoc)); + Value *MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, CurLoc)); /* Update bitmap */ LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *Incr = IRB.CreateAdd(Counter, ConstantInt::get(Int8Ty, 1)); - IRB.CreateStore(Incr, MapPtrIdx) - ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + +#if LLVM_VERSION_MAJOR < 9 + if (neverZero_counters_str != NULL) { // with llvm 9 we make this the default as the bug in llvm is then fixed +#endif + /* hexcoder: Realize a counter that skips zero during overflow. + * Once this counter reaches its maximum value, it next increments to 1 + * + * Instead of + * Counter + 1 -> Counter + * we inject now this + * Counter + 1 -> {Counter, OverflowFlag} + * Counter + OverflowFlag -> Counter + */ +/* // we keep the old solutions just in case + // Solution #1 + if (neverZero_counters_str[0] == '1') { + CallInst *AddOv = IRB.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, Counter, ConstantInt::get(Int8Ty, 1)); + AddOv->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *SumWithOverflowBit = AddOv; + Incr = IRB.CreateAdd(IRB.CreateExtractValue(SumWithOverflowBit, 0), // sum + IRB.CreateZExt( // convert from one bit type to 8 bits type + IRB.CreateExtractValue(SumWithOverflowBit, 1), // overflow + Int8Ty)); + // Solution #2 + } else if (neverZero_counters_str[0] == '2') { + auto cf = IRB.CreateICmpEQ(Counter, ConstantInt::get(Int8Ty, 255)); + Value *HowMuch = IRB.CreateAdd(ConstantInt::get(Int8Ty, 1), cf); + Incr = IRB.CreateAdd(Counter, HowMuch); + // Solution #3 + } else if (neverZero_counters_str[0] == '3') { +*/ + // this is the solution we choose because llvm9 should do the right thing here + auto cf = IRB.CreateICmpEQ(Incr, ConstantInt::get(Int8Ty, 0)); + auto carry = IRB.CreateZExt(cf, Int8Ty); + Incr = IRB.CreateAdd(Incr, carry); +/* + // Solution #4 + } else if (neverZero_counters_str[0] == '4') { + auto cf = IRB.CreateICmpULT(Incr, ConstantInt::get(Int8Ty, 1)); + auto carry = IRB.CreateZExt(cf, Int8Ty); + Incr = IRB.CreateAdd(Incr, carry); + } else { + fprintf(stderr, "Error: unknown value for AFL_NZERO_COUNTS: %s (valid is 1-4)\n", neverZero_counters_str); + exit(-1); + } +*/ +#if LLVM_VERSION_MAJOR < 9 + } +#endif + + IRB.CreateStore(Incr, MapPtrIdx)->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); /* Set prev_loc to cur_loc >> 1 */ - StoreInst *Store = - IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc); + StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc); Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); inst_blocks++; diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc index d9a1f945..95435db7 100644 --- a/llvm_mode/compare-transform-pass.so.cc +++ b/llvm_mode/compare-transform-pass.so.cc @@ -304,7 +304,8 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, const bool CompareTransform::runOnModule(Module &M) { - llvm::errs() << "Running compare-transform-pass by laf.intel@gmail.com, extended by heiko@hexco.de\n"; + if (getenv("AFL_QUIET") == NULL) + llvm::errs() << "Running compare-transform-pass by 
laf.intel@gmail.com, extended by heiko@hexco.de\n"; transformCmps(M, true, true, true, true, true); verifyModule(M); diff --git a/llvm_mode/split-compares-pass.so.cc b/llvm_mode/split-compares-pass.so.cc index 25ccb3b4..c025628f 100644 --- a/llvm_mode/split-compares-pass.so.cc +++ b/llvm_mode/split-compares-pass.so.cc @@ -477,6 +477,8 @@ bool SplitComparesTransform::runOnModule(Module &M) { int bitw = 64; char* bitw_env = getenv("LAF_SPLIT_COMPARES_BITW"); + if (!bitw_env) + bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW"); if (bitw_env) { bitw = atoi(bitw_env); } @@ -485,7 +487,8 @@ bool SplitComparesTransform::runOnModule(Module &M) { simplifySignedness(M); - errs() << "Split-compare-pass by laf.intel@gmail.com\n"; + if (getenv("AFL_QUIET") == NULL) + errs() << "Split-compare-pass by laf.intel@gmail.com\n"; switch (bitw) { case 64: diff --git a/llvm_mode/split-switches-pass.so.cc b/llvm_mode/split-switches-pass.so.cc index 4c28f34c..1ace3185 100644 --- a/llvm_mode/split-switches-pass.so.cc +++ b/llvm_mode/split-switches-pass.so.cc @@ -244,7 +244,8 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { /* If there is only the default destination or the condition checks 8 bit or less, don't bother with the code below. */ if (!SI->getNumCases() || bitw <= 8) { - errs() << "skip trivial switch..\n"; + if (getenv("AFL_QUIET") == NULL) + errs() << "skip trivial switch..\n"; continue; } @@ -302,7 +303,8 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { bool SplitSwitchesTransform::runOnModule(Module &M) { - llvm::errs() << "Running split-switches-pass by laf.intel@gmail.com\n"; + if (getenv("AFL_QUIET") == NULL) + llvm::errs() << "Running split-switches-pass by laf.intel@gmail.com\n"; splitSwitches(M); verifyModule(M); diff --git a/python_mutators/README b/python_mutators/README index 174c8a64..21a16e52 100644 --- a/python_mutators/README +++ b/python_mutators/README @@ -9,3 +9,7 @@ simple-chunk-replace.py - this is a simple example where chunks are replaced common.py - this can be used for common functions and helpers. the examples do not use this though. But you can :) + +wrapper_afl_min.py - mutation of XML documents, loads XmlMutatorMin.py + +XmlMutatorMin.py - module for XML mutation diff --git a/python_mutators/XmlMutatorMin.py b/python_mutators/XmlMutatorMin.py new file mode 100644 index 00000000..058b7e61 --- /dev/null +++ b/python_mutators/XmlMutatorMin.py @@ -0,0 +1,331 @@ +#!/usr/bin/python + +""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) 
""" + +from __future__ import print_function +from copy import deepcopy +from lxml import etree as ET +import random, re, io + +########################### +# The XmlMutatorMin class # +########################### + +class XmlMutatorMin: + + """ + Optionals parameters: + seed Seed used by the PRNG (default: "RANDOM") + verbose Verbosity (default: False) + """ + + def __init__(self, seed="RANDOM", verbose=False): + + """ Initialize seed, database and mutators """ + + # Verbosity + self.verbose = verbose + + # Initialize PRNG + self.seed = str(seed) + if self.seed == "RANDOM": + random.seed() + else: + if self.verbose: + print("Static seed '%s'" % self.seed) + random.seed(self.seed) + + # Initialize input and output documents + self.input_tree = None + self.tree = None + + # High-level mutators (no database needed) + hl_mutators_delete = [ "del_node_and_children", "del_node_but_children", "del_attribute", "del_content" ] # Delete items + hl_mutators_fuzz = ["fuzz_attribute"] # Randomly change attribute values + + # Exposed mutators + self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete + + def __parse_xml (self, xml): + + """ Parse an XML string. Basic wrapper around lxml.parse() """ + + try: + # Function parse() takes care of comments / DTD / processing instructions / ... + tree = ET.parse(io.BytesIO(xml)) + except ET.ParseError: + raise RuntimeError("XML isn't well-formed!") + except LookupError as e: + raise RuntimeError(e) + + # Return a document wrapper + return tree + + def __exec_among (self, module, functions, min_times, max_times): + + """ Randomly execute $functions between $min and $max times """ + + for i in xrange (random.randint (min_times, max_times)): + # Function names are mangled because they are "private" + getattr (module, "_XmlMutatorMin__" + random.choice(functions)) () + + def __serialize_xml (self, tree): + + """ Serialize a XML document. Basic wrapper around lxml.tostring() """ + + return ET.tostring(tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding) + + def __ver (self, version): + + """ Helper for displaying lxml version numbers """ + + return ".".join(map(str, version)) + + def reset (self): + + """ Reset the mutator """ + + self.tree = deepcopy(self.input_tree) + + def init_from_string (self, input_string): + + """ Initialize the mutator from a XML string """ + + # Get a pointer to the top-element + self.input_tree = self.__parse_xml(input_string) + + # Get a working copy + self.tree = deepcopy(self.input_tree) + + def save_to_string (self): + + """ Return the current XML document as UTF-8 string """ + + # Return a text version of the tree + return self.__serialize_xml(self.tree) + + def __pick_element (self, exclude_root_node = False): + + """ Pick a random element from the current document """ + + # Get a list of all elements, but nodes like PI and comments + elems = list(self.tree.getroot().iter(tag=ET.Element)) + + # Is the root node excluded? + if exclude_root_node: + start = 1 + else: + start = 0 + + # Pick a random element + try: + elem_id = random.randint (start, len(elems) - 1) + elem = elems[elem_id] + except ValueError: + # Should only occurs if "exclude_root_node = True" + return (None, None) + + return (elem_id, elem) + + def __fuzz_attribute (self): + + """ Fuzz (part of) an attribute value """ + + # Select a node to modify + (rand_elem_id, rand_elem) = self.__pick_element() + + # Get all the attributes + attribs = rand_elem.keys() + + # Is there attributes? 
+ if len(attribs) < 1: + if self.verbose: + print("No attribute: can't replace!") + return + + # Pick a random attribute + rand_attrib_id = random.randint (0, len(attribs) - 1) + rand_attrib = attribs[rand_attrib_id] + + # We have the attribute to modify + # Get its value + attrib_value = rand_elem.get(rand_attrib); + # print("- Value: " + attrib_value) + + # Should we work on the whole value? + func_call = "(?P<func>[a-zA-Z:\-]+)\((?P<args>.*?)\)" + p = re.compile(func_call) + l = p.findall(attrib_value) + if random.choice((True,False)) and l: + # Randomly pick one the function calls + (func, args) = random.choice(l) + # Split by "," and randomly pick one of the arguments + value = random.choice(args.split(',')) + # Remove superfluous characters + unclean_value = value + value = value.strip(" ").strip("'") + # print("Selected argument: [%s]" % value) + else: + value = attrib_value + + # For each type, define some possible replacement values + choices_number = ( \ + "0", \ + "11111", \ + "-128", \ + "2", \ + "-1", \ + "1/3", \ + "42/0", \ + "1094861636 idiv 1.0", \ + "-1123329771506872 idiv 3.8", \ + "17=$numericRTF", \ + str(3 + random.randrange(0, 100)), \ + ) + + choices_letter = ( \ + "P" * (25 * random.randrange(1, 100)), \ + "%s%s%s%s%s%s", \ + "foobar", \ + ) + + choices_alnum = ( \ + "Abc123", \ + "020F0302020204030204", \ + "020F0302020204030204" * (random.randrange(5, 20)), \ + ) + + # Fuzz the value + if random.choice((True,False)) and value == "": + + # Empty + new_value = value + + elif random.choice((True,False)) and value.isdigit(): + + # Numbers + new_value = random.choice(choices_number) + + elif random.choice((True,False)) and value.isalpha(): + + # Letters + new_value = random.choice(choices_letter) + + elif random.choice((True,False)) and value.isalnum(): + + # Alphanumeric + new_value = random.choice(choices_alnum) + + else: + + # Default type + new_value = random.choice(choices_alnum + choices_letter + choices_number) + + # If we worked on a substring, apply changes to the whole string + if value != attrib_value: + # No ' around empty values + if new_value != "" and value != "": + new_value = "'" + new_value + "'" + # Apply changes + new_value = attrib_value.replace(unclean_value, new_value) + + # Log something + if self.verbose: + print("Fuzzing attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)) + + # Modify the attribute + rand_elem.set(rand_attrib, new_value.decode("utf-8")) + + def __del_node_and_children (self): + + """ High-level minimizing mutator + Delete a random node and its children (i.e. delete a random tree) """ + + self.__del_node(True) + + def __del_node_but_children (self): + + """ High-level minimizing mutator + Delete a random node but its children (i.e. link them to the parent of the deleted node) """ + + self.__del_node(False) + + def __del_node (self, delete_children): + + """ Called by the __del_node_* mutators """ + + # Select a node to modify (but the root one) + (rand_elem_id, rand_elem) = self.__pick_element (exclude_root_node = True) + + # If the document includes only a top-level element + # Then we can't pick a element (given that "exclude_root_node = True") + + # Is the document deep enough? 
+ if rand_elem is None: + if self.verbose: + print("Can't delete a node: document not deep enough!") + return + + # Log something + if self.verbose: + but_or_and = "and" if delete_children else "but" + print("Deleting tag #%i '%s' %s its children" % (rand_elem_id, rand_elem.tag, but_or_and)) + + if delete_children is False: + # Link children of the random (soon to be deleted) node to its parent + for child in rand_elem: + rand_elem.getparent().append(child) + + # Remove the node + rand_elem.getparent().remove(rand_elem) + + def __del_content (self): + + """ High-level minimizing mutator + Delete the attributes and children of a random node """ + + # Select a node to modify + (rand_elem_id, rand_elem) = self.__pick_element() + + # Log something + if self.verbose: + print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag)) + + # Reset the node + rand_elem.clear() + + def __del_attribute (self): + + """ High-level minimizing mutator + Delete a random attribute from a random node """ + + # Select a node to modify + (rand_elem_id, rand_elem) = self.__pick_element() + + # Get all the attributes + attribs = rand_elem.keys() + + # Is there attributes? + if len(attribs) < 1: + if self.verbose: + print("No attribute: can't delete!") + return + + # Pick a random attribute + rand_attrib_id = random.randint (0, len(attribs) - 1) + rand_attrib = attribs[rand_attrib_id] + + # Log something + if self.verbose: + print("Deleting attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)) + + # Delete the attribute + rand_elem.attrib.pop(rand_attrib) + + def mutate (self, min=1, max=5): + + """ Execute some high-level mutators between $min and $max times, then some medium-level ones """ + + # High-level mutation + self.__exec_among(self, self.hl_mutators_all, min, max) + diff --git a/python_mutators/wrapper_afl_min.py b/python_mutators/wrapper_afl_min.py new file mode 100644 index 00000000..df09b40a --- /dev/null +++ b/python_mutators/wrapper_afl_min.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python + +from XmlMutatorMin import XmlMutatorMin + +# Default settings (production mode) + +__mutator__ = None +__seed__ = "RANDOM" +__log__ = False +__log_file__ = "wrapper.log" + +# AFL functions + +def log(text): + """ + Logger + """ + + global __seed__ + global __log__ + global __log_file__ + + if __log__: + with open(__log_file__, "a") as logf: + logf.write("[%s] %s\n" % (__seed__, text)) + +def init(seed): + """ + Called once when AFL starts up. Seed is used to identify the AFL instance in log files + """ + + global __mutator__ + global __seed__ + + # Get the seed + __seed__ = seed + + # Create a global mutation class + try: + __mutator__ = XmlMutatorMin(__seed__, verbose=__log__) + log("init(): Mutator created") + except RuntimeError as e: + log("init(): Can't create mutator: %s" % e.message) + +def fuzz(buf, add_buf): + """ + Called for each fuzzing iteration. + """ + + global __mutator__ + + # Do we have a working mutator object? 
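+    # (Overall flow from here on: interpret the AFL buffer as XML text, feed
+    # it to the mutator, apply a few high-level mutations and serialise the
+    # result back into a bytearray; if anything fails along the way the
+    # original buffer is returned unchanged.)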
+ if __mutator__ is None: + log("fuzz(): Can't fuzz, no mutator available") + return buf + + # Try to use the AFL buffer + via_buffer = True + + # Interpret the AFL buffer (an array of bytes) as a string + if via_buffer: + try: + buf_str = str(buf) + log("fuzz(): AFL buffer converted to a string") + except: + via_buffer = False + log("fuzz(): Can't convert AFL buffer to a string") + + # Load XML from the AFL string + if via_buffer: + try: + __mutator__.init_from_string(buf_str) + log("fuzz(): Mutator successfully initialized with AFL buffer (%d bytes)" % len(buf_str)) + except: + via_buffer = False + log("fuzz(): Can't initialize mutator with AFL buffer") + + # If init from AFL buffer wasn't succesful + if not via_buffer: + log("fuzz(): Returning unmodified AFL buffer") + return buf + + # Sucessful initialization -> mutate + try: + __mutator__.mutate(max=5) + log("fuzz(): Input mutated") + except: + log("fuzz(): Can't mutate input => returning buf") + return buf + + # Convert mutated data to a array of bytes + try: + data = bytearray(__mutator__.save_to_string()) + log("fuzz(): Mutated data converted as bytes") + except: + log("fuzz(): Can't convert mutated data to bytes => returning buf") + return buf + + # Everything went fine, returning mutated content + log("fuzz(): Returning %d bytes" % len(data)) + return data + +# Main (for debug) + +if __name__ == '__main__': + + __log__ = True + __log_file__ = "/dev/stdout" + __seed__ = "RANDOM" + + init(__seed__) + + in_1 = bytearray("<foo ddd='eeee'>ffff<a b='c' d='456' eee='ffffff'>zzzzzzzzzzzz</a><b yyy='YYY' zzz='ZZZ'></b></foo>") + in_2 = bytearray("<abc abc123='456' abcCBA='ppppppppppppppppppppppppppppp'/>") + out = fuzz(in_1, in_2) + print(out) + |
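If your afl-fuzz build includes the optional Python mutator support, the
wrapper above can be selected at fuzzing time roughly like this (the
PYTHONPATH / AFL_PYTHON_MODULE variable names are assumptions based on
afl++'s Python mutator support, and the target and directories are
placeholders - adjust everything to your checkout):

  export PYTHONPATH=/path/to/afl++/python_mutators
  export AFL_PYTHON_MODULE=wrapper_afl_min
  afl-fuzz -i in_dir -o out_dir -- ./xml_consumer @@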