diff options
| -rw-r--r-- | GNUmakefile | 37 | ||||
| -rw-r--r-- | docs/Changelog.md | 6 | ||||
| -rw-r--r-- | include/afl-fuzz.h | 20 | ||||
| -rw-r--r-- | include/coverage-32.h | 112 | ||||
| -rw-r--r-- | include/coverage-64.h | 189 | ||||
| -rw-r--r-- | instrumentation/afl-compiler-rt.o.c | 4 | ||||
| -rw-r--r-- | src/afl-forkserver.c | 4 | ||||
| -rw-r--r-- | src/afl-fuzz-bitmap.c | 284 | ||||
| -rw-r--r-- | src/afl-fuzz-run.c | 6 | ||||
| -rw-r--r-- | src/afl-performance.c | 124 | 
10 files changed, 450 insertions, 336 deletions
| diff --git a/GNUmakefile b/GNUmakefile index 16bcdae5..71b41227 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -42,8 +42,8 @@ endif ifdef ASAN_BUILD $(info Compiling ASAN version of binaries) - override CFLAGS+=$(ASAN_CFLAGS) - LDFLAGS+=$(ASAN_LDFLAGS) + override CFLAGS += $(ASAN_CFLAGS) + LDFLAGS += $(ASAN_LDFLAGS) endif ifdef UBSAN_BUILD $(info Compiling UBSAN version of binaries) @@ -77,30 +77,34 @@ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -fno-move-loop-invariants - SPECIAL_PERFORMANCE += -fno-move-loop-invariants -fdisable-tree-cunrolli endif +ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" + ifndef SOURCE_DATE_EPOCH + HAVE_MARCHNATIVE = 1 + CFLAGS_OPT += -march=native + endif +endif + ifneq "$(shell uname)" "Darwin" - ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" - ifndef SOURCE_DATE_EPOCH - #CFLAGS_OPT += -march=native - SPECIAL_PERFORMANCE += -march=native - endif - endif + ifeq "$(HAVE_MARCHNATIVE)" "1" + SPECIAL_PERFORMANCE += -march=native + endif # OS X does not like _FORTIFY_SOURCE=2 - ifndef DEBUG - CFLAGS_OPT += -D_FORTIFY_SOURCE=2 - endif + ifndef DEBUG + CFLAGS_OPT += -D_FORTIFY_SOURCE=2 + endif endif ifeq "$(shell uname)" "SunOS" - CFLAGS_OPT += -Wno-format-truncation - LDFLAGS=-lkstat -lrt + CFLAGS_OPT += -Wno-format-truncation + LDFLAGS = -lkstat -lrt endif ifdef STATIC $(info Compiling static version of binaries, disabling python though) # Disable python for static compilation to simplify things - PYTHON_OK=0 + PYTHON_OK = 0 PYFLAGS= - PYTHON_INCLUDE=/ + PYTHON_INCLUDE = / CFLAGS_OPT += -static LDFLAGS += -lm -lpthread -lz -lutil @@ -117,6 +121,7 @@ ifdef INTROSPECTION CFLAGS_OPT += -DINTROSPECTION=1 endif + ifneq "$(shell uname -m)" "x86_64" ifneq "$(patsubst i%86,i386,$(shell uname -m))" "i386" ifneq "$(shell uname -m)" "amd64" @@ -131,7 +136,7 @@ ifdef DEBUG $(info Compiling DEBUG version of binaries) CFLAGS += -ggdb3 -O0 -Wall -Wextra -Werror else - CFLAGS ?= -O3 -funroll-loops $(CFLAGS_OPT) + CFLAGS ?= -O3 -funroll-loops $(CFLAGS_OPT) endif override CFLAGS += -g -Wno-pointer-sign -Wno-variadic-macros -Wall -Wextra -Wpointer-arith \ diff --git a/docs/Changelog.md b/docs/Changelog.md index a26a4e0e..0652a295 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -10,8 +10,10 @@ sending a mail to <afl-users+subscribe@googlegroups.com>. ### Version ++3.01a (release) - - fix crash for very, very fast targets+systems (thanks to mhlakhani - for reporting) + - afl-fuzz + - fix crash for very, very fast targets+systems, thanks for reporting @mhlakhani + - switched to a faster RNG + - added hghwng's patch for faster trace map analysis - added dummy Makefile to instrumentation/ - afl-cc - allow instrumenting LLVMFuzzerTestOneInput diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 2f2d31d3..99647c5b 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -134,6 +134,12 @@ // Little helper to access the ptr to afl->##name_buf - for use in afl_realloc. #define AFL_BUF_PARAM(name) ((void **)&afl->name##_buf) +#ifdef WORD_SIZE_64 + #define AFL_RAND_RETURN u64 +#else + #define AFL_RAND_RETURN u32 +#endif + extern s8 interesting_8[INTERESTING_8_LEN]; extern s16 interesting_16[INTERESTING_8_LEN + INTERESTING_16_LEN]; extern s32 @@ -580,7 +586,7 @@ typedef struct afl_state { u32 rand_cnt; /* Random number counter */ - u64 rand_seed[4]; + u64 rand_seed[3]; s64 init_seed; u64 total_cal_us, /* Total calibration time (us) */ @@ -1014,13 +1020,9 @@ void write_bitmap(afl_state_t *); u32 count_bits(afl_state_t *, u8 *); u32 count_bytes(afl_state_t *, u8 *); u32 count_non_255_bytes(afl_state_t *, u8 *); -#ifdef WORD_SIZE_64 -void simplify_trace(afl_state_t *, u64 *); -void classify_counts(afl_forkserver_t *); -#else -void simplify_trace(afl_state_t *, u32 *); +void simplify_trace(afl_state_t *, u8 *); void classify_counts(afl_forkserver_t *); -#endif +void discover_word(u8 *ret, u64 *current, u64 *virgin); void init_count_class16(void); void minimize_bits(afl_state_t *, u8 *, u8 *); #ifndef SIMPLE_FILES @@ -1028,6 +1030,7 @@ u8 *describe_op(afl_state_t *, u8, size_t); #endif u8 save_if_interesting(afl_state_t *, void *, u32, u8); u8 has_new_bits(afl_state_t *, u8 *); +u8 has_new_bits_unclassified(afl_state_t *, u8 *); /* Extras */ @@ -1111,8 +1114,7 @@ u8 common_fuzz_cmplog_stuff(afl_state_t *afl, u8 *out_buf, u32 len); u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len, u64 exec_cksum); -/* xoshiro256** */ -uint64_t rand_next(afl_state_t *afl); +AFL_RAND_RETURN rand_next(afl_state_t *afl); /* probability between 0.0 and 1.0 */ double rand_next_percent(afl_state_t *afl); diff --git a/include/coverage-32.h b/include/coverage-32.h new file mode 100644 index 00000000..d7684708 --- /dev/null +++ b/include/coverage-32.h @@ -0,0 +1,112 @@ +#include "config.h" +#include "types.h" + +u32 skim(const u32 *virgin, const u32 *current, const u32 *current_end); +u32 classify_word(u32 word); + +inline u32 classify_word(u32 word) { + + u16 mem16[2]; + memcpy(mem16, &word, sizeof(mem16)); + + mem16[0] = count_class_lookup16[mem16[0]]; + mem16[1] = count_class_lookup16[mem16[1]]; + + memcpy(&word, mem16, sizeof(mem16)); + return word; + +} + +void simplify_trace(afl_state_t *afl, u8 *bytes) { + + u32 *mem = (u32 *)fsrv->trace_bits; + u32 i = (fsrv->map_size >> 2); + + while (i--) { + + /* Optimize for sparse bitmaps. */ + + if (unlikely(*mem)) { + + u8 *mem8 = (u8 *)mem; + + mem8[0] = simplify_lookup[mem8[0]]; + mem8[1] = simplify_lookup[mem8[1]]; + mem8[2] = simplify_lookup[mem8[2]]; + mem8[3] = simplify_lookup[mem8[3]]; + + } else + + *mem = 0x01010101; + + mem++; + + } + +} + +inline void classify_counts(u8 *bytes) { + + u64 *mem = (u64 *)bytes; + u32 i = MAP_SIZE >> 2; + + while (i--) { + + /* Optimize for sparse bitmaps. */ + + if (unlikely(*mem)) { *mem = classify_word(*mem); } + + mem++; + + } + +} + +/* Updates the virgin bits, then reflects whether a new count or a new tuple is + * seen in ret. */ +inline void discover_word(u8 *ret, u32 *current, u32 *virgin) { + + /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap + that have not been already cleared from the virgin map - since this will + almost always be the case. */ + + if (*current & *virgin) { + + if (likely(*ret < 2)) { + + u8 *cur = (u8 *)current; + u8 *vir = (u8 *)virgin; + + /* Looks like we have not found any new bytes yet; see if any non-zero + bytes in current[] are pristine in virgin[]. */ + + if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) || + (cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff)) + *ret = 2; + else + *ret = 1; + + } + + *virgin &= ~*current; + + } + +} + +#define PACK_SIZE 16 +inline u32 skim(const u32 *virgin, const u32 *current, const u32 *current_end) { + + for (; current != current_end; virgin += 4, current += 4) { + + if (current[0] && classify_word(current[0]) & virgin[0]) return 1; + if (current[1] && classify_word(current[1]) & virgin[1]) return 1; + if (current[2] && classify_word(current[2]) & virgin[2]) return 1; + if (current[3] && classify_word(current[3]) & virgin[3]) return 1; + + } + + return 0; + +} + diff --git a/include/coverage-64.h b/include/coverage-64.h new file mode 100644 index 00000000..0ede5fa5 --- /dev/null +++ b/include/coverage-64.h @@ -0,0 +1,189 @@ +#include "config.h" +#include "types.h" + +#if (defined(__AVX512F__) && defined(__AVX512DQ__)) || defined(__AVX2__) + #include <immintrin.h> +#endif + +u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end); +u64 classify_word(u64 word); + +inline u64 classify_word(u64 word) { + + u16 mem16[4]; + memcpy(mem16, &word, sizeof(mem16)); + + mem16[0] = count_class_lookup16[mem16[0]]; + mem16[1] = count_class_lookup16[mem16[1]]; + mem16[2] = count_class_lookup16[mem16[2]]; + mem16[3] = count_class_lookup16[mem16[3]]; + + memcpy(&word, mem16, sizeof(mem16)); + return word; + +} + +void simplify_trace(afl_state_t *afl, u8 *bytes) { + + u64 *mem = (u64 *)bytes; + u32 i = (afl->fsrv.map_size >> 3); + + while (i--) { + + /* Optimize for sparse bitmaps. */ + + if (unlikely(*mem)) { + + u8 *mem8 = (u8 *)mem; + + mem8[0] = simplify_lookup[mem8[0]]; + mem8[1] = simplify_lookup[mem8[1]]; + mem8[2] = simplify_lookup[mem8[2]]; + mem8[3] = simplify_lookup[mem8[3]]; + mem8[4] = simplify_lookup[mem8[4]]; + mem8[5] = simplify_lookup[mem8[5]]; + mem8[6] = simplify_lookup[mem8[6]]; + mem8[7] = simplify_lookup[mem8[7]]; + + } else + + *mem = 0x0101010101010101ULL; + + mem++; + + } + +} + +inline void classify_counts(afl_forkserver_t *fsrv) { + + u64 *mem = (u64 *)fsrv->trace_bits; + u32 i = (fsrv->map_size >> 3); + + while (i--) { + + /* Optimize for sparse bitmaps. */ + + if (unlikely(*mem)) { *mem = classify_word(*mem); } + + mem++; + + } + +} + +/* Updates the virgin bits, then reflects whether a new count or a new tuple is + * seen in ret. */ +inline void discover_word(u8 *ret, u64 *current, u64 *virgin) { + + /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap + that have not been already cleared from the virgin map - since this will + almost always be the case. */ + + if (*current & *virgin) { + + if (likely(*ret < 2)) { + + u8 *cur = (u8 *)current; + u8 *vir = (u8 *)virgin; + + /* Looks like we have not found any new bytes yet; see if any non-zero + bytes in current[] are pristine in virgin[]. */ + + if ((cur[0] && vir[0] == 0xff) || (cur[1] && vir[1] == 0xff) || + (cur[2] && vir[2] == 0xff) || (cur[3] && vir[3] == 0xff) || + (cur[4] && vir[4] == 0xff) || (cur[5] && vir[5] == 0xff) || + (cur[6] && vir[6] == 0xff) || (cur[7] && vir[7] == 0xff)) + *ret = 2; + else + *ret = 1; + + } + + *virgin &= ~*current; + + } + +} + +#if defined(__AVX512F__) && defined(__AVX512DQ__) + #define PACK_SIZE 64 +inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) { + + for (; current != current_end; virgin += 8, current += 8) { + + __m512i value = *(__m512i *)current; + __mmask8 mask = _mm512_testn_epi64_mask(value, value); + + /* All bytes are zero. */ + if (mask == 0xff) continue; + + /* Look for nonzero bytes and check for new bits. */ + #define UNROLL(x) \ + if (!(mask & (1 << x)) && classify_word(current[x]) & virgin[x]) return 1 + UNROLL(0); + UNROLL(1); + UNROLL(2); + UNROLL(3); + UNROLL(4); + UNROLL(5); + UNROLL(6); + UNROLL(7); + #undef UNROLL + + } + + return 0; + +} + +#endif + +#if !defined(PACK_SIZE) && defined(__AVX2__) + #define PACK_SIZE 32 +inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) { + + __m256i zeroes = _mm256_setzero_si256(); + + for (; current != current_end; virgin += 4, current += 4) { + + __m256i value = *(__m256i *)current; + __m256i cmp = _mm256_cmpeq_epi64(value, zeroes); + u32 mask = _mm256_movemask_epi8(cmp); + + /* All bytes are zero. */ + if (mask == (u32)-1) continue; + + /* Look for nonzero bytes and check for new bits. */ + if (!(mask & 0xff) && classify_word(current[0]) & virgin[0]) return 1; + if (!(mask & 0xff00) && classify_word(current[1]) & virgin[1]) return 1; + if (!(mask & 0xff0000) && classify_word(current[2]) & virgin[2]) return 1; + if (!(mask & 0xff000000) && classify_word(current[3]) & virgin[3]) return 1; + + } + + return 0; + +} + +#endif + +#if !defined(PACK_SIZE) + #define PACK_SIZE 32 +inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) { + + for (; current != current_end; virgin += 4, current += 4) { + + if (current[0] && classify_word(current[0]) & virgin[0]) return 1; + if (current[1] && classify_word(current[1]) & virgin[1]) return 1; + if (current[2] && classify_word(current[2]) & virgin[2]) return 1; + if (current[3] && classify_word(current[3]) & virgin[3]) return 1; + + } + + return 0; + +} + +#endif + diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index b1df26db..cddde87c 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -236,8 +236,8 @@ static void __afl_map_shm(void) { if (__afl_final_loc) { - if (__afl_final_loc % 8) - __afl_final_loc = (((__afl_final_loc + 7) >> 3) << 3); + if (__afl_final_loc % 32) + __afl_final_loc = (((__afl_final_loc + 31) >> 5) << 5); __afl_map_size = __afl_final_loc; if (__afl_final_loc > MAP_SIZE) { diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c index 3afb94be..90fa55e9 100644 --- a/src/afl-forkserver.c +++ b/src/afl-forkserver.c @@ -641,11 +641,11 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv, if (!fsrv->map_size) { fsrv->map_size = MAP_SIZE; } - if (unlikely(tmp_map_size % 8)) { + if (unlikely(tmp_map_size % 32)) { // should not happen WARNF("Target reported non-aligned map size of %u", tmp_map_size); - tmp_map_size = (((tmp_map_size + 8) >> 3) << 3); + tmp_map_size = (((tmp_map_size + 31) >> 5) << 5); } diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c index f1ca7400..738ba986 100644 --- a/src/afl-fuzz-bitmap.c +++ b/src/afl-fuzz-bitmap.c @@ -49,101 +49,6 @@ void write_bitmap(afl_state_t *afl) { } -/* Check if the current execution path brings anything new to the table. - Update virgin bits to reflect the finds. Returns 1 if the only change is - the hit-count for a particular tuple; 2 if there are new tuples seen. - Updates the map, so subsequent calls will always return 0. - - This function is called after every exec() on a fairly large buffer, so - it needs to be fast. We do this in 32-bit and 64-bit flavors. */ - -u8 __attribute__((hot)) has_new_bits(afl_state_t *afl, u8 *virgin_map) { - -#ifdef WORD_SIZE_64 - - u64 *current = (u64 *)afl->fsrv.trace_bits; - u64 *virgin = (u64 *)virgin_map; - - u32 i = (afl->fsrv.map_size >> 3); - -#else - - u32 *current = (u32 *)afl->fsrv.trace_bits; - u32 *virgin = (u32 *)virgin_map; - - u32 i = (afl->fsrv.map_size >> 2); - -#endif /* ^WORD_SIZE_64 */ - // the map size must be a minimum of 8 bytes. - // for variable/dynamic map sizes this is ensured in the forkserver - - u8 ret = 0; - - while (i--) { - - /* Optimize for (*current & *virgin) == 0 - i.e., no bits in current bitmap - that have not been already cleared from the virgin map - since this will - almost always be the case. */ - - // the (*current) is unnecessary but speeds up the overall comparison - if (unlikely(*current) && unlikely(*current & *virgin)) { - - if (likely(ret < 2)) { - - u8 *cur = (u8 *)current; - u8 *vir = (u8 *)virgin; - - /* Looks like we have not found any new bytes yet; see if any non-zero - bytes in current[] are pristine in virgin[]. */ - -#ifdef WORD_SIZE_64 - - if (*virgin == 0xffffffffffffffff || (cur[0] && vir[0] == 0xff) || - (cur[1] && vir[1] == 0xff) || (cur[2] && vir[2] == 0xff) || - (cur[3] && vir[3] == 0xff) || (cur[4] && vir[4] == 0xff) || - (cur[5] && vir[5] == 0xff) || (cur[6] && vir[6] == 0xff) || - (cur[7] && vir[7] == 0xff)) { - - ret = 2; - - } else { - - ret = 1; - - } - -#else - - if (*virgin == 0xffffffff || (cur[0] && vir[0] == 0xff) || - (cur[1] && vir[1] == 0xff) || (cur[2] && vir[2] == 0xff) || - (cur[3] && vir[3] == 0xff)) - ret = 2; - else - ret = 1; - -#endif /* ^WORD_SIZE_64 */ - - } - - *virgin &= ~*current; - - } - - ++current; - ++virgin; - - } - - if (unlikely(ret) && likely(virgin_map == afl->virgin_bits)) { - - afl->bitmap_changed = 1; - - } - - return ret; - -} - /* Count the number of bits set in the provided bitmap. Used for the status screen several times every second, does not have to be fast. */ @@ -242,77 +147,11 @@ const u8 simplify_lookup[256] = { }; -#ifdef WORD_SIZE_64 - -void simplify_trace(afl_state_t *afl, u64 *mem) { - - u32 i = (afl->fsrv.map_size >> 3); - - while (i--) { - - /* Optimize for sparse bitmaps. */ - - if (unlikely(*mem)) { - - u8 *mem8 = (u8 *)mem; - - mem8[0] = simplify_lookup[mem8[0]]; - mem8[1] = simplify_lookup[mem8[1]]; - mem8[2] = simplify_lookup[mem8[2]]; - mem8[3] = simplify_lookup[mem8[3]]; - mem8[4] = simplify_lookup[mem8[4]]; - mem8[5] = simplify_lookup[mem8[5]]; - mem8[6] = simplify_lookup[mem8[6]]; - mem8[7] = simplify_lookup[mem8[7]]; - - } else { - - *mem = 0x0101010101010101ULL; - - } - - ++mem; - - } - -} - -#else - -void simplify_trace(afl_state_t *afl, u32 *mem) { - - u32 i = (afl->fsrv.map_size >> 2); - - while (i--) { - - /* Optimize for sparse bitmaps. */ - - if (unlikely(*mem)) { - - u8 *mem8 = (u8 *)mem; - - mem8[0] = simplify_lookup[mem8[0]]; - mem8[1] = simplify_lookup[mem8[1]]; - mem8[2] = simplify_lookup[mem8[2]]; - mem8[3] = simplify_lookup[mem8[3]]; - - } else - - *mem = 0x01010101; - - ++mem; - - } - -} - -#endif /* ^WORD_SIZE_64 */ - /* Destructively classify execution counts in a trace. This is used as a preprocessing step for any newly acquired traces. Called on every exec, must be fast. */ -static const u8 count_class_lookup8[256] = { +const u8 count_class_lookup8[256] = { [0] = 0, [1] = 1, @@ -326,7 +165,7 @@ static const u8 count_class_lookup8[256] = { }; -static u16 count_class_lookup16[65536]; +u16 count_class_lookup16[65536]; void init_count_class16(void) { @@ -345,63 +184,87 @@ void init_count_class16(void) { } -#ifdef WORD_SIZE_64 +/* Import coverage processing routines. */ -void __attribute__((hot)) classify_counts(afl_forkserver_t *fsrv) { +#ifdef WORD_SIZE_64 + #include "coverage-64.h" +#else + #include "coverage-32.h" +#endif - u64 *mem = (u64 *)fsrv->trace_bits; +/* Check if the current execution path brings anything new to the table. + Update virgin bits to reflect the finds. Returns 1 if the only change is + the hit-count for a particular tuple; 2 if there are new tuples seen. + Updates the map, so subsequent calls will always return 0. - u32 i = (fsrv->map_size >> 3); + This function is called after every exec() on a fairly large buffer, so + it needs to be fast. We do this in 32-bit and 64-bit flavors. */ - while (i--) { +inline u8 has_new_bits(afl_state_t *afl, u8 *virgin_map) { - /* Optimize for sparse bitmaps. */ +#ifdef WORD_SIZE_64 - if (unlikely(*mem)) { + u64 *current = (u64 *)afl->fsrv.trace_bits; + u64 *virgin = (u64 *)virgin_map; - u16 *mem16 = (u16 *)mem; + u32 i = (afl->fsrv.map_size >> 3); - mem16[0] = count_class_lookup16[mem16[0]]; - mem16[1] = count_class_lookup16[mem16[1]]; - mem16[2] = count_class_lookup16[mem16[2]]; - mem16[3] = count_class_lookup16[mem16[3]]; +#else - } + u32 *current = (u32 *)afl->fsrv.trace_bits; + u32 *virgin = (u32 *)virgin_map; - ++mem; + u32 i = (afl->fsrv.map_size >> 2); - } +#endif /* ^WORD_SIZE_64 */ -} + u8 ret = 0; + while (i--) { -#else + if (unlikely(*current)) discover_word(&ret, current, virgin); -void __attribute__((hot)) classify_counts(afl_forkserver_t *fsrv) { + current++; + virgin++; - u32 *mem = (u32 *)fsrv->trace_bits; + } - u32 i = (fsrv->map_size >> 2); + if (unlikely(ret) && likely(virgin_map == afl->virgin_bits)) + afl->bitmap_changed = 1; - while (i--) { + return ret; - /* Optimize for sparse bitmaps. */ +} - if (unlikely(*mem)) { +/* A combination of classify_counts and has_new_bits. If 0 is returned, then the + * trace bits are kept as-is. Otherwise, the trace bits are overwritten with + * classified values. + * + * This accelerates the processing: in most cases, no interesting behavior + * happen, and the trace bits will be discarded soon. This function optimizes + * for such cases: one-pass scan on trace bits without modifying anything. Only + * on rare cases it fall backs to the slow path: classify_counts() first, then + * return has_new_bits(). */ - u16 *mem16 = (u16 *)mem; +inline u8 has_new_bits_unclassified(afl_state_t *afl, u8 *virgin_map) { - mem16[0] = count_class_lookup16[mem16[0]]; - mem16[1] = count_class_lookup16[mem16[1]]; + /* Handle the hot path first: no new coverage */ + u8 *end = afl->fsrv.trace_bits + afl->fsrv.map_size; - } +#ifdef WORD_SIZE_64 - ++mem; + if (!skim((u64 *)virgin_map, (u64 *)afl->fsrv.trace_bits, (u64 *)end)) + return 0; - } +#else -} + if (!skim((u32 *)virgin_map, (u32 *)afl->fsrv.trace_bits, (u32 *)end)) + return 0; #endif /* ^WORD_SIZE_64 */ + classify_counts(&afl->fsrv); + return has_new_bits(afl, virgin_map); + +} /* Compact trace bytes into a smaller bitmap. We effectively just drop the count information here. This is called only sporadically, for some @@ -581,7 +444,7 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) { u8 *queue_fn = ""; u8 new_bits = '\0'; s32 fd; - u8 keeping = 0, res; + u8 keeping = 0, res, classified = 0; u64 cksum = 0; u8 fn[PATH_MAX]; @@ -605,13 +468,17 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) { /* Keep only if there are new bits in the map, add to queue for future fuzzing, etc. */ - if (!(new_bits = has_new_bits(afl, afl->virgin_bits))) { + new_bits = has_new_bits_unclassified(afl, afl->virgin_bits); + + if (likely(!new_bits)) { if (unlikely(afl->crash_mode)) { ++afl->total_crashes; } return 0; } + classified = new_bits; + #ifndef SIMPLE_FILES queue_fn = alloc_printf( @@ -715,11 +582,14 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) { if (likely(!afl->non_instrumented_mode)) { -#ifdef WORD_SIZE_64 - simplify_trace(afl, (u64 *)afl->fsrv.trace_bits); -#else - simplify_trace(afl, (u32 *)afl->fsrv.trace_bits); -#endif /* ^WORD_SIZE_64 */ + if (!classified) { + + classify_counts(&afl->fsrv); + classified = 1; + + } + + simplify_trace(afl, afl->fsrv.trace_bits); if (!has_new_bits(afl, afl->virgin_tmout)) { return keeping; } @@ -764,6 +634,7 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) { u8 new_fault; write_to_testcase(afl, mem, len); new_fault = fuzz_run_target(afl, &afl->fsrv, afl->hang_tmout); + classify_counts(&afl->fsrv); /* A corner case that one user reported bumping into: increasing the timeout actually uncovers a crash. Make sure we don't discard it if @@ -812,11 +683,14 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) { if (likely(!afl->non_instrumented_mode)) { -#ifdef WORD_SIZE_64 - simplify_trace(afl, (u64 *)afl->fsrv.trace_bits); -#else - simplify_trace(afl, (u32 *)afl->fsrv.trace_bits); -#endif /* ^WORD_SIZE_64 */ + if (!classified) { + + classify_counts(&afl->fsrv); + classified = 1; + + } + + simplify_trace(afl, afl->fsrv.trace_bits); if (!has_new_bits(afl, afl->virgin_crash)) { return keeping; } diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c index a97ceb89..60086bd6 100644 --- a/src/afl-fuzz-run.c +++ b/src/afl-fuzz-run.c @@ -62,8 +62,6 @@ fuzz_run_target(afl_state_t *afl, afl_forkserver_t *fsrv, u32 timeout) { time_spent_start = (spec.tv_sec * 1000000000) + spec.tv_nsec; #endif - // TODO: Don't classify for faults? - classify_counts(fsrv); return res; } @@ -379,6 +377,7 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem, } + classify_counts(&afl->fsrv); cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST); if (q->exec_cksum != cksum) { @@ -767,13 +766,14 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) { write_with_gap(afl, in_buf, q->len, remove_pos, trim_avail); fault = fuzz_run_target(afl, &afl->fsrv, afl->fsrv.exec_tmout); - ++afl->trim_execs; if (afl->stop_soon || fault == FSRV_RUN_ERROR) { goto abort_trimming; } /* Note that we don't keep track of crashes or hangs here; maybe TODO? */ + ++afl->trim_execs; + classify_counts(&afl->fsrv); cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST); /* If the deletion had no impact on the trace, make it permanent. This diff --git a/src/afl-performance.c b/src/afl-performance.c index e070a05e..89b170eb 100644 --- a/src/afl-performance.c +++ b/src/afl-performance.c @@ -27,45 +27,49 @@ #include "xxhash.h" #undef XXH_INLINE_ALL -/* we use xoshiro256** instead of rand/random because it is 10x faster and has - better randomness properties. */ - -static inline uint64_t rotl(const uint64_t x, int k) { - - return (x << k) | (x >> (64 - k)); - -} - void rand_set_seed(afl_state_t *afl, s64 init_seed) { afl->init_seed = init_seed; afl->rand_seed[0] = hash64((u8 *)&afl->init_seed, sizeof(afl->init_seed), HASH_CONST); afl->rand_seed[1] = afl->rand_seed[0] ^ 0x1234567890abcdef; - afl->rand_seed[2] = afl->rand_seed[0] & 0x0123456789abcdef; - afl->rand_seed[3] = afl->rand_seed[0] | 0x01abcde43f567908; + afl->rand_seed[2] = (afl->rand_seed[0] & 0x1234567890abcdef) ^ + (afl->rand_seed[1] | 0xfedcba9876543210); } -inline uint64_t rand_next(afl_state_t *afl) { +#define ROTL(d, lrot) ((d << (lrot)) | (d >> (8 * sizeof(d) - (lrot)))) - const uint64_t result = - rotl(afl->rand_seed[0] + afl->rand_seed[3], 23) + afl->rand_seed[0]; +#ifdef WORD_SIZE_64 +// romuDuoJr +inline AFL_RAND_RETURN rand_next(afl_state_t *afl) { - const uint64_t t = afl->rand_seed[1] << 17; + AFL_RAND_RETURN xp = afl->rand_seed[0]; + afl->rand_seed[0] = 15241094284759029579u * afl->rand_seed[1]; + afl->rand_seed[1] = afl->rand_seed[1] - xp; + afl->rand_seed[1] = ROTL(afl->rand_seed[1], 27); + return xp; - afl->rand_seed[2] ^= afl->rand_seed[0]; - afl->rand_seed[3] ^= afl->rand_seed[1]; - afl->rand_seed[1] ^= afl->rand_seed[2]; - afl->rand_seed[0] ^= afl->rand_seed[3]; +} - afl->rand_seed[2] ^= t; +#else +// RomuTrio32 +inline AFL_RAND_RETURN rand_next(afl_state_t *afl) { + + AFL_RAND_RETURN xp = afl->rand_seed[0], yp = afl->rand_seed[1], + zp = afl->rand_seed[2]; + afl->rand_seed[0] = 3323815723u * zp; + afl->rand_seed[1] = yp - xp; + afl->rand_seed[1] = ROTL(afl->rand_seed[1], 6); + afl->rand_seed[2] = zp - yp; + afl->rand_seed[2] = ROTL(afl->rand_seed[2], 22); + return xp; - afl->rand_seed[3] = rotl(afl->rand_seed[3], 45); +} - return result; +#endif -} +#undef ROTL /* returns a double between 0.000000000 and 1.000000000 */ @@ -75,80 +79,6 @@ inline double rand_next_percent(afl_state_t *afl) { } -/* This is the jump function for the generator. It is equivalent - to 2^128 calls to rand_next(); it can be used to generate 2^128 - non-overlapping subsequences for parallel computations. */ - -void jump(afl_state_t *afl) { - - static const uint64_t JUMP[] = {0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, - 0xa9582618e03fc9aa, 0x39abdc4529b1661c}; - size_t i, b; - uint64_t s0 = 0; - uint64_t s1 = 0; - uint64_t s2 = 0; - uint64_t s3 = 0; - for (i = 0; i < (sizeof(JUMP) / sizeof(*JUMP)); i++) - for (b = 0; b < 64; b++) { - - if (JUMP[i] & UINT64_C(1) << b) { - - s0 ^= afl->rand_seed[0]; - s1 ^= afl->rand_seed[1]; - s2 ^= afl->rand_seed[2]; - s3 ^= afl->rand_seed[3]; - - } - - rand_next(afl); - - } - - afl->rand_seed[0] = s0; - afl->rand_seed[1] = s1; - afl->rand_seed[2] = s2; - afl->rand_seed[3] = s3; - -} - -/* This is the long-jump function for the generator. It is equivalent to - 2^192 calls to rand_next(); it can be used to generate 2^64 starting points, - from each of which jump() will generate 2^64 non-overlapping - subsequences for parallel distributed computations. */ - -void long_jump(afl_state_t *afl) { - - static const uint64_t LONG_JUMP[] = {0x76e15d3efefdcbbf, 0xc5004e441c522fb3, - 0x77710069854ee241, 0x39109bb02acbe635}; - - size_t i, b; - uint64_t s0 = 0; - uint64_t s1 = 0; - uint64_t s2 = 0; - uint64_t s3 = 0; - for (i = 0; i < (sizeof(LONG_JUMP) / sizeof(*LONG_JUMP)); i++) - for (b = 0; b < 64; b++) { - - if (LONG_JUMP[i] & UINT64_C(1) << b) { - - s0 ^= afl->rand_seed[0]; - s1 ^= afl->rand_seed[1]; - s2 ^= afl->rand_seed[2]; - s3 ^= afl->rand_seed[3]; - - } - - rand_next(afl); - - } - - afl->rand_seed[0] = s0; - afl->rand_seed[1] = s1; - afl->rand_seed[2] = s2; - afl->rand_seed[3] = s3; - -} - /* we switch from afl's murmur implementation to xxh3 as it is 30% faster - and get 64 bit hashes instead of just 32 bit. Less collisions! :-) */ | 
