diff options
author | van Hauser <vh@thc.org> | 2020-06-29 18:35:51 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-29 18:35:51 +0200 |
commit | 6d9b29daca46c8912aa9ddf6c053bc8554e9e9f7 (patch) | |
tree | 4c762f1e2cfb4a8741c08b5b60d07c2ae8eee860 | |
parent | 07648f75ea5ef8f03a92db0c7566da8c229dc27b (diff) | |
parent | 76a2d9b59b23873c8a6d174a2f3c48eba60712fb (diff) | |
download | afl++-6d9b29daca46c8912aa9ddf6c053bc8554e9e9f7.tar.gz |
Merge branch 'text_inputs' into dev
-rw-r--r-- | include/afl-fuzz.h | 7 | ||||
-rw-r--r-- | include/config.h | 26 | ||||
-rw-r--r-- | src/afl-fuzz-one.c | 584 | ||||
-rw-r--r-- | src/afl-fuzz-queue.c | 116 | ||||
-rw-r--r-- | src/afl-performance.c | 10 |
5 files changed, 680 insertions, 63 deletions
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index c9f84c61..ca785e47 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -139,7 +139,8 @@ struct queue_entry { var_behavior, /* Variable behavior? */ favored, /* Currently favored? */ fs_redundant, /* Marked as redundant in the fs? */ - fully_colorized; /* Do not run redqueen stage again */ + fully_colorized, /* Do not run redqueen stage again */ + is_ascii; /* Is the input just ascii text? */ u32 bitmap_size, /* Number of bits set in bitmap */ fuzz_level; /* Number of fuzzing iterations */ @@ -947,7 +948,7 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len, u64 exec_cksum); /* xoshiro256** */ -uint64_t rand_next(afl_state_t *afl); +uint32_t rand_next(afl_state_t *afl); /**** Inline routines ****/ @@ -967,7 +968,7 @@ static inline u32 rand_below(afl_state_t *afl, u32 limit) { } - return rand_next(afl) % limit; + return (rand_next(afl) % limit); } diff --git a/include/config.h b/include/config.h index 087e0a76..09405a22 100644 --- a/include/config.h +++ b/include/config.h @@ -293,7 +293,7 @@ /* Call count interval between reseeding the libc PRNG from /dev/urandom: */ -#define RESEED_RNG 100000 +#define RESEED_RNG 256000 /* Maximum line length passed from GCC to 'as' and used for parsing configuration files: */ @@ -397,5 +397,29 @@ // #define IGNORE_FINDS +/* Text mutations */ + +/* What is the minimum length of a queue input to be evaluated for "is_ascii"? + */ + +#define AFL_TXT_MIN_LEN 12 + +/* What is the minimum percentage of ascii characters present to be classifed + as "is_ascii"? */ + +#define AFL_TXT_MIN_PERCENT 95 + +/* How often to perform ASCII mutations 0 = disable, 1-8 are good values */ + +#define AFL_TXT_BIAS 8 + +/* Maximum length of a string to tamper with */ + +#define AFL_TXT_STRING_MAX_LEN 1024 + +/* Maximum mutations on a string */ + +#define AFL_TXT_STRING_MAX_MUTATIONS 6 + #endif /* ! _HAVE_CONFIG_H */ diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 72383727..9e54815c 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -24,6 +24,11 @@ */ #include "afl-fuzz.h" +#include <string.h> + +static u8 *strnstr(const u8 *s, const u8 *find, size_t slen); +static u32 string_replace(u8 **out_buf, s32 *temp_len, u32 pos, u8 *from, + u8 *to); /* MOpt */ @@ -362,6 +367,463 @@ static void locate_diffs(u8 *ptr1, u8 *ptr2, u32 len, s32 *first, s32 *last) { #endif /* !IGNORE_FINDS */ +/* Not pretty, but saves a lot of writing */ +#define BUF_PARAMS(name) (void **)&afl->name##_buf, &afl->name##_size + +static u8 *strnstr(const u8 *s, const u8 *find, size_t slen) { + + char c, sc; + size_t len; + + if ((c = *find++) != '\0') { + + len = strlen(find); + do { + + do { + + if (slen-- < 1 || (sc = *s++) == '\0') return (NULL); + + } while (sc != c); + + if (len > slen) return (NULL); + + } while (strncmp(s, find, len) != 0); + + s--; + + } + + return ((u8 *)s); + +} + +/* replace between deliminators, if rep == NULL, then we will duplicate the + * target */ + +static u32 delim_replace(u8 **out_buf, s32 *temp_len, size_t pos, + const u8 *ldelim, const u8 *rdelim, u8 *rep) { + + u8 *end_buf = *out_buf + *temp_len; + u8 *ldelim_start = strnstr(*out_buf + pos, ldelim, *temp_len - pos); + + if (ldelim_start != NULL) { + + u32 max = (end_buf - ldelim_start - 1 > AFL_TXT_STRING_MAX_LEN + ? AFL_TXT_STRING_MAX_LEN + : end_buf - ldelim_start - 1); + + if (max > 0) { + + u8 *rdelim_end = strnstr(ldelim_start + 1, rdelim, max); + + if (rdelim_end != NULL) { + + u32 rep_len, delim_space_len = rdelim_end - ldelim_start - 1, xtra = 0; + + if (rep != NULL) { + + rep_len = (u32)strlen(rep); + + } else { // NULL? then we copy the value in between the delimiters + + rep_len = delim_space_len; + delim_space_len = 0; + rep = ldelim_start + 1; + xtra = rep_len; + + } + + if (rep_len != delim_space_len) { + + memmove(ldelim_start + rep_len + xtra + 1, rdelim_end, + *temp_len - (rdelim_end - *out_buf)); + + } + + memcpy(ldelim_start + 1, rep, rep_len); + *temp_len = (*temp_len - delim_space_len + rep_len); + + return 1; + + } + + } + + } + + return 0; + +} + +static u32 delim_swap(u8 **out_buf, s32 *temp_len, size_t pos, const u8 *ldelim, + const u8 *mdelim, const u8 *rdelim) { + + u8 *out_buf_end = *out_buf + *temp_len; + u32 max = (*temp_len - pos > AFL_TXT_STRING_MAX_LEN ? AFL_TXT_STRING_MAX_LEN + : *temp_len - pos); + u8 *ldelim_start = strnstr(*out_buf + pos, ldelim, max); + + if (ldelim_start != NULL) { + + max = (out_buf_end - ldelim_start - 1 > AFL_TXT_STRING_MAX_LEN + ? AFL_TXT_STRING_MAX_LEN + : out_buf_end - ldelim_start - 1); + if (max > 1) { + + u8 *mdelim_pos = strnstr(ldelim_start + 1, mdelim, max); + + if (mdelim_pos != NULL) { + + max = (out_buf_end - mdelim_pos - 1 > AFL_TXT_STRING_MAX_LEN + ? AFL_TXT_STRING_MAX_LEN + : out_buf_end - mdelim_pos - 1); + if (max > 0) { + + u8 *rdelim_end = strnstr(mdelim + 1, rdelim, max); + + if (rdelim_end != NULL) { + + u32 first_len = mdelim_pos - ldelim_start - 1; + u32 second_len = rdelim_end - mdelim_pos - 1; + u8 scratch[AFL_TXT_STRING_MAX_LEN]; + + memcpy(scratch, ldelim_start + 1, first_len); + + if (first_len != second_len) { + + memmove(ldelim_start + second_len + 1, mdelim_pos, + out_buf_end - mdelim_pos); + + } + + memcpy(ldelim_start + 1, mdelim_pos + 1, second_len); + + if (first_len != second_len) { + + memmove(mdelim_pos + first_len + 1, rdelim_end, + out_buf_end - rdelim_end); + + } + + memcpy(mdelim_pos + 1, scratch, first_len); + + return 1; + + } + + } + + } + + } + + } + + return 0; + +} + +static u32 string_replace(u8 **out_buf, s32 *temp_len, u32 pos, u8 *from, + u8 *to) { + + u8 *start = strnstr(*out_buf + pos, from, *temp_len - pos); + + if (start) { + + u32 from_len = strlen(from); + u32 to_len = strlen(to); + + if (from_len != to_len) { + + memmove(start + to_len, start + from_len, + *temp_len - from_len - (start - *out_buf)); + + } + + memcpy(start, to, to_len); + *temp_len = (*temp_len - from_len + to_len); + + return 1; + + } + + return 0; + +} + +/* Returns 1 if a mutant was generated and placed in out_buf, 0 if none + * generated. */ + +static int text_mutation(afl_state_t *afl, u8 **out_buf, s32 *orig_temp_len) { + + s32 temp_len; + u32 pos, yes = 0, + mutations = rand_below(afl, AFL_TXT_STRING_MAX_MUTATIONS) + 1; + u8 *new_buf = ck_maybe_grow(BUF_PARAMS(out_scratch), + *orig_temp_len + AFL_TXT_STRING_MAX_MUTATIONS); + temp_len = *orig_temp_len; + memcpy(new_buf, *out_buf, temp_len); + + for (u32 i = 0; i < mutations; i++) { + + if (temp_len < AFL_TXT_MIN_LEN) { + + if (yes) + return 1; + else + return 0; + + } + + pos = rand_below(afl, temp_len - 1); + int choice = rand_below(afl, 76); + switch (choice) { + + case 0: + yes += string_replace(out_buf, &temp_len, pos, "*", " "); + break; + case 1: + yes += string_replace(out_buf, &temp_len, pos, "(", "(!"); + break; + case 2: + yes += string_replace(out_buf, &temp_len, pos, "==", "!="); + break; + case 3: + yes += string_replace(out_buf, &temp_len, pos, "!=", "=="); + break; + case 4: + yes += string_replace(out_buf, &temp_len, pos, "==", "<"); + break; + case 5: + yes += string_replace(out_buf, &temp_len, pos, "<", "=="); + break; + case 6: + yes += string_replace(out_buf, &temp_len, pos, "==", ">"); + break; + case 7: + yes += string_replace(out_buf, &temp_len, pos, ">", "=="); + break; + case 8: + yes += string_replace(out_buf, &temp_len, pos, "=", "<"); + break; + case 9: + yes += string_replace(out_buf, &temp_len, pos, "=", ">"); + break; + case 10: + yes += string_replace(out_buf, &temp_len, pos, "<", ">"); + break; + case 11: + yes += string_replace(out_buf, &temp_len, pos, ">", "<"); + break; + case 12: + yes += string_replace(out_buf, &temp_len, pos, "++", "--"); + break; + case 13: + yes += string_replace(out_buf, &temp_len, pos, "--", "++"); + break; + case 14: + yes += string_replace(out_buf, &temp_len, pos, "+", "-"); + break; + case 15: + yes += string_replace(out_buf, &temp_len, pos, "+", "*"); + break; + case 16: + yes += string_replace(out_buf, &temp_len, pos, "+", "/"); + break; + case 17: + yes += string_replace(out_buf, &temp_len, pos, "+", "%"); + break; + case 18: + yes += string_replace(out_buf, &temp_len, pos, "*", "-"); + break; + case 19: + yes += string_replace(out_buf, &temp_len, pos, "*", "+"); + break; + case 20: + yes += string_replace(out_buf, &temp_len, pos, "*", "/"); + break; + case 21: + yes += string_replace(out_buf, &temp_len, pos, "*", "%"); + break; + case 22: + yes += string_replace(out_buf, &temp_len, pos, "-", "+"); + break; + case 23: + yes += string_replace(out_buf, &temp_len, pos, "-", "*"); + break; + case 24: + yes += string_replace(out_buf, &temp_len, pos, "-", "/"); + break; + case 25: + yes += string_replace(out_buf, &temp_len, pos, "-", "%"); + break; + case 26: + yes += string_replace(out_buf, &temp_len, pos, "/", "-"); + break; + case 27: + yes += string_replace(out_buf, &temp_len, pos, "/", "*"); + break; + case 28: + yes += string_replace(out_buf, &temp_len, pos, "/", "+"); + break; + case 29: + yes += string_replace(out_buf, &temp_len, pos, "/", "%"); + break; + case 30: + yes += string_replace(out_buf, &temp_len, pos, "%", "-"); + break; + case 31: + yes += string_replace(out_buf, &temp_len, pos, "%", "*"); + break; + case 32: + yes += string_replace(out_buf, &temp_len, pos, "%", "/"); + break; + case 33: + yes += string_replace(out_buf, &temp_len, pos, "%", "+"); + break; + case 34: + yes += string_replace(out_buf, &temp_len, pos, "->", "."); + break; + case 35: + yes += string_replace(out_buf, &temp_len, pos, ".", "->"); + break; + case 36: + yes += string_replace(out_buf, &temp_len, pos, "0", "1"); + break; + case 37: + yes += string_replace(out_buf, &temp_len, pos, "1", "0"); + break; + case 38: + yes += string_replace(out_buf, &temp_len, pos, "if", "while"); + break; + case 39: + yes += string_replace(out_buf, &temp_len, pos, "while", "if"); + break; + case 40: + yes += string_replace(out_buf, &temp_len, pos, "!", " "); + break; + case 41: + yes += string_replace(out_buf, &temp_len, pos, "&&", "||"); + break; + case 42: + yes += string_replace(out_buf, &temp_len, pos, "||", "&&"); + break; + case 43: + yes += string_replace(out_buf, &temp_len, pos, "!", ""); + break; + case 44: + yes += string_replace(out_buf, &temp_len, pos, "==", "="); + break; + case 45: + yes += string_replace(out_buf, &temp_len, pos, "--", ""); + break; + case 46: + yes += string_replace(out_buf, &temp_len, pos, "<<", "<"); + break; + case 47: + yes += string_replace(out_buf, &temp_len, pos, ">>", ">"); + break; + case 48: + yes += string_replace(out_buf, &temp_len, pos, "<", "<<"); + break; + case 49: + yes += string_replace(out_buf, &temp_len, pos, ">", ">>"); + break; + case 50: + yes += string_replace(out_buf, &temp_len, pos, "\"", "'"); + break; + case 51: + yes += string_replace(out_buf, &temp_len, pos, "'", "\""); + break; + case 52: + yes += string_replace(out_buf, &temp_len, pos, "(", "\""); + break; + case 53: + yes += string_replace(out_buf, &temp_len, pos, "\n", " "); + break; + case 54: + yes += string_replace(out_buf, &temp_len, pos, "\n", ";"); + break; + case 55: + yes += string_replace(out_buf, &temp_len, pos, "\n", "<"); + break; + case 56: /* Remove a semicolon delimited statement after a semicolon */ + yes += delim_replace(out_buf, &temp_len, pos, ";", ";", ";"); + break; + case 57: /* Remove a semicolon delimited statement after a left curly + brace */ + yes += delim_replace(out_buf, &temp_len, pos, "}", ";", "}"); + break; + case 58: /* Remove a curly brace construct */ + yes += delim_replace(out_buf, &temp_len, pos, "{", "}", ""); + break; + case 59: /* Replace a curly brace construct with an empty one */ + yes += delim_replace(out_buf, &temp_len, pos, "{", "}", "{}"); + break; + case 60: + yes += delim_swap(out_buf, &temp_len, pos, ";", ";", ";"); + break; + case 61: + yes += delim_swap(out_buf, &temp_len, pos, "}", ";", ";"); + break; + case 62: /* Swap comma delimited things case 1 */ + yes += delim_swap(out_buf, &temp_len, pos, "(", ",", ")"); + break; + case 63: /* Swap comma delimited things case 2 */ + yes += delim_swap(out_buf, &temp_len, pos, "(", ",", ","); + break; + case 64: /* Swap comma delimited things case 3 */ + yes += delim_swap(out_buf, &temp_len, pos, ",", ",", ","); + break; + case 65: /* Swap comma delimited things case 4 */ + yes += delim_swap(out_buf, &temp_len, pos, ",", ",", ")"); + break; + case 66: /* Just delete a line */ + yes += delim_replace(out_buf, &temp_len, pos, "\n", "\n", ""); + break; + case 67: /* Delete something like "const" case 1 */ + yes += delim_replace(out_buf, &temp_len, pos, " ", " ", ""); + break; + case 68: /* Delete something like "const" case 2 */ + yes += delim_replace(out_buf, &temp_len, pos, "\n", " ", ""); + break; + case 69: /* Delete something like "const" case 3 */ + yes += delim_replace(out_buf, &temp_len, pos, "(", " ", ""); + break; + case 70: /* Swap space delimited things case 1 */ + yes += delim_swap(out_buf, &temp_len, pos, " ", " ", " "); + break; + case 71: /* Swap space delimited things case 2 */ + yes += delim_swap(out_buf, &temp_len, pos, " ", " ", ")"); + break; + case 72: /* Swap space delimited things case 3 */ + yes += delim_swap(out_buf, &temp_len, pos, "(", " ", " "); + break; + case 73: /* Swap space delimited things case 4 */ + yes += delim_swap(out_buf, &temp_len, pos, "(", " ", ")"); + break; + case 74: /* Duplicate a single line of code */ + yes += delim_replace(out_buf, &temp_len, pos, "\n", "\n", NULL); + break; + case 75: /* Duplicate a construct (most often, a non-nested for loop */ + yes += delim_replace(out_buf, &temp_len, pos, "\n", "}", NULL); + break; + + } + + } + + if (yes == 0 || temp_len <= 0) { return 0; } + + swap_bufs(BUF_PARAMS(out), BUF_PARAMS(out_scratch)); + *out_buf = new_buf; + *orig_temp_len = temp_len; + + return 1; + +} + /* Take the current entry from the queue, fuzz it for a while. This function is a tad too long... returns 0 if fuzzed successfully, 1 if skipped or bailed out. */ @@ -378,9 +840,6 @@ u8 fuzz_one_original(afl_state_t *afl) { u8 a_collect[MAX_AUTO_EXTRA]; u32 a_len = 0; -/* Not pretty, but saves a lot of writing */ -#define BUF_PARAMS(name) (void **)&afl->name##_buf, &afl->name##_size - #ifdef IGNORE_FINDS /* In IGNORE_FINDS mode, skip any entries that weren't in the @@ -1854,9 +2313,12 @@ havoc_stage: /* We essentially just do several thousand runs (depending on perf_score) where we take the input file and make random stacked tweaks. */ + u32 r_max = 15 + ((afl->extras_cnt + afl->a_extras_cnt) ? 2 : 0) + + (afl->queue_cur->is_ascii ? AFL_TXT_BIAS : 0); + for (afl->stage_cur = 0; afl->stage_cur < afl->stage_max; ++afl->stage_cur) { - u32 use_stacking = 1 << (1 + rand_below(afl, HAVOC_STACK_POW2)); + u32 r, use_stacking = 1 << (1 + rand_below(afl, HAVOC_STACK_POW2)); afl->stage_cur_val = use_stacking; @@ -1896,8 +2358,9 @@ havoc_stage: } - switch (rand_below( - afl, 15 + ((afl->extras_cnt + afl->a_extras_cnt) ? 2 : 0))) { + retry_havoc: + + switch ((r = rand_below(afl, r_max))) { case 0: @@ -2192,85 +2655,96 @@ havoc_stage: } - /* Values 15 and 16 can be selected only if there are any extras - present in the dictionaries. */ + // TODO: add splicing mutation here. + // 15: + // break; - case 15: { + default: + if (r == 15 && (afl->extras_cnt || afl->a_extras_cnt)) { - /* Overwrite bytes with an extra. */ + /* Values 15 and 16 can be selected only if there are any extras + present in the dictionaries. */ - if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) { + /* Overwrite bytes with an extra. */ - /* No user-specified extras or odds in our favor. Let's use an - auto-detected one. */ + if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) { - u32 use_extra = rand_below(afl, afl->a_extras_cnt); - u32 extra_len = afl->a_extras[use_extra].len; - u32 insert_at; + /* No user-specified extras or odds in our favor. Let's use an + auto-detected one. */ - if (extra_len > temp_len) { break; } + u32 use_extra = rand_below(afl, afl->a_extras_cnt); + u32 extra_len = afl->a_extras[use_extra].len; + u32 insert_at; - insert_at = rand_below(afl, temp_len - extra_len + 1); - memcpy(out_buf + insert_at, afl->a_extras[use_extra].data, - extra_len); + if (extra_len > temp_len) { break; } - } else { + insert_at = rand_below(afl, temp_len - extra_len + 1); + memcpy(out_buf + insert_at, afl->a_extras[use_extra].data, + extra_len); - /* No auto extras or odds in our favor. Use the dictionary. */ + } else { - u32 use_extra = rand_below(afl, afl->extras_cnt); - u32 extra_len = afl->extras[use_extra].len; - u32 insert_at; + /* No auto extras or odds in our favor. Use the dictionary. */ - if (extra_len > temp_len) { break; } + u32 use_extra = rand_below(afl, afl->extras_cnt); + u32 extra_len = afl->extras[use_extra].len; + u32 insert_at; - insert_at = rand_below(afl, temp_len - extra_len + 1); - memcpy(out_buf + insert_at, afl->extras[use_extra].data, extra_len); + if (extra_len > temp_len) { break; } - } + insert_at = rand_below(afl, temp_len - extra_len + 1); + memcpy(out_buf + insert_at, afl->extras[use_extra].data, + extra_len); - break; + } - } + } else if (r == 16 && (afl->extras_cnt || afl->a_extras_cnt)) { - case 16: { + u32 use_extra, extra_len, insert_at = rand_below(afl, temp_len + 1); + u8 *ptr; - u32 use_extra, extra_len, insert_at = rand_below(afl, temp_len + 1); - u8 *ptr; + /* Insert an extra. Do the same dice-rolling stuff as for the + previous case. */ - /* Insert an extra. Do the same dice-rolling stuff as for the - previous case. */ + if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) { - if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) { + use_extra = rand_below(afl, afl->a_extras_cnt); + extra_len = afl->a_extras[use_extra].len; + ptr = afl->a_extras[use_extra].data; - use_extra = rand_below(afl, afl->a_extras_cnt); - extra_len = afl->a_extras[use_extra].len; - ptr = afl->a_extras[use_extra].data; + } else { - } else { + use_extra = rand_below(afl, afl->extras_cnt); + extra_len = afl->extras[use_extra].len; + ptr = afl->extras[use_extra].data; - use_extra = rand_below(afl, afl->extras_cnt); - extra_len = afl->extras[use_extra].len; - ptr = afl->extras[use_extra].data; + } - } + if (temp_len + extra_len >= MAX_FILE) { break; } - if (temp_len + extra_len >= MAX_FILE) { break; } + out_buf = ck_maybe_grow(BUF_PARAMS(out), temp_len + extra_len); - out_buf = ck_maybe_grow(BUF_PARAMS(out), temp_len + extra_len); + /* Tail */ + memmove(out_buf + insert_at + extra_len, out_buf + insert_at, + temp_len - insert_at); - /* Tail */ - memmove(out_buf + insert_at + extra_len, out_buf + insert_at, - temp_len - insert_at); + /* Inserted part */ + memcpy(out_buf + insert_at, ptr, extra_len); - /* Inserted part */ - memcpy(out_buf + insert_at, ptr, extra_len); + temp_len += extra_len; - temp_len += extra_len; + } else { - break; + // ascii mutations + if (text_mutation(afl, &out_buf, &temp_len) == 0) goto retry_havoc; - } + //#ifdef _AFL_DOCUMENT_MUTATIONS + // fprintf(stderr, "MUTATED: %s/mutations/%09u:*\n", + // afl->out_dir, + // afl->document_counter); + //#endif + + } } diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c index 7afdd9f1..da6b1eee 100644 --- a/src/afl-fuzz-queue.c +++ b/src/afl-fuzz-queue.c @@ -24,6 +24,7 @@ #include "afl-fuzz.h" #include <limits.h> +#include <ctype.h> /* Mark deterministic checks as done for a particular queue entry. We use the .state file to avoid repeating deterministic fuzzing when resuming aborted @@ -100,6 +101,119 @@ void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) { } +/* check if ascii or UTF-8 */ + +static u8 check_if_text(struct queue_entry *q) { + + if (q->len < AFL_TXT_MIN_LEN) return 0; + + u8 buf[MAX_FILE], bom[3] = {0xef, 0xbb, 0xbf}; + s32 fd, len = q->len, offset = 0, ascii = 0, utf8 = 0, comp; + + if ((fd = open(q->fname, O_RDONLY)) < 0) return 0; + if ((comp = read(fd, buf, len)) != len) return 0; + close(fd); + + while (offset < len) { + + // ASCII: <= 0x7F to allow ASCII control characters + if ((buf[offset + 0] == 0x09 || buf[offset + 0] == 0x0A || + buf[offset + 0] == 0x0D || + (0x20 <= buf[offset + 0] && buf[offset + 0] <= 0x7E))) { + + offset++; + utf8++; + ascii++; + continue; + + } + + if (isascii((int)buf[offset]) || isprint((int)buf[offset])) { + + ascii++; + // we continue though as it can also be a valid utf8 + + } + + // non-overlong 2-byte + if (((0xC2 <= buf[offset + 0] && buf[offset + 0] <= 0xDF) && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF))) { + + offset += 2; + utf8++; + comp--; + continue; + + } + + // excluding overlongs + if ((buf[offset + 0] == 0xE0 && + (0xA0 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && + buf[offset + 2] <= 0xBF)) || // straight 3-byte + (((0xE1 <= buf[offset + 0] && buf[offset + 0] <= 0xEC) || + buf[offset + 0] == 0xEE || buf[offset + 0] == 0xEF) && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && + buf[offset + 2] <= 0xBF)) || // excluding surrogates + (buf[offset + 0] == 0xED && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0x9F) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF))) { + + offset += 3; + utf8++; + comp -= 2; + continue; + + } + + // planes 1-3 + if ((buf[offset + 0] == 0xF0 && + (0x90 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && + (0x80 <= buf[offset + 3] && + buf[offset + 3] <= 0xBF)) || // planes 4-15 + ((0xF1 <= buf[offset + 0] && buf[offset + 0] <= 0xF3) && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && + (0x80 <= buf[offset + 3] && buf[offset + 3] <= 0xBF)) || // plane 16 + (buf[offset + 0] == 0xF4 && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0x8F) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && + (0x80 <= buf[offset + 3] && buf[offset + 3] <= 0xBF))) { + + offset += 4; + utf8++; + comp -= 3; + continue; + + } + + // handle utf8 bom + if (buf[offset + 0] == bom[0] && buf[offset + 1] == bom[1] && + buf[offset + 2] == bom[2]) { + + offset += 3; + utf8++; + comp -= 2; + continue; + + } + + offset++; + + } + + u32 percent_utf8 = (utf8 * 100) / comp; + u32 percent_ascii = (ascii * 100) / len; + + if (percent_utf8 >= percent_ascii && percent_utf8 >= AFL_TXT_MIN_PERCENT) + return 2; + if (percent_ascii >= AFL_TXT_MIN_PERCENT) return 1; + return 0; + +} + /* Append new test case to the queue. */ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) { @@ -159,6 +273,8 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) { } + q->is_ascii = check_if_text(q); + } /* Destroy the entire queue. */ diff --git a/src/afl-performance.c b/src/afl-performance.c index 0c1697a8..6631f148 100644 --- a/src/afl-performance.c +++ b/src/afl-performance.c @@ -44,10 +44,12 @@ void rand_set_seed(afl_state_t *afl, s64 init_seed) { } -uint64_t rand_next(afl_state_t *afl) { +uint32_t rand_next(afl_state_t *afl) { - const uint64_t result = - rotl(afl->rand_seed[0] + afl->rand_seed[3], 23) + afl->rand_seed[0]; + const uint32_t result = + (uint32_t)rotl(afl->rand_seed[0] + afl->rand_seed[3], 23) + + afl->rand_seed[0]; + // const uint32_t result = (uint32_t) rotl(afl->rand_seed[1] * 5, 7) * 9; const uint64_t t = afl->rand_seed[1] << 17; @@ -60,7 +62,7 @@ uint64_t rand_next(afl_state_t *afl) { afl->rand_seed[3] = rotl(afl->rand_seed[3], 45); - return result; + return (uint32_t)result; } |