aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorvan Hauser <vh@thc.org>2020-06-29 18:35:51 +0200
committerGitHub <noreply@github.com>2020-06-29 18:35:51 +0200
commit6d9b29daca46c8912aa9ddf6c053bc8554e9e9f7 (patch)
tree4c762f1e2cfb4a8741c08b5b60d07c2ae8eee860
parent07648f75ea5ef8f03a92db0c7566da8c229dc27b (diff)
parent76a2d9b59b23873c8a6d174a2f3c48eba60712fb (diff)
downloadafl++-6d9b29daca46c8912aa9ddf6c053bc8554e9e9f7.tar.gz
Merge branch 'text_inputs' into dev
-rw-r--r--include/afl-fuzz.h7
-rw-r--r--include/config.h26
-rw-r--r--src/afl-fuzz-one.c584
-rw-r--r--src/afl-fuzz-queue.c116
-rw-r--r--src/afl-performance.c10
5 files changed, 680 insertions, 63 deletions
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index c9f84c61..ca785e47 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -139,7 +139,8 @@ struct queue_entry {
var_behavior, /* Variable behavior? */
favored, /* Currently favored? */
fs_redundant, /* Marked as redundant in the fs? */
- fully_colorized; /* Do not run redqueen stage again */
+ fully_colorized, /* Do not run redqueen stage again */
+ is_ascii; /* Is the input just ascii text? */
u32 bitmap_size, /* Number of bits set in bitmap */
fuzz_level; /* Number of fuzzing iterations */
@@ -947,7 +948,7 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len,
u64 exec_cksum);
/* xoshiro256** */
-uint64_t rand_next(afl_state_t *afl);
+uint32_t rand_next(afl_state_t *afl);
/**** Inline routines ****/
@@ -967,7 +968,7 @@ static inline u32 rand_below(afl_state_t *afl, u32 limit) {
}
- return rand_next(afl) % limit;
+ return (rand_next(afl) % limit);
}
diff --git a/include/config.h b/include/config.h
index 087e0a76..09405a22 100644
--- a/include/config.h
+++ b/include/config.h
@@ -293,7 +293,7 @@
/* Call count interval between reseeding the libc PRNG from /dev/urandom: */
-#define RESEED_RNG 100000
+#define RESEED_RNG 256000
/* Maximum line length passed from GCC to 'as' and used for parsing
configuration files: */
@@ -397,5 +397,29 @@
// #define IGNORE_FINDS
+/* Text mutations */
+
+/* What is the minimum length of a queue input to be evaluated for "is_ascii"?
+ */
+
+#define AFL_TXT_MIN_LEN 12
+
+/* What is the minimum percentage of ascii characters present to be
+   classified as "is_ascii"? */
+
+#define AFL_TXT_MIN_PERCENT 95
+
+/* How often to perform ASCII mutations 0 = disable, 1-8 are good values */
+
+#define AFL_TXT_BIAS 8
+
+/* Maximum length of a string to tamper with */
+
+#define AFL_TXT_STRING_MAX_LEN 1024
+
+/* Maximum mutations on a string */
+
+#define AFL_TXT_STRING_MAX_MUTATIONS 6
+
#endif /* ! _HAVE_CONFIG_H */
diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c
index 72383727..9e54815c 100644
--- a/src/afl-fuzz-one.c
+++ b/src/afl-fuzz-one.c
@@ -24,6 +24,11 @@
*/
#include "afl-fuzz.h"
+#include <string.h>
+
+static u8 *strnstr(const u8 *s, const u8 *find, size_t slen);
+static u32 string_replace(u8 **out_buf, s32 *temp_len, u32 pos, u8 *from,
+ u8 *to);
/* MOpt */
@@ -362,6 +367,463 @@ static void locate_diffs(u8 *ptr1, u8 *ptr2, u32 len, s32 *first, s32 *last) {
#endif /* !IGNORE_FINDS */
+/* Not pretty, but saves a lot of writing */
+#define BUF_PARAMS(name) (void **)&afl->name##_buf, &afl->name##_size
+
+/* Find the first occurrence of the NUL-terminated pattern `find` within the
+   first `slen` bytes of `s`. Scanning for the pattern's first byte also
+   stops at a NUL byte in `s`. Returns a pointer to the start of the match,
+   `s` itself when `find` is empty, or NULL when nothing matches. */
+
+static u8 *strnstr(const u8 *s, const u8 *find, size_t slen) {
+
+  char first = *find++;
+
+  /* An empty pattern matches right at the start. */
+  if (first == '\0') { return (u8 *)s; }
+
+  /* Length of the pattern after its first byte. */
+  size_t rest_len = strlen(find);
+
+  for (;;) {
+
+    char cur;
+
+    /* Advance to the next byte equal to the pattern's first byte. */
+    for (;;) {
+
+      if (slen == 0) { return NULL; }
+      slen--;
+
+      cur = *s++;
+      if (cur == '\0') { return NULL; }
+      if (cur == first) { break; }
+
+    }
+
+    /* The remainder of the pattern no longer fits into the window. */
+    if (rest_len > slen) { return NULL; }
+
+    /* `s` already points one past the matched first byte. */
+    if (strncmp(s, find, rest_len) == 0) { return (u8 *)(s - 1); }
+
+  }
+
+}
+
+/* Replace the text between two delimiters. If rep == NULL, the text between
+ * the delimiters is duplicated instead. */
+
+/* Locate `ldelim` at or after offset `pos` in *out_buf, then `rdelim` within
+   at most AFL_TXT_STRING_MAX_LEN bytes after it, and rewrite the text that
+   sits between the two delimiters:
+
+     - rep != NULL: the text is replaced by the NUL-terminated string `rep`;
+     - rep == NULL: the text is duplicated, i.e. a second copy is inserted
+       directly in front of `rdelim`.
+
+   *temp_len is updated to the new data length. Returns 1 when both
+   delimiters were found, 0 otherwise (buffer untouched).
+
+   The buffer is never reallocated here, so the caller has to provide enough
+   spare capacity behind *temp_len for the data to grow.
+   The `+ 1` offsets treat `ldelim` as a single byte. */
+
+static u32 delim_replace(u8 **out_buf, s32 *temp_len, size_t pos,
+                         const u8 *ldelim, const u8 *rdelim, u8 *rep) {
+
+  u8 *end_buf = *out_buf + *temp_len;
+  u8 *ldelim_start = strnstr(*out_buf + pos, ldelim, *temp_len - pos);
+
+  if (ldelim_start == NULL) { return 0; }
+
+  /* Search window for the right delimiter, capped to the string limit. */
+  u32 max = (end_buf - ldelim_start - 1 > AFL_TXT_STRING_MAX_LEN
+                 ? AFL_TXT_STRING_MAX_LEN
+                 : end_buf - ldelim_start - 1);
+
+  if (max == 0) { return 0; }
+
+  u8 *rdelim_end = strnstr(ldelim_start + 1, rdelim, max);
+
+  if (rdelim_end == NULL) { return 0; }
+
+  /* Bytes currently between the delimiters, and bytes from the right
+     delimiter up to the end of the data. */
+  u32 delim_space_len = rdelim_end - ldelim_start - 1;
+  u32 tail_len = *temp_len - (rdelim_end - *out_buf);
+
+  if (rep == NULL) {
+
+    /* Duplicate: shift the tail right to open a gap, then copy the enclosed
+       text into that gap. The original code copied the text onto itself
+       and left the gap holding stale tail bytes. */
+
+    if (delim_space_len) {
+
+      memmove(rdelim_end + delim_space_len, rdelim_end, tail_len);
+      memcpy(rdelim_end, ldelim_start + 1, delim_space_len);
+      *temp_len = *temp_len + delim_space_len;
+
+    }
+
+    return 1;
+
+  }
+
+  u32 rep_len = (u32)strlen(rep);
+
+  /* Only move the tail when the enclosed length actually changes. */
+  if (rep_len != delim_space_len) {
+
+    memmove(ldelim_start + rep_len + 1, rdelim_end, tail_len);
+
+  }
+
+  memcpy(ldelim_start + 1, rep, rep_len);
+  *temp_len = (*temp_len - delim_space_len + rep_len);
+
+  return 1;
+
+}
+
+/* Swap the two fields of a "<ldelim> A <mdelim> B <rdelim>" construct that
+   starts at or after offset `pos` in *out_buf, turning it into
+   "<ldelim> B <mdelim> A <rdelim>". Each delimiter is searched within at
+   most AFL_TXT_STRING_MAX_LEN bytes. The overall data length does not
+   change, so *temp_len is only read.
+
+   Returns 1 when all three delimiters were found and the fields were
+   swapped, 0 otherwise (buffer untouched).
+
+   The `+ 1` offsets treat every delimiter as a single byte. */
+
+static u32 delim_swap(u8 **out_buf, s32 *temp_len, size_t pos, const u8 *ldelim,
+                      const u8 *mdelim, const u8 *rdelim) {
+
+  u8 *out_buf_end = *out_buf + *temp_len;
+  u32 max = (*temp_len - pos > AFL_TXT_STRING_MAX_LEN ? AFL_TXT_STRING_MAX_LEN
+                                                      : *temp_len - pos);
+  u8 *ldelim_start = strnstr(*out_buf + pos, ldelim, max);
+
+  if (ldelim_start == NULL) { return 0; }
+
+  max = (out_buf_end - ldelim_start - 1 > AFL_TXT_STRING_MAX_LEN
+             ? AFL_TXT_STRING_MAX_LEN
+             : out_buf_end - ldelim_start - 1);
+
+  if (max <= 1) { return 0; }
+
+  u8 *mdelim_pos = strnstr(ldelim_start + 1, mdelim, max);
+
+  if (mdelim_pos == NULL) { return 0; }
+
+  max = (out_buf_end - mdelim_pos - 1 > AFL_TXT_STRING_MAX_LEN
+             ? AFL_TXT_STRING_MAX_LEN
+             : out_buf_end - mdelim_pos - 1);
+
+  if (max == 0) { return 0; }
+
+  /* Search the buffer behind the middle delimiter. The original code passed
+     `mdelim + 1` here, i.e. it searched inside the delimiter string. */
+  u8 *rdelim_end = strnstr(mdelim_pos + 1, rdelim, max);
+
+  if (rdelim_end == NULL) { return 0; }
+
+  u32 first_len = mdelim_pos - ldelim_start - 1;
+  u32 second_len = rdelim_end - mdelim_pos - 1;
+
+  /* first_len is below the search window size, so it fits into scratch. */
+  u8 scratch[AFL_TXT_STRING_MAX_LEN];
+  u8 mid = *mdelim_pos;
+
+  /* Rebuild the span left to right: B, the middle delimiter, then A. All
+     writes stay in front of the right delimiter, and every source byte is
+     saved or moved before its location is overwritten. */
+  memcpy(scratch, ldelim_start + 1, first_len);
+  memmove(ldelim_start + 1, mdelim_pos + 1, second_len);
+  ldelim_start[1 + second_len] = mid;
+  memcpy(ldelim_start + 2 + second_len, scratch, first_len);
+
+  return 1;
+
+}
+
+/* Replace the first occurrence of the NUL-terminated string `from`, searched
+   from offset `pos` to the end of the data in *out_buf, with the
+   NUL-terminated string `to`. *temp_len is adjusted by the length
+   difference. Returns 1 when a replacement was made, 0 when `from` was not
+   found. The buffer is not reallocated here. */
+
+static u32 string_replace(u8 **out_buf, s32 *temp_len, u32 pos, u8 *from,
+                          u8 *to) {
+
+  u8 *hit = strnstr(*out_buf + pos, from, *temp_len - pos);
+
+  if (hit == NULL) { return 0; }
+
+  u32 old_len = strlen(from);
+  u32 new_len = strlen(to);
+
+  /* Shift everything behind the match when the lengths differ. */
+  if (old_len != new_len) {
+
+    memmove(hit + new_len, hit + old_len,
+            *temp_len - old_len - (hit - *out_buf));
+
+  }
+
+  memcpy(hit, to, new_len);
+  *temp_len = (*temp_len - old_len + new_len);
+
+  return 1;
+
+}
+
+/* Returns 1 if a mutant was generated and placed in out_buf, 0 if none
+ * generated. */
+
+/* Apply between 1 and AFL_TXT_STRING_MAX_MUTATIONS randomly chosen text
+   mutations to a copy of *out_buf held in the out_scratch buffer.
+
+   On success the out and out_scratch buffers are swapped, *out_buf points
+   to the mutated data and *orig_temp_len holds its length; the function
+   then returns 1. If no mutation matched (or the result is empty) it
+   returns 0 and leaves *out_buf and *orig_temp_len untouched.
+
+   Differences to the previous version of this function:
+     - the mutations now operate on the scratch copy instead of the live
+       buffer, which was edited in place and then swapped away;
+     - the scratch buffer gets AFL_TXT_STRING_MAX_LEN bytes of headroom per
+       possible mutation, because a single duplication can add almost that
+       many bytes;
+     - stopping early because the data became shorter than AFL_TXT_MIN_LEN
+       still commits the mutations that were already applied. */
+
+static int text_mutation(afl_state_t *afl, u8 **out_buf, s32 *orig_temp_len) {
+
+  enum { OP_REPLACE, OP_DELIM_REPLACE, OP_DELIM_SWAP };
+
+  /* One entry per mutation; the index is the value drawn from the RNG.
+     OP_REPLACE:       a = from,   b = to
+     OP_DELIM_REPLACE: a = ldelim, b = rdelim, c = rep (NULL = duplicate)
+     OP_DELIM_SWAP:    a = ldelim, b = mdelim, c = rdelim */
+  static const struct {
+
+    int         kind;
+    const char *a, *b, *c;
+
+  } ops[] = {
+
+      /*  0 */ {OP_REPLACE, "*", " ", NULL},
+      /*  1 */ {OP_REPLACE, "(", "(!", NULL},
+      /*  2 */ {OP_REPLACE, "==", "!=", NULL},
+      /*  3 */ {OP_REPLACE, "!=", "==", NULL},
+      /*  4 */ {OP_REPLACE, "==", "<", NULL},
+      /*  5 */ {OP_REPLACE, "<", "==", NULL},
+      /*  6 */ {OP_REPLACE, "==", ">", NULL},
+      /*  7 */ {OP_REPLACE, ">", "==", NULL},
+      /*  8 */ {OP_REPLACE, "=", "<", NULL},
+      /*  9 */ {OP_REPLACE, "=", ">", NULL},
+      /* 10 */ {OP_REPLACE, "<", ">", NULL},
+      /* 11 */ {OP_REPLACE, ">", "<", NULL},
+      /* 12 */ {OP_REPLACE, "++", "--", NULL},
+      /* 13 */ {OP_REPLACE, "--", "++", NULL},
+      /* 14 */ {OP_REPLACE, "+", "-", NULL},
+      /* 15 */ {OP_REPLACE, "+", "*", NULL},
+      /* 16 */ {OP_REPLACE, "+", "/", NULL},
+      /* 17 */ {OP_REPLACE, "+", "%", NULL},
+      /* 18 */ {OP_REPLACE, "*", "-", NULL},
+      /* 19 */ {OP_REPLACE, "*", "+", NULL},
+      /* 20 */ {OP_REPLACE, "*", "/", NULL},
+      /* 21 */ {OP_REPLACE, "*", "%", NULL},
+      /* 22 */ {OP_REPLACE, "-", "+", NULL},
+      /* 23 */ {OP_REPLACE, "-", "*", NULL},
+      /* 24 */ {OP_REPLACE, "-", "/", NULL},
+      /* 25 */ {OP_REPLACE, "-", "%", NULL},
+      /* 26 */ {OP_REPLACE, "/", "-", NULL},
+      /* 27 */ {OP_REPLACE, "/", "*", NULL},
+      /* 28 */ {OP_REPLACE, "/", "+", NULL},
+      /* 29 */ {OP_REPLACE, "/", "%", NULL},
+      /* 30 */ {OP_REPLACE, "%", "-", NULL},
+      /* 31 */ {OP_REPLACE, "%", "*", NULL},
+      /* 32 */ {OP_REPLACE, "%", "/", NULL},
+      /* 33 */ {OP_REPLACE, "%", "+", NULL},
+      /* 34 */ {OP_REPLACE, "->", ".", NULL},
+      /* 35 */ {OP_REPLACE, ".", "->", NULL},
+      /* 36 */ {OP_REPLACE, "0", "1", NULL},
+      /* 37 */ {OP_REPLACE, "1", "0", NULL},
+      /* 38 */ {OP_REPLACE, "if", "while", NULL},
+      /* 39 */ {OP_REPLACE, "while", "if", NULL},
+      /* 40 */ {OP_REPLACE, "!", " ", NULL},
+      /* 41 */ {OP_REPLACE, "&&", "||", NULL},
+      /* 42 */ {OP_REPLACE, "||", "&&", NULL},
+      /* 43 */ {OP_REPLACE, "!", "", NULL},
+      /* 44 */ {OP_REPLACE, "==", "=", NULL},
+      /* 45 */ {OP_REPLACE, "--", "", NULL},
+      /* 46 */ {OP_REPLACE, "<<", "<", NULL},
+      /* 47 */ {OP_REPLACE, ">>", ">", NULL},
+      /* 48 */ {OP_REPLACE, "<", "<<", NULL},
+      /* 49 */ {OP_REPLACE, ">", ">>", NULL},
+      /* 50 */ {OP_REPLACE, "\"", "'", NULL},
+      /* 51 */ {OP_REPLACE, "'", "\"", NULL},
+      /* 52 */ {OP_REPLACE, "(", "\"", NULL},
+      /* 53 */ {OP_REPLACE, "\n", " ", NULL},
+      /* 54 */ {OP_REPLACE, "\n", ";", NULL},
+      /* 55 */ {OP_REPLACE, "\n", "<", NULL},
+
+      /* 56: rewrite a semicolon delimited statement after a semicolon */
+      {OP_DELIM_REPLACE, ";", ";", ";"},
+      /* 57: rewrite a semicolon delimited statement after a right curly
+             brace */
+      {OP_DELIM_REPLACE, "}", ";", "}"},
+      /* 58: empty a curly brace construct */
+      {OP_DELIM_REPLACE, "{", "}", ""},
+      /* 59: replace the content of a curly brace construct with "{}" */
+      {OP_DELIM_REPLACE, "{", "}", "{}"},
+      /* 60, 61: swap semicolon delimited things */
+      {OP_DELIM_SWAP, ";", ";", ";"},
+      {OP_DELIM_SWAP, "}", ";", ";"},
+      /* 62 - 65: swap comma delimited things */
+      {OP_DELIM_SWAP, "(", ",", ")"},
+      {OP_DELIM_SWAP, "(", ",", ","},
+      {OP_DELIM_SWAP, ",", ",", ","},
+      {OP_DELIM_SWAP, ",", ",", ")"},
+      /* 66: just delete a line */
+      {OP_DELIM_REPLACE, "\n", "\n", ""},
+      /* 67 - 69: delete something like "const" */
+      {OP_DELIM_REPLACE, " ", " ", ""},
+      {OP_DELIM_REPLACE, "\n", " ", ""},
+      {OP_DELIM_REPLACE, "(", " ", ""},
+      /* 70 - 73: swap space delimited things */
+      {OP_DELIM_SWAP, " ", " ", " "},
+      {OP_DELIM_SWAP, " ", " ", ")"},
+      {OP_DELIM_SWAP, "(", " ", " "},
+      {OP_DELIM_SWAP, "(", " ", ")"},
+      /* 74: duplicate a single line of code */
+      {OP_DELIM_REPLACE, "\n", "\n", NULL},
+      /* 75: duplicate a construct (most often, a non-nested for loop) */
+      {OP_DELIM_REPLACE, "\n", "}", NULL},
+
+  };
+
+  const u32 op_cnt = sizeof(ops) / sizeof(ops[0]);
+
+  s32 temp_len = *orig_temp_len;
+  u32 yes = 0, mutations = rand_below(afl, AFL_TXT_STRING_MAX_MUTATIONS) + 1;
+
+  /* Every mutation grows the data by less than AFL_TXT_STRING_MAX_LEN bytes
+     (the largest growth is a duplication), so reserve that much headroom
+     for each of the possible mutations. */
+  u8 *new_buf = ck_maybe_grow(
+      BUF_PARAMS(out_scratch),
+      *orig_temp_len + AFL_TXT_STRING_MAX_MUTATIONS * AFL_TXT_STRING_MAX_LEN);
+
+  memcpy(new_buf, *out_buf, temp_len);
+
+  for (u32 i = 0; i < mutations; i++) {
+
+    /* Too short to be worth mutating any further. */
+    if (temp_len < AFL_TXT_MIN_LEN) { break; }
+
+    u32 pos = rand_below(afl, temp_len - 1);
+    u32 choice = rand_below(afl, op_cnt);
+
+    u8 *a = (u8 *)ops[choice].a;
+    u8 *b = (u8 *)ops[choice].b;
+    u8 *c = (u8 *)ops[choice].c;
+
+    /* All helpers work on the scratch copy, never on the live buffer. */
+    switch (ops[choice].kind) {
+
+      case OP_REPLACE:
+        yes += string_replace(&new_buf, &temp_len, pos, a, b);
+        break;
+
+      case OP_DELIM_REPLACE:
+        yes += delim_replace(&new_buf, &temp_len, pos, a, b, c);
+        break;
+
+      default:
+        yes += delim_swap(&new_buf, &temp_len, pos, a, b, c);
+        break;
+
+    }
+
+  }
+
+  if (yes == 0 || temp_len <= 0) { return 0; }
+
+  /* Commit: the mutated scratch buffer becomes the output buffer. */
+  swap_bufs(BUF_PARAMS(out), BUF_PARAMS(out_scratch));
+  *out_buf = new_buf;
+  *orig_temp_len = temp_len;
+
+  return 1;
+
+}
+
/* Take the current entry from the queue, fuzz it for a while. This
function is a tad too long... returns 0 if fuzzed successfully, 1 if
skipped or bailed out. */
@@ -378,9 +840,6 @@ u8 fuzz_one_original(afl_state_t *afl) {
u8 a_collect[MAX_AUTO_EXTRA];
u32 a_len = 0;
-/* Not pretty, but saves a lot of writing */
-#define BUF_PARAMS(name) (void **)&afl->name##_buf, &afl->name##_size
-
#ifdef IGNORE_FINDS
/* In IGNORE_FINDS mode, skip any entries that weren't in the
@@ -1854,9 +2313,12 @@ havoc_stage:
/* We essentially just do several thousand runs (depending on perf_score)
where we take the input file and make random stacked tweaks. */
+ u32 r_max = 15 + ((afl->extras_cnt + afl->a_extras_cnt) ? 2 : 0) +
+ (afl->queue_cur->is_ascii ? AFL_TXT_BIAS : 0);
+
for (afl->stage_cur = 0; afl->stage_cur < afl->stage_max; ++afl->stage_cur) {
- u32 use_stacking = 1 << (1 + rand_below(afl, HAVOC_STACK_POW2));
+ u32 r, use_stacking = 1 << (1 + rand_below(afl, HAVOC_STACK_POW2));
afl->stage_cur_val = use_stacking;
@@ -1896,8 +2358,9 @@ havoc_stage:
}
- switch (rand_below(
- afl, 15 + ((afl->extras_cnt + afl->a_extras_cnt) ? 2 : 0))) {
+ retry_havoc:
+
+ switch ((r = rand_below(afl, r_max))) {
case 0:
@@ -2192,85 +2655,96 @@ havoc_stage:
}
- /* Values 15 and 16 can be selected only if there are any extras
- present in the dictionaries. */
+ // TODO: add splicing mutation here.
+ // 15:
+ // break;
- case 15: {
+ default:
+ if (r == 15 && (afl->extras_cnt || afl->a_extras_cnt)) {
- /* Overwrite bytes with an extra. */
+ /* Values 15 and 16 can be selected only if there are any extras
+ present in the dictionaries. */
- if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) {
+ /* Overwrite bytes with an extra. */
- /* No user-specified extras or odds in our favor. Let's use an
- auto-detected one. */
+ if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) {
- u32 use_extra = rand_below(afl, afl->a_extras_cnt);
- u32 extra_len = afl->a_extras[use_extra].len;
- u32 insert_at;
+ /* No user-specified extras or odds in our favor. Let's use an
+ auto-detected one. */
- if (extra_len > temp_len) { break; }
+ u32 use_extra = rand_below(afl, afl->a_extras_cnt);
+ u32 extra_len = afl->a_extras[use_extra].len;
+ u32 insert_at;
- insert_at = rand_below(afl, temp_len - extra_len + 1);
- memcpy(out_buf + insert_at, afl->a_extras[use_extra].data,
- extra_len);
+ if (extra_len > temp_len) { break; }
- } else {
+ insert_at = rand_below(afl, temp_len - extra_len + 1);
+ memcpy(out_buf + insert_at, afl->a_extras[use_extra].data,
+ extra_len);
- /* No auto extras or odds in our favor. Use the dictionary. */
+ } else {
- u32 use_extra = rand_below(afl, afl->extras_cnt);
- u32 extra_len = afl->extras[use_extra].len;
- u32 insert_at;
+ /* No auto extras or odds in our favor. Use the dictionary. */
- if (extra_len > temp_len) { break; }
+ u32 use_extra = rand_below(afl, afl->extras_cnt);
+ u32 extra_len = afl->extras[use_extra].len;
+ u32 insert_at;
- insert_at = rand_below(afl, temp_len - extra_len + 1);
- memcpy(out_buf + insert_at, afl->extras[use_extra].data, extra_len);
+ if (extra_len > temp_len) { break; }
- }
+ insert_at = rand_below(afl, temp_len - extra_len + 1);
+ memcpy(out_buf + insert_at, afl->extras[use_extra].data,
+ extra_len);
- break;
+ }
- }
+ } else if (r == 16 && (afl->extras_cnt || afl->a_extras_cnt)) {
- case 16: {
+ u32 use_extra, extra_len, insert_at = rand_below(afl, temp_len + 1);
+ u8 *ptr;
- u32 use_extra, extra_len, insert_at = rand_below(afl, temp_len + 1);
- u8 *ptr;
+ /* Insert an extra. Do the same dice-rolling stuff as for the
+ previous case. */
- /* Insert an extra. Do the same dice-rolling stuff as for the
- previous case. */
+ if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) {
- if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) {
+ use_extra = rand_below(afl, afl->a_extras_cnt);
+ extra_len = afl->a_extras[use_extra].len;
+ ptr = afl->a_extras[use_extra].data;
- use_extra = rand_below(afl, afl->a_extras_cnt);
- extra_len = afl->a_extras[use_extra].len;
- ptr = afl->a_extras[use_extra].data;
+ } else {
- } else {
+ use_extra = rand_below(afl, afl->extras_cnt);
+ extra_len = afl->extras[use_extra].len;
+ ptr = afl->extras[use_extra].data;
- use_extra = rand_below(afl, afl->extras_cnt);
- extra_len = afl->extras[use_extra].len;
- ptr = afl->extras[use_extra].data;
+ }
- }
+ if (temp_len + extra_len >= MAX_FILE) { break; }
- if (temp_len + extra_len >= MAX_FILE) { break; }
+ out_buf = ck_maybe_grow(BUF_PARAMS(out), temp_len + extra_len);
- out_buf = ck_maybe_grow(BUF_PARAMS(out), temp_len + extra_len);
+ /* Tail */
+ memmove(out_buf + insert_at + extra_len, out_buf + insert_at,
+ temp_len - insert_at);
- /* Tail */
- memmove(out_buf + insert_at + extra_len, out_buf + insert_at,
- temp_len - insert_at);
+ /* Inserted part */
+ memcpy(out_buf + insert_at, ptr, extra_len);
- /* Inserted part */
- memcpy(out_buf + insert_at, ptr, extra_len);
+ temp_len += extra_len;
- temp_len += extra_len;
+ } else {
- break;
+ // ascii mutations
+ if (text_mutation(afl, &out_buf, &temp_len) == 0) goto retry_havoc;
- }
+ //#ifdef _AFL_DOCUMENT_MUTATIONS
+ // fprintf(stderr, "MUTATED: %s/mutations/%09u:*\n",
+ // afl->out_dir,
+ // afl->document_counter);
+ //#endif
+
+ }
}
diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c
index 7afdd9f1..da6b1eee 100644
--- a/src/afl-fuzz-queue.c
+++ b/src/afl-fuzz-queue.c
@@ -24,6 +24,7 @@
#include "afl-fuzz.h"
#include <limits.h>
+#include <ctype.h>
/* Mark deterministic checks as done for a particular queue entry. We use the
.state file to avoid repeating deterministic fuzzing when resuming aborted
@@ -100,6 +101,119 @@ void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) {
}
+/* check if ascii or UTF-8 */
+
+/* Classify the file behind queue entry `q` as text or not.
+
+   The file is read completely and scanned byte by byte. Tab, LF, CR and
+   0x20-0x7E count as both ASCII and UTF-8 characters; other bytes accepted
+   by isascii()/isprint() count as ASCII only; well-formed 2, 3 and 4 byte
+   UTF-8 sequences count as one UTF-8 character each.
+
+   Returns 2 if the UTF-8 share is at least AFL_TXT_MIN_PERCENT and not
+   below the ASCII share, 1 if the ASCII share is at least
+   AFL_TXT_MIN_PERCENT, and 0 otherwise - including inputs shorter than
+   AFL_TXT_MIN_LEN, inputs longer than MAX_FILE and files that cannot be
+   read completely.
+
+   NOTE(review): the read buffer is a MAX_FILE sized stack array, which may
+   be large for some stack limits - confirm or move it off the stack. */
+
+static u8 check_if_text(struct queue_entry *q) {
+
+  if (q->len < AFL_TXT_MIN_LEN) return 0;
+
+  /* The data has to fit into the local buffer. */
+  if (q->len > MAX_FILE) return 0;
+
+  /* Three extra bytes so that the multi-byte lookahead at the very end of
+     the data stays inside the array. */
+  u8  buf[MAX_FILE + 3];
+  s32 fd, len = q->len, offset = 0, ascii = 0, utf8 = 0, comp;
+
+  if ((fd = open(q->fname, O_RDONLY)) < 0) return 0;
+
+  /* Close the descriptor before looking at the result so that it is not
+     leaked when the read comes up short. */
+  comp = read(fd, buf, len);
+  close(fd);
+
+  if (comp != len) return 0;
+
+  /* Zero the padding: 0x00 is never a valid continuation byte, so a
+     sequence truncated by the end of the data is not accepted. */
+  buf[len] = 0;
+  buf[len + 1] = 0;
+  buf[len + 2] = 0;
+
+  /* From here on `comp` counts characters: it starts at the byte count and
+     loses n - 1 for every n byte sequence that is accepted. */
+
+  while (offset < len) {
+
+    const u8 b0 = buf[offset], b1 = buf[offset + 1], b2 = buf[offset + 2],
+             b3 = buf[offset + 3];
+
+    /* Continuation byte checks (0x80 - 0xBF). */
+    const int c1 = (0x80 <= b1 && b1 <= 0xBF);
+    const int c2 = (0x80 <= b2 && b2 <= 0xBF);
+    const int c3 = (0x80 <= b3 && b3 <= 0xBF);
+
+    /* Tab, LF, CR and printable ASCII are text in both encodings. */
+    if (b0 == 0x09 || b0 == 0x0A || b0 == 0x0D || (0x20 <= b0 && b0 <= 0x7E)) {
+
+      offset++;
+      utf8++;
+      ascii++;
+      continue;
+
+    }
+
+    if (isascii((int)b0) || isprint((int)b0)) {
+
+      ascii++;
+      // we continue though as it can also be a valid utf8
+
+    }
+
+    /* non-overlong 2-byte */
+    if ((0xC2 <= b0 && b0 <= 0xDF) && c1) {
+
+      offset += 2;
+      utf8++;
+      comp--;
+      continue;
+
+    }
+
+    /* 3-byte: excluding overlongs (0xE0), straight 3-byte, excluding
+       surrogates (0xED). The UTF-8 BOM (EF BB BF) is matched here as well,
+       so it needs no branch of its own. */
+    if ((b0 == 0xE0 && (0xA0 <= b1 && b1 <= 0xBF) && c2) ||
+        (((0xE1 <= b0 && b0 <= 0xEC) || b0 == 0xEE || b0 == 0xEF) && c1 &&
+         c2) ||
+        (b0 == 0xED && (0x80 <= b1 && b1 <= 0x9F) && c2)) {
+
+      offset += 3;
+      utf8++;
+      comp -= 2;
+      continue;
+
+    }
+
+    /* 4-byte: planes 1-3 (0xF0), planes 4-15 (0xF1-0xF3), plane 16
+       (0xF4). */
+    if ((b0 == 0xF0 && (0x90 <= b1 && b1 <= 0xBF) && c2 && c3) ||
+        ((0xF1 <= b0 && b0 <= 0xF3) && c1 && c2 && c3) ||
+        (b0 == 0xF4 && (0x80 <= b1 && b1 <= 0x8F) && c2 && c3)) {
+
+      offset += 4;
+      utf8++;
+      comp -= 3;
+      continue;
+
+    }
+
+    /* Neither text nor the start of a well-formed sequence. */
+    offset++;
+
+  }
+
+  u32 percent_utf8 = (utf8 * 100) / comp;
+  u32 percent_ascii = (ascii * 100) / len;
+
+  if (percent_utf8 >= percent_ascii && percent_utf8 >= AFL_TXT_MIN_PERCENT)
+    return 2;
+  if (percent_ascii >= AFL_TXT_MIN_PERCENT) return 1;
+  return 0;
+
+}
+
/* Append new test case to the queue. */
void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
@@ -159,6 +273,8 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
}
+ q->is_ascii = check_if_text(q);
+
}
/* Destroy the entire queue. */
diff --git a/src/afl-performance.c b/src/afl-performance.c
index 0c1697a8..6631f148 100644
--- a/src/afl-performance.c
+++ b/src/afl-performance.c
@@ -44,10 +44,12 @@ void rand_set_seed(afl_state_t *afl, s64 init_seed) {
}
-uint64_t rand_next(afl_state_t *afl) {
+uint32_t rand_next(afl_state_t *afl) {
- const uint64_t result =
- rotl(afl->rand_seed[0] + afl->rand_seed[3], 23) + afl->rand_seed[0];
+ const uint32_t result =
+ (uint32_t)rotl(afl->rand_seed[0] + afl->rand_seed[3], 23) +
+ afl->rand_seed[0];
+ // const uint32_t result = (uint32_t) rotl(afl->rand_seed[1] * 5, 7) * 9;
const uint64_t t = afl->rand_seed[1] << 17;
@@ -60,7 +62,7 @@ uint64_t rand_next(afl_state_t *afl) {
afl->rand_seed[3] = rotl(afl->rand_seed[3], 45);
- return result;
+ return (uint32_t)result;
}