Merge branch 'text_inputs' into dev

author: van Hauser <vh@thc.org> 2020-06-29 18:35:51 +0200
committer: GitHub <noreply@github.com> 2020-06-29 18:35:51 +0200
commit: 6d9b29daca46c8912aa9ddf6c053bc8554e9e9f7 (patch)
tree: 4c762f1e2cfb4a8741c08b5b60d07c2ae8eee860
parent: 07648f75ea5ef8f03a92db0c7566da8c229dc27b (diff)
parent: 76a2d9b59b23873c8a6d174a2f3c48eba60712fb (diff)
download: afl++-6d9b29daca46c8912aa9ddf6c053bc8554e9e9f7.tar.gz
5 files changed, 680 insertions, 63 deletions
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index c9f84c61..ca785e47 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -139,7 +139,8 @@ struct queue_entry {
       var_behavior,                     /* Variable behavior?               */
       favored,                          /* Currently favored?               */
       fs_redundant,                     /* Marked as redundant in the fs?   */
-      fully_colorized;                  /* Do not run redqueen stage again  */
+      fully_colorized,                  /* Do not run redqueen stage again  */
+      is_ascii;                         /* Is the input just ascii text?    */
 
   u32 bitmap_size,                      /* Number of bits set in bitmap     */
       fuzz_level;                       /* Number of fuzzing iterations     */
@@ -947,7 +948,7 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len,
                         u64 exec_cksum);
 
 /* xoshiro256** */
-uint64_t rand_next(afl_state_t *afl);
+uint32_t rand_next(afl_state_t *afl);
 
 /**** Inline routines ****/
 
@@ -967,7 +968,7 @@ static inline u32 rand_below(afl_state_t *afl, u32 limit) {
 
   }
 
-  return rand_next(afl) % limit;
+  return (rand_next(afl) % limit);
 
 }
 
diff --git a/include/config.h b/include/config.h
index 087e0a76..09405a22 100644
--- a/include/config.h
+++ b/include/config.h
@@ -293,7 +293,7 @@
 
 /* Call count interval between reseeding the libc PRNG from /dev/urandom: */
 
-#define RESEED_RNG 100000
+#define RESEED_RNG 256000
 
 /* Maximum line length passed from GCC to 'as' and used for parsing
    configuration files: */
@@ -397,5 +397,29 @@
 
 // #define IGNORE_FINDS
 
+/* Text mutations */
+
+/* What is the minimum length of a queue input to be evaluated for "is_ascii"?
+ */
+
+#define AFL_TXT_MIN_LEN 12
+
+/* What is the minimum percentage of ascii characters present to be
+   classified as "is_ascii"? */
+
+#define AFL_TXT_MIN_PERCENT 95
+
+/* How often to perform ASCII mutations: 0 = disable, 1-8 are good values */
+
+#define AFL_TXT_BIAS 8
+
+/* Maximum length of a string to tamper with */
+
+#define AFL_TXT_STRING_MAX_LEN 1024
+
+/* Maximum mutations on a string */
+
+#define AFL_TXT_STRING_MAX_MUTATIONS 6
+
 #endif                                                  /* ! _HAVE_CONFIG_H */
 
diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c
index 72383727..9e54815c 100644
--- a/src/afl-fuzz-one.c
+++ b/src/afl-fuzz-one.c
@@ -24,6 +24,11 @@
  */
 
 #include "afl-fuzz.h"
+#include <string.h>
+
+static u8 *strnstr(const u8 *s, const u8 *find, size_t slen);
+static u32 string_replace(u8 **out_buf, s32 *temp_len, u32 pos, u8 *from,
+                          u8 *to);
 
 /* MOpt */
 
@@ -362,6 +367,463 @@ static void locate_diffs(u8 *ptr1, u8 *ptr2, u32 len, s32 *first, s32 *last) {
 
 #endif                                                     /* !IGNORE_FINDS */
 
+/* Not pretty, but saves a lot of writing */
+#define BUF_PARAMS(name) (void **)&afl->name##_buf, &afl->name##_size
+
+static u8 *strnstr(const u8 *s, const u8 *find, size_t slen) {
+  /* Find NUL-terminated `find` in `s`, reading at most slen bytes of `s`. */
+  char   c, sc;
+  size_t len;
+  /* Returns a pointer to the match, `s` itself if `find` is empty, or NULL. */
+  if ((c = *find++) != '\0') {
+    /* c holds the first byte of find; find now points at the rest of it */
+    len = strlen(find);
+    do {
+
+      do {
+        /* scan for the first byte; give up at the limit or at a NUL in s */
+        if (slen-- < 1 || (sc = *s++) == '\0') return (NULL);
+
+      } while (sc != c);
+      /* the bytes left in the window cannot hold the rest of find */
+      if (len > slen) return (NULL);
+
+    } while (strncmp(s, find, len) != 0);
+    /* s is one past the matched first byte: step back onto the match */
+    s--;
+
+  }
+
+  return ((u8 *)s);
+
+}
+
+/* Replace the text between ldelim and rdelim (first match at or after pos)
+ * with rep; if rep == NULL, duplicate that text. Returns 1 on change, else 0 */
+
+static u32 delim_replace(u8 **out_buf, s32 *temp_len, size_t pos,
+                         const u8 *ldelim, const u8 *rdelim, u8 *rep) {
+
+  u8 *end_buf = *out_buf + *temp_len;
+  u8 *ldelim_start = strnstr(*out_buf + pos, ldelim, *temp_len - pos);
+  /* the buffer is not reallocated here - the caller must provide the room */
+  if (ldelim_start != NULL) {
+
+    u32 max = (end_buf - ldelim_start - 1 > AFL_TXT_STRING_MAX_LEN
+                   ? AFL_TXT_STRING_MAX_LEN
+                   : end_buf - ldelim_start - 1);
+
+    if (max > 0) {
+
+      u8 *rdelim_end = strnstr(ldelim_start + 1, rdelim, max);
+
+      if (rdelim_end != NULL) {
+
+        u32 rep_len, delim_space_len = rdelim_end - ldelim_start - 1, xtra = 0;
+
+        if (rep != NULL) {
+
+          rep_len = (u32)strlen(rep);
+
+        } else {  // NULL? then we copy the value in between the delimiters
+
+          rep_len = delim_space_len;
+          delim_space_len = 0;
+          rep = ldelim_start + 1;
+          xtra = rep_len;
+
+        }
+        /* shift rdelim + tail; xtra leaves room for the duplicated text */
+        if (rep_len != delim_space_len) {
+
+          memmove(ldelim_start + rep_len + xtra + 1, rdelim_end,
+                  *temp_len - (rdelim_end - *out_buf));
+
+        }
+        /* when duplicating, write the copy right behind the original */
+        memcpy(ldelim_start + xtra + 1, rep, rep_len);
+        *temp_len = (*temp_len - delim_space_len + rep_len);
+
+        return 1;
+
+      }
+
+    }
+
+  }
+
+  return 0;
+
+}
+
+static u32 delim_swap(u8 **out_buf, s32 *temp_len, size_t pos, const u8 *ldelim,
+                      const u8 *mdelim, const u8 *rdelim) {
+
+  /* Swap the two fields of "L first M second R", where L, M and R are the
+     first matches of ldelim, mdelim and rdelim at or after pos. Delimiters
+     are treated as one byte wide. Returns 1 if swapped, 0 otherwise. */
+
+  u8 *out_buf_end = *out_buf + *temp_len;
+  u32 max = (*temp_len - pos > AFL_TXT_STRING_MAX_LEN ? AFL_TXT_STRING_MAX_LEN
+                                                      : *temp_len - pos);
+  u8 *ldelim_start = strnstr(*out_buf + pos, ldelim, max);
+
+  if (ldelim_start != NULL) {
+
+    max = (out_buf_end - ldelim_start - 1 > AFL_TXT_STRING_MAX_LEN
+               ? AFL_TXT_STRING_MAX_LEN
+               : out_buf_end - ldelim_start - 1);
+    if (max > 1) {
+
+      u8 *mdelim_pos = strnstr(ldelim_start + 1, mdelim, max);
+
+      if (mdelim_pos != NULL) {
+
+        max = (out_buf_end - mdelim_pos - 1 > AFL_TXT_STRING_MAX_LEN
+                   ? AFL_TXT_STRING_MAX_LEN
+                   : out_buf_end - mdelim_pos - 1);
+        if (max > 0) {
+
+          /* search on in the buffer right behind M, not inside the mdelim
+             string itself */
+          u8 *rdelim_end = strnstr(mdelim_pos + 1, rdelim, max);
+
+          if (rdelim_end != NULL) {
+
+            u32 first_len = mdelim_pos - ldelim_start - 1;
+            u32 second_len = rdelim_end - mdelim_pos - 1;
+            u8  scratch[AFL_TXT_STRING_MAX_LEN];
+            u8  mid = *mdelim_pos;
+
+            /* The mdelim search is capped at AFL_TXT_STRING_MAX_LEN bytes, so
+               first always fits into scratch. */
+            memcpy(scratch, ldelim_start + 1, first_len);
+
+            /* Slide second down behind L. It may overlap first and M, hence
+               memmove and the copy of M saved above. */
+            memmove(ldelim_start + 1, mdelim_pos + 1, second_len);
+
+            /* Re-insert M, then first. The total length does not change, so
+               R, the tail and *temp_len stay as they are. */
+            ldelim_start[second_len + 1] = mid;
+            memcpy(ldelim_start + second_len + 2, scratch, first_len);
+
+            return 1;
+
+          }
+
+        }
+
+      }
+
+    }
+
+  }
+
+  return 0;
+
+}
+
+static u32 string_replace(u8 **out_buf, s32 *temp_len, u32 pos, u8 *from,
+                          u8 *to) {
+
+  /* Replace the first occurrence of `from` at or after pos with `to`.
+     The buffer is not grown here; returns 1 on replacement, else 0. */
+
+  u8 *hit = strnstr(*out_buf + pos, from, *temp_len - pos);
+
+  if (!hit) { return 0; }
+
+  u32 old_len = strlen(from);
+  u32 new_len = strlen(to);
+
+  if (old_len != new_len) {
+
+    /* move the tail so that it ends up right behind the replacement */
+    size_t tail_len = *temp_len - old_len - (hit - *out_buf);
+    memmove(hit + new_len, hit + old_len, tail_len);
+
+  }
+
+  memcpy(hit, to, new_len);
+  *temp_len = (*temp_len - old_len + new_len);
+
+  return 1;
+
+}
+
+/* Apply a few random text mutations to a scratch copy of *out_buf. Returns 1
+ * (copy installed as the new out_buf, length updated) or 0 (nothing changed) */
+
+static int text_mutation(afl_state_t *afl, u8 **out_buf, s32 *orig_temp_len) {
+
+  s32 temp_len = *orig_temp_len;
+  u32 pos, yes = 0,
+           mutations = rand_below(afl, AFL_TXT_STRING_MAX_MUTATIONS) + 1;
+
+  /* A single mutation grows the text by at most AFL_TXT_STRING_MAX_LEN
+     bytes (duplication), so reserve that much slack for each one. */
+  u8 *new_buf = ck_maybe_grow(
+      BUF_PARAMS(out_scratch),
+      temp_len + AFL_TXT_STRING_MAX_MUTATIONS * AFL_TXT_STRING_MAX_LEN);
+  memcpy(new_buf, *out_buf, temp_len);
+
+  for (u32 i = 0; i < mutations; i++) {
+
+    /* too short for further mutations: keep what we have so far */
+    if (temp_len < AFL_TXT_MIN_LEN) { break; }
+
+    /* All mutators work on the scratch copy: only that buffer has room to
+       grow, and the caller's data stays intact if we return 0. */
+
+    pos = rand_below(afl, temp_len - 1);
+    int choice = rand_below(afl, 76);
+    switch (choice) {
+
+      case 0:
+        yes += string_replace(&new_buf, &temp_len, pos, "*", " ");
+        break;
+      case 1:
+        yes += string_replace(&new_buf, &temp_len, pos, "(", "(!");
+        break;
+      case 2:
+        yes += string_replace(&new_buf, &temp_len, pos, "==", "!=");
+        break;
+      case 3:
+        yes += string_replace(&new_buf, &temp_len, pos, "!=", "==");
+        break;
+      case 4:
+        yes += string_replace(&new_buf, &temp_len, pos, "==", "<");
+        break;
+      case 5:
+        yes += string_replace(&new_buf, &temp_len, pos, "<", "==");
+        break;
+      case 6:
+        yes += string_replace(&new_buf, &temp_len, pos, "==", ">");
+        break;
+      case 7:
+        yes += string_replace(&new_buf, &temp_len, pos, ">", "==");
+        break;
+      case 8:
+        yes += string_replace(&new_buf, &temp_len, pos, "=", "<");
+        break;
+      case 9:
+        yes += string_replace(&new_buf, &temp_len, pos, "=", ">");
+        break;
+      case 10:
+        yes += string_replace(&new_buf, &temp_len, pos, "<", ">");
+        break;
+      case 11:
+        yes += string_replace(&new_buf, &temp_len, pos, ">", "<");
+        break;
+      case 12:
+        yes += string_replace(&new_buf, &temp_len, pos, "++", "--");
+        break;
+      case 13:
+        yes += string_replace(&new_buf, &temp_len, pos, "--", "++");
+        break;
+      case 14:
+        yes += string_replace(&new_buf, &temp_len, pos, "+", "-");
+        break;
+      case 15:
+        yes += string_replace(&new_buf, &temp_len, pos, "+", "*");
+        break;
+      case 16:
+        yes += string_replace(&new_buf, &temp_len, pos, "+", "/");
+        break;
+      case 17:
+        yes += string_replace(&new_buf, &temp_len, pos, "+", "%");
+        break;
+      case 18:
+        yes += string_replace(&new_buf, &temp_len, pos, "*", "-");
+        break;
+      case 19:
+        yes += string_replace(&new_buf, &temp_len, pos, "*", "+");
+        break;
+      case 20:
+        yes += string_replace(&new_buf, &temp_len, pos, "*", "/");
+        break;
+      case 21:
+        yes += string_replace(&new_buf, &temp_len, pos, "*", "%");
+        break;
+      case 22:
+        yes += string_replace(&new_buf, &temp_len, pos, "-", "+");
+        break;
+      case 23:
+        yes += string_replace(&new_buf, &temp_len, pos, "-", "*");
+        break;
+      case 24:
+        yes += string_replace(&new_buf, &temp_len, pos, "-", "/");
+        break;
+      case 25:
+        yes += string_replace(&new_buf, &temp_len, pos, "-", "%");
+        break;
+      case 26:
+        yes += string_replace(&new_buf, &temp_len, pos, "/", "-");
+        break;
+      case 27:
+        yes += string_replace(&new_buf, &temp_len, pos, "/", "*");
+        break;
+      case 28:
+        yes += string_replace(&new_buf, &temp_len, pos, "/", "+");
+        break;
+      case 29:
+        yes += string_replace(&new_buf, &temp_len, pos, "/", "%");
+        break;
+      case 30:
+        yes += string_replace(&new_buf, &temp_len, pos, "%", "-");
+        break;
+      case 31:
+        yes += string_replace(&new_buf, &temp_len, pos, "%", "*");
+        break;
+      case 32:
+        yes += string_replace(&new_buf, &temp_len, pos, "%", "/");
+        break;
+      case 33:
+        yes += string_replace(&new_buf, &temp_len, pos, "%", "+");
+        break;
+      case 34:
+        yes += string_replace(&new_buf, &temp_len, pos, "->", ".");
+        break;
+      case 35:
+        yes += string_replace(&new_buf, &temp_len, pos, ".", "->");
+        break;
+      case 36:
+        yes += string_replace(&new_buf, &temp_len, pos, "0", "1");
+        break;
+      case 37:
+        yes += string_replace(&new_buf, &temp_len, pos, "1", "0");
+        break;
+      case 38:
+        yes += string_replace(&new_buf, &temp_len, pos, "if", "while");
+        break;
+      case 39:
+        yes += string_replace(&new_buf, &temp_len, pos, "while", "if");
+        break;
+      case 40:
+        yes += string_replace(&new_buf, &temp_len, pos, "!", " ");
+        break;
+      case 41:
+        yes += string_replace(&new_buf, &temp_len, pos, "&&", "||");
+        break;
+      case 42:
+        yes += string_replace(&new_buf, &temp_len, pos, "||", "&&");
+        break;
+      case 43:
+        yes += string_replace(&new_buf, &temp_len, pos, "!", "");
+        break;
+      case 44:
+        yes += string_replace(&new_buf, &temp_len, pos, "==", "=");
+        break;
+      case 45:
+        yes += string_replace(&new_buf, &temp_len, pos, "--", "");
+        break;
+      case 46:
+        yes += string_replace(&new_buf, &temp_len, pos, "<<", "<");
+        break;
+      case 47:
+        yes += string_replace(&new_buf, &temp_len, pos, ">>", ">");
+        break;
+      case 48:
+        yes += string_replace(&new_buf, &temp_len, pos, "<", "<<");
+        break;
+      case 49:
+        yes += string_replace(&new_buf, &temp_len, pos, ">", ">>");
+        break;
+      case 50:
+        yes += string_replace(&new_buf, &temp_len, pos, "\"", "'");
+        break;
+      case 51:
+        yes += string_replace(&new_buf, &temp_len, pos, "'", "\"");
+        break;
+      case 52:
+        yes += string_replace(&new_buf, &temp_len, pos, "(", "\"");
+        break;
+      case 53:
+        yes += string_replace(&new_buf, &temp_len, pos, "\n", " ");
+        break;
+      case 54:
+        yes += string_replace(&new_buf, &temp_len, pos, "\n", ";");
+        break;
+      case 55:
+        yes += string_replace(&new_buf, &temp_len, pos, "\n", "<");
+        break;
+      case 56:  /* Remove a semicolon delimited statement after a semicolon */
+        yes += delim_replace(&new_buf, &temp_len, pos, ";", ";", ";");
+        break;
+      case 57: /* Remove a semicolon delimited statement after a right curly
+                  brace */
+        yes += delim_replace(&new_buf, &temp_len, pos, "}", ";", "}");
+        break;
+      case 58:                            /* Remove a curly brace construct */
+        yes += delim_replace(&new_buf, &temp_len, pos, "{", "}", "");
+        break;
+      case 59:         /* Replace a curly brace construct with an empty one */
+        yes += delim_replace(&new_buf, &temp_len, pos, "{", "}", "{}");
+        break;
+      case 60:
+        yes += delim_swap(&new_buf, &temp_len, pos, ";", ";", ";");
+        break;
+      case 61:
+        yes += delim_swap(&new_buf, &temp_len, pos, "}", ";", ";");
+        break;
+      case 62:                        /* Swap comma delimited things case 1 */
+        yes += delim_swap(&new_buf, &temp_len, pos, "(", ",", ")");
+        break;
+      case 63:                        /* Swap comma delimited things case 2 */
+        yes += delim_swap(&new_buf, &temp_len, pos, "(", ",", ",");
+        break;
+      case 64:                        /* Swap comma delimited things case 3 */
+        yes += delim_swap(&new_buf, &temp_len, pos, ",", ",", ",");
+        break;
+      case 65:                        /* Swap comma delimited things case 4 */
+        yes += delim_swap(&new_buf, &temp_len, pos, ",", ",", ")");
+        break;
+      case 66:                                        /* Just delete a line */
+        yes += delim_replace(&new_buf, &temp_len, pos, "\n", "\n", "");
+        break;
+      case 67:                      /* Delete something like "const" case 1 */
+        yes += delim_replace(&new_buf, &temp_len, pos, " ", " ", "");
+        break;
+      case 68:                      /* Delete something like "const" case 2 */
+        yes += delim_replace(&new_buf, &temp_len, pos, "\n", " ", "");
+        break;
+      case 69:                      /* Delete something like "const" case 3 */
+        yes += delim_replace(&new_buf, &temp_len, pos, "(", " ", "");
+        break;
+      case 70:                        /* Swap space delimited things case 1 */
+        yes += delim_swap(&new_buf, &temp_len, pos, " ", " ", " ");
+        break;
+      case 71:                        /* Swap space delimited things case 2 */
+        yes += delim_swap(&new_buf, &temp_len, pos, " ", " ", ")");
+        break;
+      case 72:                        /* Swap space delimited things case 3 */
+        yes += delim_swap(&new_buf, &temp_len, pos, "(", " ", " ");
+        break;
+      case 73:                        /* Swap space delimited things case 4 */
+        yes += delim_swap(&new_buf, &temp_len, pos, "(", " ", ")");
+        break;
+      case 74:                           /* Duplicate a single line of code */
+        yes += delim_replace(&new_buf, &temp_len, pos, "\n", "\n", NULL);
+        break;
+      case 75: /* Duplicate a construct (most often a non-nested for loop) */
+        yes += delim_replace(&new_buf, &temp_len, pos, "\n", "}", NULL);
+        break;
+
+    }
+
+  }
+
+  if (yes == 0 || temp_len <= 0 || temp_len >= MAX_FILE) { return 0; }
+  /* the mutated scratch copy becomes the out buffer and vice versa */
+  swap_bufs(BUF_PARAMS(out), BUF_PARAMS(out_scratch));
+  *out_buf = new_buf;
+  *orig_temp_len = temp_len;
+
+  return 1;
+
+}
+
 /* Take the current entry from the queue, fuzz it for a while. This
    function is a tad too long... returns 0 if fuzzed successfully, 1 if
    skipped or bailed out. */
@@ -378,9 +840,6 @@ u8 fuzz_one_original(afl_state_t *afl) {
   u8  a_collect[MAX_AUTO_EXTRA];
   u32 a_len = 0;
 
-/* Not pretty, but saves a lot of writing */
-#define BUF_PARAMS(name) (void **)&afl->name##_buf, &afl->name##_size
-
 #ifdef IGNORE_FINDS
 
   /* In IGNORE_FINDS mode, skip any entries that weren't in the
@@ -1854,9 +2313,12 @@ havoc_stage:
   /* We essentially just do several thousand runs (depending on perf_score)
      where we take the input file and make random stacked tweaks. */
 
+  u32 r_max = 15 + ((afl->extras_cnt + afl->a_extras_cnt) ? 2 : 0) +
+              (afl->queue_cur->is_ascii ? AFL_TXT_BIAS : 0);
+
   for (afl->stage_cur = 0; afl->stage_cur < afl->stage_max; ++afl->stage_cur) {
 
-    u32 use_stacking = 1 << (1 + rand_below(afl, HAVOC_STACK_POW2));
+    u32 r, use_stacking = 1 << (1 + rand_below(afl, HAVOC_STACK_POW2));
 
     afl->stage_cur_val = use_stacking;
 
@@ -1896,8 +2358,9 @@ havoc_stage:
 
       }
 
-      switch (rand_below(
-          afl, 15 + ((afl->extras_cnt + afl->a_extras_cnt) ? 2 : 0))) {
+    retry_havoc:
+
+      switch ((r = rand_below(afl, r_max))) {
 
         case 0:
 
@@ -2192,85 +2655,96 @@ havoc_stage:
 
         }
 
-          /* Values 15 and 16 can be selected only if there are any extras
-             present in the dictionaries. */
+          // TODO: add splicing mutation here.
+          // 15:
+          //     break;
 
-        case 15: {
+        default:
+          if (r == 15 && (afl->extras_cnt || afl->a_extras_cnt)) {
 
-          /* Overwrite bytes with an extra. */
+            /* Values 15 and 16 can be selected only if there are any extras
+               present in the dictionaries. */
 
-          if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) {
+            /* Overwrite bytes with an extra. */
 
-            /* No user-specified extras or odds in our favor. Let's use an
-               auto-detected one. */
+            if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) {
 
-            u32 use_extra = rand_below(afl, afl->a_extras_cnt);
-            u32 extra_len = afl->a_extras[use_extra].len;
-            u32 insert_at;
+              /* No user-specified extras or odds in our favor. Let's use an
+                 auto-detected one. */
 
-            if (extra_len > temp_len) { break; }
+              u32 use_extra = rand_below(afl, afl->a_extras_cnt);
+              u32 extra_len = afl->a_extras[use_extra].len;
+              u32 insert_at;
 
-            insert_at = rand_below(afl, temp_len - extra_len + 1);
-            memcpy(out_buf + insert_at, afl->a_extras[use_extra].data,
-                   extra_len);
+              if (extra_len > temp_len) { break; }
 
-          } else {
+              insert_at = rand_below(afl, temp_len - extra_len + 1);
+              memcpy(out_buf + insert_at, afl->a_extras[use_extra].data,
+                     extra_len);
 
-            /* No auto extras or odds in our favor. Use the dictionary. */
+            } else {
 
-            u32 use_extra = rand_below(afl, afl->extras_cnt);
-            u32 extra_len = afl->extras[use_extra].len;
-            u32 insert_at;
+              /* No auto extras or odds in our favor. Use the dictionary. */
 
-            if (extra_len > temp_len) { break; }
+              u32 use_extra = rand_below(afl, afl->extras_cnt);
+              u32 extra_len = afl->extras[use_extra].len;
+              u32 insert_at;
 
-            insert_at = rand_below(afl, temp_len - extra_len + 1);
-            memcpy(out_buf + insert_at, afl->extras[use_extra].data, extra_len);
+              if (extra_len > temp_len) { break; }
 
-          }
+              insert_at = rand_below(afl, temp_len - extra_len + 1);
+              memcpy(out_buf + insert_at, afl->extras[use_extra].data,
+                     extra_len);
 
-          break;
+            }
 
-        }
+          } else if (r == 16 && (afl->extras_cnt || afl->a_extras_cnt)) {
 
-        case 16: {
+            u32 use_extra, extra_len, insert_at = rand_below(afl, temp_len + 1);
+            u8 *ptr;
 
-          u32 use_extra, extra_len, insert_at = rand_below(afl, temp_len + 1);
-          u8 *ptr;
+            /* Insert an extra. Do the same dice-rolling stuff as for the
+               previous case. */
 
-          /* Insert an extra. Do the same dice-rolling stuff as for the
-             previous case. */
+            if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) {
 
-          if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) {
+              use_extra = rand_below(afl, afl->a_extras_cnt);
+              extra_len = afl->a_extras[use_extra].len;
+              ptr = afl->a_extras[use_extra].data;
 
-            use_extra = rand_below(afl, afl->a_extras_cnt);
-            extra_len = afl->a_extras[use_extra].len;
-            ptr = afl->a_extras[use_extra].data;
+            } else {
 
-          } else {
+              use_extra = rand_below(afl, afl->extras_cnt);
+              extra_len = afl->extras[use_extra].len;
+              ptr = afl->extras[use_extra].data;
 
-            use_extra = rand_below(afl, afl->extras_cnt);
-            extra_len = afl->extras[use_extra].len;
-            ptr = afl->extras[use_extra].data;
+            }
 
-          }
+            if (temp_len + extra_len >= MAX_FILE) { break; }
 
-          if (temp_len + extra_len >= MAX_FILE) { break; }
+            out_buf = ck_maybe_grow(BUF_PARAMS(out), temp_len + extra_len);
 
-          out_buf = ck_maybe_grow(BUF_PARAMS(out), temp_len + extra_len);
+            /* Tail */
+            memmove(out_buf + insert_at + extra_len, out_buf + insert_at,
+                    temp_len - insert_at);
 
-          /* Tail */
-          memmove(out_buf + insert_at + extra_len, out_buf + insert_at,
-                  temp_len - insert_at);
+            /* Inserted part */
+            memcpy(out_buf + insert_at, ptr, extra_len);
 
-          /* Inserted part */
-          memcpy(out_buf + insert_at, ptr, extra_len);
+            temp_len += extra_len;
 
-          temp_len += extra_len;
+          } else {
 
-          break;
+            // ascii mutations
+            if (text_mutation(afl, &out_buf, &temp_len) == 0) goto retry_havoc;
 
-        }
+            //#ifdef _AFL_DOCUMENT_MUTATIONS
+            //            fprintf(stderr, "MUTATED: %s/mutations/%09u:*\n",
+            //            afl->out_dir,
+            //                    afl->document_counter);
+            //#endif
+
+          }
 
       }
 
diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c
index 7afdd9f1..da6b1eee 100644
--- a/src/afl-fuzz-queue.c
+++ b/src/afl-fuzz-queue.c
@@ -24,6 +24,7 @@
 
 #include "afl-fuzz.h"
 #include <limits.h>
+#include <ctype.h>
 
 /* Mark deterministic checks as done for a particular queue entry. We use the
    .state file to avoid repeating deterministic fuzzing when resuming aborted
@@ -100,6 +101,119 @@ void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) {
 
 }
 
+/* check if ascii (returns 1) or UTF-8 (returns 2); returns 0 if neither */
+
+static u8 check_if_text(struct queue_entry *q) {
+
+  /* too short to judge, or too large for the read buffer below */
+  if (q->len < AFL_TXT_MIN_LEN || q->len > MAX_FILE) return 0;
+
+  /* NOTE(review): buf is a MAX_FILE sized stack array - confirm the stack
+     limit allows this, or move it to the heap */
+  u8  buf[MAX_FILE];
+  s32 fd, len = q->len, offset = 0, ascii = 0, utf8 = 0, comp;
+
+  if ((fd = open(q->fname, O_RDONLY)) < 0) return 0;
+  comp = read(fd, buf, len);
+  close(fd);  // close before bailing out so a short read cannot leak fd
+  if (comp != len) return 0;
+
+  while (offset < len) {
+
+    /* Bytes beyond the end read as 0, which is never a valid continuation
+       byte, so a sequence truncated by the end of file cannot match. */
+    u8 b0 = buf[offset];
+    u8 b1 = (offset + 1 < len ? buf[offset + 1] : 0);
+    u8 b2 = (offset + 2 < len ? buf[offset + 2] : 0);
+    u8 b3 = (offset + 3 < len ? buf[offset + 3] : 0);
+
+    // printable ASCII plus tab, line feed and carriage return
+    if (b0 == 0x09 || b0 == 0x0A || b0 == 0x0D ||
+        (0x20 <= b0 && b0 <= 0x7E)) {
+
+      offset++;
+      utf8++;
+      ascii++;
+      continue;
+
+    }
+
+    // any other byte accepted by isascii()/isprint() counts as ascii only
+    if (isascii((int)b0) || isprint((int)b0)) {
+
+      ascii++;
+      // we continue though as it can also be a valid utf8
+
+    }
+
+    // non-overlong 2-byte
+    if ((0xC2 <= b0 && b0 <= 0xDF) && (0x80 <= b1 && b1 <= 0xBF)) {
+
+      offset += 2;
+      utf8++;
+      comp--;
+      continue;
+
+    }
+
+    // 3-byte sequences; the middle range also matches the UTF-8 BOM
+    // (EF BB BF), so the BOM needs no case of its own
+    if ((b0 == 0xE0 &&                                 // excluding overlongs
+         (0xA0 <= b1 && b1 <= 0xBF) &&
+         (0x80 <= b2 && b2 <= 0xBF)) ||
+        (((0xE1 <= b0 && b0 <= 0xEC) || b0 == 0xEE || b0 == 0xEF) &&
+         (0x80 <= b1 && b1 <= 0xBF) &&
+         (0x80 <= b2 && b2 <= 0xBF)) ||
+        (b0 == 0xED &&                                // excluding surrogates
+         (0x80 <= b1 && b1 <= 0x9F) &&
+         (0x80 <= b2 && b2 <= 0xBF))) {
+
+      offset += 3;
+      utf8++;
+      comp -= 2;
+      continue;
+
+    }
+
+    // 4-byte sequences
+    if ((b0 == 0xF0 &&                                          // planes 1-3
+         (0x90 <= b1 && b1 <= 0xBF) &&
+         (0x80 <= b2 && b2 <= 0xBF) &&
+         (0x80 <= b3 && b3 <= 0xBF)) ||
+        ((0xF1 <= b0 && b0 <= 0xF3) &&                         // planes 4-15
+         (0x80 <= b1 && b1 <= 0xBF) &&
+         (0x80 <= b2 && b2 <= 0xBF) &&
+         (0x80 <= b3 && b3 <= 0xBF)) ||
+        (b0 == 0xF4 &&                                            // plane 16
+         (0x80 <= b1 && b1 <= 0x8F) &&
+         (0x80 <= b2 && b2 <= 0xBF) &&
+         (0x80 <= b3 && b3 <= 0xBF))) {
+
+      offset += 4;
+      utf8++;
+      comp -= 3;
+      continue;
+
+    }
+
+    // neither plain text nor a valid UTF-8 sequence: skip this byte
+    offset++;
+
+  }
+
+  /* comp started as the byte count and lost one for every continuation
+     byte, so it now counts decoded units; it stays > 0 for a non-empty file */
+
+  u32 percent_utf8 = (utf8 * 100) / comp;
+  u32 percent_ascii = (ascii * 100) / len;
+
+  if (percent_utf8 >= percent_ascii && percent_utf8 >= AFL_TXT_MIN_PERCENT)
+    return 2;
+  if (percent_ascii >= AFL_TXT_MIN_PERCENT) return 1;
+  return 0;
+
+}
+
 /* Append new test case to the queue. */
 
 void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
@@ -159,6 +273,8 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
 
   }
 
+  q->is_ascii = check_if_text(q);
+
 }
 
 /* Destroy the entire queue. */
diff --git a/src/afl-performance.c b/src/afl-performance.c
index 0c1697a8..6631f148 100644
--- a/src/afl-performance.c
+++ b/src/afl-performance.c
@@ -44,10 +44,12 @@ void rand_set_seed(afl_state_t *afl, s64 init_seed) {
 
 }
 
-uint64_t rand_next(afl_state_t *afl) {
+uint32_t rand_next(afl_state_t *afl) {
 
-  const uint64_t result =
-      rotl(afl->rand_seed[0] + afl->rand_seed[3], 23) + afl->rand_seed[0];
+  const uint32_t result =
+      (uint32_t)rotl(afl->rand_seed[0] + afl->rand_seed[3], 23) +
+      afl->rand_seed[0];
+  //  const uint32_t result = (uint32_t) rotl(afl->rand_seed[1] * 5, 7) * 9;
 
   const uint64_t t = afl->rand_seed[1] << 17;
 
@@ -60,7 +62,7 @@ uint64_t rand_next(afl_state_t *afl) {
 
   afl->rand_seed[3] = rotl(afl->rand_seed[3], 45);
 
-  return result;
+  return (uint32_t)result;
 
 }
author	van Hauser <vh@thc.org>	2020-06-29 18:35:51 +0200
committer	GitHub <noreply@github.com>	2020-06-29 18:35:51 +0200
commit	6d9b29daca46c8912aa9ddf6c053bc8554e9e9f7 (patch)
tree	4c762f1e2cfb4a8741c08b5b60d07c2ae8eee860
parent	07648f75ea5ef8f03a92db0c7566da8c229dc27b (diff)
parent	76a2d9b59b23873c8a6d174a2f3c48eba60712fb (diff)
download	afl++-6d9b29daca46c8912aa9ddf6c053bc8554e9e9f7.tar.gz