about summary refs log tree commit diff
diff options
context:
space:
mode:
authorvanhauser-thc <vh@thc.org>2021-03-02 17:46:43 +0100
committervanhauser-thc <vh@thc.org>2021-03-02 17:46:43 +0100
commit108e588e888df5c2679600ea49846a565bac23f9 (patch)
treee99fce6dd06540c8df011f1dcc6aefe556a6e316
parent333509bb0a56be9bd2e236f0e2f37d4af2dd7d59 (diff)
downloadafl++-108e588e888df5c2679600ea49846a565bac23f9.tar.gz
add de-unicoded dictionary entries
-rw-r--r--docs/Changelog.md2
-rw-r--r--include/afl-fuzz.h1
-rw-r--r--src/afl-fuzz-extras.c149
-rw-r--r--src/afl-fuzz.c7
4 files changed, 134 insertions, 25 deletions
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 01240b2a..376f5f06 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -9,6 +9,8 @@ Want to stay in the loop on major new features? Join our mailing list by
 sending a mail to <afl-users+subscribe@googlegroups.com>.
 
 ### Version ++3.11a (dev)
+  - afl-fuzz
+    - add non-unicode variants from unicode-looking dictionary entries
   - afl-cc
     - fixed for a crash that can occur with ASAN + CMPLOG together plus
       better support for unicode (thanks to @stbergmann for reporting!)
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index 3531d672..5003b563 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -1062,6 +1062,7 @@ u8 has_new_bits_unclassified(afl_state_t *, u8 *);
 void load_extras_file(afl_state_t *, u8 *, u32 *, u32 *, u32);
 void load_extras(afl_state_t *, u8 *);
 void dedup_extras(afl_state_t *);
+void deunicode_extras(afl_state_t *);
 void add_extra(afl_state_t *afl, u8 *mem, u32 len);
 void maybe_add_auto(afl_state_t *, u8 *, u32);
 void save_auto(afl_state_t *);
diff --git a/src/afl-fuzz-extras.c b/src/afl-fuzz-extras.c
index 7ecad233..52100fa1 100644
--- a/src/afl-fuzz-extras.c
+++ b/src/afl-fuzz-extras.c
@@ -387,6 +387,130 @@ static inline u8 memcmp_nocase(u8 *m1, u8 *m2, u32 len) {
 
 }
 
+/* Append a copy of mem (len bytes) to afl->extras - no checks, no sorting */
+
+static void add_extra_nocheck(afl_state_t *afl, u8 *mem, u32 len) {
+  /* request room for one more struct extra_data */
+  afl->extras = afl_realloc((void **)&afl->extras,
+                            (afl->extras_cnt + 1) * sizeof(struct extra_data));
+
+  if (unlikely(!afl->extras)) { PFATAL("alloc"); }
+  /* the new entry gets its own buffer holding a copy of mem */
+  afl->extras[afl->extras_cnt].data = ck_alloc(len);
+  afl->extras[afl->extras_cnt].len = len;
+  memcpy(afl->extras[afl->extras_cnt].data, mem, len);
+  afl->extras_cnt++;
+
+  /* We only want to print this once: when the count hits max_det_extras + 1 */
+
+  if (afl->extras_cnt == afl->max_det_extras + 1) {
+
+    WARNF("More than %u tokens - will use them probabilistically.",
+          afl->max_det_extras);
+
+  }
+
+}
+
+/* Strings in the input are sometimes converted to unicode internally, so for
+   fuzzing we also add de-unicoded copies of simple-unicode-looking entries */
+
+void deunicode_extras(afl_state_t *afl) {
+
+  if (!afl->extras_cnt) return;  // nothing to do
+
+  u32 i, j, orig_cnt = afl->extras_cnt;  // appended entries are not re-scanned
+  u8  buf[64];  // extracted bytes: at most len / 2 <= 32
+
+  for (i = 0; i < orig_cnt; ++i) {
+    /* candidates have an even length between 6 and 64 bytes */
+    if (afl->extras[i].len < 6 || afl->extras[i].len > 64 ||
+        afl->extras[i].len % 2) {
+
+      continue;
+
+    }
+    /* z1/z2: zero bytes at even/odd offsets; z3/z4: at offsets 4n+2 / 4n+3 */
+    u32 k = 0, z1 = 0, z2 = 0, z3 = 0, z4 = 0, half = afl->extras[i].len >> 1;
+    u32 quarter = half >> 1;
+
+    for (j = 0; j < afl->extras[i].len; ++j) {
+
+      switch (j % 4) {
+
+        case 2:
+          if (!afl->extras[i].data[j]) { ++z3; }
+          // fall through
+        case 0:
+          if (!afl->extras[i].data[j]) { ++z1; }
+          break;
+        case 3:
+          if (!afl->extras[i].data[j]) { ++z4; }
+          // fall through
+        case 1:
+          if (!afl->extras[i].data[j]) { ++z2; }
+          break;
+
+      }
+
+    }
+    /* need the even or the odd byte lane fully zero, but not an all-zero entry */
+    if ((z1 < half && z2 < half) || z1 + z2 == afl->extras[i].len) { continue; }
+
+    // also maybe 32 bit unicode? (needs exactly 3/4 of the bytes to be zero)
+    if (afl->extras[i].len % 4 == 0 && afl->extras[i].len >= 12 &&
+        (z3 == quarter || z4 == quarter) && z1 + z2 == quarter * 3) {
+      /* keep 1 byte per 4-byte group: first of lanes 3, 2, 1 with non-zeros, else 0 */
+      for (j = 0; j < afl->extras[i].len; ++j) {
+
+        if (z4 < quarter) {
+
+          if (j % 4 == 3) { buf[k++] = afl->extras[i].data[j]; }
+
+        } else if (z3 < quarter) {
+
+          if (j % 4 == 2) { buf[k++] = afl->extras[i].data[j]; }
+
+        } else if (z2 < half) {
+
+          if (j % 4 == 1) { buf[k++] = afl->extras[i].data[j]; }
+
+        } else {
+
+          if (j % 4 == 0) { buf[k++] = afl->extras[i].data[j]; }
+
+        }
+
+      }
+
+      add_extra_nocheck(afl, buf, k);  // the 32-bit variant
+      k = 0;  // reuse buf for the 16-bit variant below
+
+    }
+    /* 16-bit variant (added in every case): keep the lane that is not all zero */
+    for (j = 0; j < afl->extras[i].len; ++j) {
+
+      if (z1 < half) {
+
+        if (j % 2 == 0) { buf[k++] = afl->extras[i].data[j]; }
+
+      } else {
+
+        if (j % 2 == 1) { buf[k++] = afl->extras[i].data[j]; }
+
+      }
+
+    }
+
+    add_extra_nocheck(afl, buf, k);
+
+  }
+  /* re-sort: the entries appended above were added without sorting */
+  qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
+        compare_extras_len);
+
+}
+
 /* Removes duplicates from the loaded extras. This can happen if multiple files
    are loaded */
 
@@ -396,9 +520,9 @@ void dedup_extras(afl_state_t *afl) {
 
   u32 i, j, orig_cnt = afl->extras_cnt;
 
-  for (i = 0; i < afl->extras_cnt - 1; i++) {
+  for (i = 0; i < afl->extras_cnt - 1; ++i) {
 
-    for (j = i + 1; j < afl->extras_cnt; j++) {
+    for (j = i + 1; j < afl->extras_cnt; ++j) {
 
     restart_dedup:
 
@@ -462,30 +586,11 @@ void add_extra(afl_state_t *afl, u8 *mem, u32 len) {
 
   }
 
-  afl->extras = afl_realloc((void **)&afl->extras,
-                            (afl->extras_cnt + 1) * sizeof(struct extra_data));
-
-  if (unlikely(!afl->extras)) { PFATAL("alloc"); }
-
-  afl->extras[afl->extras_cnt].data = ck_alloc(len);
-  afl->extras[afl->extras_cnt].len = len;
-
-  memcpy(afl->extras[afl->extras_cnt].data, mem, len);
-
-  afl->extras_cnt++;
+  add_extra_nocheck(afl, mem, len);
 
   qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
         compare_extras_len);
 
-  /* We only want to print this once */
-
-  if (afl->extras_cnt == afl->max_det_extras + 1) {
-
-    WARNF("More than %u tokens - will use them probabilistically.",
-          afl->max_det_extras);
-
-  }
-
 }
 
 /* Maybe add automatic extra. */
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index a02eadb2..90f77919 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -1449,9 +1449,6 @@ int main(int argc, char **argv_orig, char **envp) {
 
     }
 
-    dedup_extras(afl);
-    OKF("Loaded a total of %u extras.", afl->extras_cnt);
-
   }
 
   if (!afl->timeout_given) { find_timeout(afl); }  // only for resumes!
@@ -1681,6 +1678,10 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
+  deunicode_extras(afl);
+  dedup_extras(afl);
+  if (afl->extras_cnt) { OKF("Loaded a total of %u extras.", afl->extras_cnt); }
+
   // after we have the correct bitmap size we can read the bitmap -B option
   // and set the virgin maps
   if (afl->in_bitmap) {