5 files changed, 213 insertions, 79 deletions
diff --git a/src/afl-fuzz-extras.c b/src/afl-fuzz-extras.c
index d6c368d1..58ce5b6f 100644
--- a/src/afl-fuzz-extras.c
+++ b/src/afl-fuzz-extras.c
@@ -101,7 +101,8 @@ void load_extras_file(afl_state_t *afl, u8 *fname, u32 *min_len, u32 *max_len,
 
     if (rptr < lptr || *rptr != '"') {
 
-      FATAL("Malformed name=\"value\" pair in line %u.", cur_line);
+      WARNF("Malformed name=\"value\" pair in line %u.", cur_line);
+      continue;
 
     }
 
@@ -141,13 +142,19 @@ void load_extras_file(afl_state_t *afl, u8 *fname, u32 *min_len, u32 *max_len,
 
     if (*lptr != '"') {
 
-      FATAL("Malformed name=\"keyword\" pair in line %u.", cur_line);
+      WARNF("Malformed name=\"keyword\" pair in line %u.", cur_line);
+      continue;
 
     }
 
     ++lptr;
 
-    if (!*lptr) { FATAL("Empty keyword in line %u.", cur_line); }
+    if (!*lptr) {
+
+      WARNF("Empty keyword in line %u.", cur_line);
+      continue;
+
+    }
 
     /* Okay, let's allocate memory and copy data between "...", handling
        \xNN escaping, \\, and \". */
@@ -169,7 +176,9 @@ void load_extras_file(afl_state_t *afl, u8 *fname, u32 *min_len, u32 *max_len,
 
         case 1 ... 31:
         case 128 ... 255:
-          FATAL("Non-printable characters in line %u.", cur_line);
+          WARNF("Non-printable characters in line %u.", cur_line);
+          continue;
+          break;
 
         case '\\':
 
@@ -185,7 +194,8 @@ void load_extras_file(afl_state_t *afl, u8 *fname, u32 *min_len, u32 *max_len,
 
           if (*lptr != 'x' || !isxdigit(lptr[1]) || !isxdigit(lptr[2])) {
 
-            FATAL("Invalid escaping (not \\xNN) in line %u.", cur_line);
+            WARNF("Invalid escaping (not \\xNN) in line %u.", cur_line);
+            continue;
 
           }
 
@@ -209,10 +219,11 @@ void load_extras_file(afl_state_t *afl, u8 *fname, u32 *min_len, u32 *max_len,
 
     if (afl->extras[afl->extras_cnt].len > MAX_DICT_FILE) {
 
-      FATAL(
+      WARNF(
           "Keyword too big in line %u (%s, limit is %s)", cur_line,
           stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), klen),
           stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
+      continue;
 
     }
 
@@ -232,14 +243,19 @@ static void extras_check_and_sort(afl_state_t *afl, u32 min_len, u32 max_len,
 
   u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
 
-  if (!afl->extras_cnt) { FATAL("No usable files in '%s'", dir); }
+  if (!afl->extras_cnt) {
+
+    WARNF("No usable data in '%s'", dir);
+    return;
+
+  }
 
   qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
         compare_extras_len);
 
-  OKF("Loaded %u extra tokens, size range %s to %s.", afl->extras_cnt,
-      stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), min_len),
-      stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), max_len));
+  ACTF("Loaded %u extra tokens, size range %s to %s.", afl->extras_cnt,
+       stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), min_len),
+       stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), max_len));
 
   if (max_len > 32) {
 
@@ -250,8 +266,8 @@ static void extras_check_and_sort(afl_state_t *afl, u32 min_len, u32 max_len,
 
   if (afl->extras_cnt > afl->max_det_extras) {
 
-    OKF("More than %d tokens - will use them probabilistically.",
-        afl->max_det_extras);
+    WARNF("More than %d tokens - will use them probabilistically.",
+          afl->max_det_extras);
 
   }
 
@@ -320,9 +336,10 @@ void load_extras(afl_state_t *afl, u8 *dir) {
     if (st.st_size > MAX_DICT_FILE) {
 
       WARNF(
-          "Extra '%s' is very big (%s, limit is %s)", fn,
+          "Extra '%s' is too big (%s, limit is %s)", fn,
           stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), st.st_size),
           stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
+      continue;
 
     }
 
@@ -370,16 +387,74 @@ static inline u8 memcmp_nocase(u8 *m1, u8 *m2, u32 len) {
 
 }
 
-/* Adds a new extra / dict entry. Used for LTO autodict. */
+/* Drops duplicate entries from afl->extras, which can occur when more than one
+   dictionary is loaded. For each entry the scan stops at the first later entry
+   with a different length, so only equal-length runs are compared. */
+
+void dedup_extras(afl_state_t *afl) {
+
+  u32 cur, cand, cnt_before = afl->extras_cnt;
+  if (cnt_before < 2) { return; }
+
+  for (cur = 0; cur < afl->extras_cnt - 1; cur++) {
+
+    cand = cur + 1;
+    /* cand only advances when nothing was removed: a removal shifts the next
+       candidate into slot cand. */
+    while (cand < afl->extras_cnt &&
+           afl->extras[cand].len == afl->extras[cur].len) {
+
+      if (memcmp(afl->extras[cur].data, afl->extras[cand].data,
+                 afl->extras[cur].len) != 0) {
+
+        cand++;
+        continue;
+
+      }
+
+      ck_free(afl->extras[cand].data);
+      if (cand + 1 < afl->extras_cnt)  // close the gap unless it was the last
+        memmove((char *)&afl->extras[cand], (char *)&afl->extras[cand + 1],
+                (afl->extras_cnt - cand - 1) * sizeof(struct extra_data));
+      afl->extras_cnt--;
+
+    }
+
+  }
+
+  if (afl->extras_cnt != cnt_before)
+    afl->extras = afl_realloc((void **)&afl->extras,
+                              afl->extras_cnt * sizeof(struct extra_data));
+
+}
+
+/* Adds a new extra / dict entry. */
 void add_extra(afl_state_t *afl, u8 *mem, u32 len) {
 
-  u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
+  u8  val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
+  u32 i, found = 0;
+
+  for (i = 0; i < afl->extras_cnt; i++) {
+
+    if (afl->extras[i].len == len) {
+
+      if (memcmp(afl->extras[i].data, mem, len) == 0) return;
+      found = 1;
+
+    } else {
+
+      if (found) break;
+
+    }
+
+  }
 
   if (len > MAX_DICT_FILE) {
 
-    WARNF("Extra '%.*s' is very big (%s, limit is %s)", (int)len, mem,
+    WARNF("Extra '%.*s' is too big (%s, limit is %s)", (int)len, mem,
           stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), len),
           stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
+    return;
 
   } else if (len > 32) {
 
@@ -405,8 +480,8 @@ void add_extra(afl_state_t *afl, u8 *mem, u32 len) {
 
   if (afl->extras_cnt == afl->max_det_extras + 1) {
 
-    OKF("More than %d tokens - will use them probabilistically.",
-        afl->max_det_extras);
+    WARNF("More than %d tokens - will use them probabilistically.",
+          afl->max_det_extras);
 
   }
 
@@ -609,7 +684,7 @@ void load_auto(afl_state_t *afl) {
 
   } else {
 
-    OKF("No auto-generated dictionary tokens to reuse.");
+    ACTF("No auto-generated dictionary tokens to reuse.");
 
   }
 
diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c
index 102f04b9..713849a1 100644
--- a/src/afl-fuzz-init.c
+++ b/src/afl-fuzz-init.c
@@ -611,17 +611,17 @@ void read_foreign_testcases(afl_state_t *afl, int first) {
 /* Read all testcases from the input directory, then queue them for testing.
    Called at startup. */
 
-void read_testcases(afl_state_t *afl) {
+void read_testcases(afl_state_t *afl, u8 *directory) {
 
   struct dirent **nl;
-  s32             nl_cnt;
+  s32             nl_cnt, subdirs = 1;
   u32             i;
-  u8 *            fn1;
-
+  u8 *            fn1, *dir = directory;
   u8 val_buf[2][STRINGIFY_VAL_SIZE_MAX];
 
   /* Auto-detect non-in-place resumption attempts. */
 
+if (dir == NULL) {
   fn1 = alloc_printf("%s/queue", afl->in_dir);
   if (!access(fn1, F_OK)) {
 
@@ -632,16 +632,18 @@ void read_testcases(afl_state_t *afl) {
     ck_free(fn1);
 
   }
+  dir = afl->in_dir;
+}
 
-  ACTF("Scanning '%s'...", afl->in_dir);
+  ACTF("Scanning '%s'...", dir);
 
   /* We use scandir() + alphasort() rather than readdir() because otherwise,
      the ordering of test cases would vary somewhat randomly and would be
      difficult to control. */
 
-  nl_cnt = scandir(afl->in_dir, &nl, NULL, alphasort);
+  nl_cnt = scandir(dir, &nl, NULL, alphasort);
 
-  if (nl_cnt < 0) {
+  if (nl_cnt < 0 && directory == NULL) {
 
     if (errno == ENOENT || errno == ENOTDIR) {
 
@@ -656,7 +658,7 @@ void read_testcases(afl_state_t *afl) {
 
     }
 
-    PFATAL("Unable to open '%s'", afl->in_dir);
+    PFATAL("Unable to open '%s'", dir);
 
   }
 
@@ -674,19 +676,29 @@ void read_testcases(afl_state_t *afl) {
     u8 dfn[PATH_MAX];
     snprintf(dfn, PATH_MAX, "%s/.state/deterministic_done/%s", afl->in_dir,
              nl[i]->d_name);
-    u8 *fn2 = alloc_printf("%s/%s", afl->in_dir, nl[i]->d_name);
+    u8 *fn2 = alloc_printf("%s/%s", dir, nl[i]->d_name);
 
     u8 passed_det = 0;
 
-    free(nl[i]);                                             /* not tracked */
-
     if (lstat(fn2, &st) || access(fn2, R_OK)) {
 
       PFATAL("Unable to access '%s'", fn2);
 
     }
 
-    /* This also takes care of . and .. */
+    /* obviously we want to skip "descending" into . and .. directories,
+       however it is a good idea to skip also directories that start with
+       a dot */
+    if (subdirs && S_ISDIR(st.st_mode) && nl[i]->d_name[0] != '.') {
+
+      free(nl[i]);                                           /* not tracked */
+      read_testcases(afl, fn2);
+      ck_free(fn2);
+      continue;
+
+    }
+
+    free(nl[i]);
 
     if (!S_ISREG(st.st_mode) || !st.st_size || strstr(fn2, "/README.txt")) {
 
@@ -718,7 +730,7 @@ void read_testcases(afl_state_t *afl) {
 
   free(nl);                                                  /* not tracked */
 
-  if (!afl->queued_paths) {
+  if (!afl->queued_paths && directory == NULL) {
 
     SAYF("\n" cLRD "[-] " cRST
          "Looks like there are no valid test cases in the input directory! The "
@@ -985,6 +997,76 @@ void perform_dry_run(afl_state_t *afl) {
 
   }
 
+  /* Now we remove all entries from the queue that have a duplicate trace map */
+
+  q = afl->queue;
+  struct queue_entry *p, *p_prev, *prev = NULL;
+  int                 duplicates = 0;
+
+restart_outer_cull_loop:
+
+  while (q) {
+
+    // no usable checksum: keep the entry, but still step to the next one
+    if (q->cal_failed || !q->exec_cksum) goto next_cull_entry;
+
+    p_prev = q;
+    p = q->next;
+
+    while (p) {
+
+      if (!p->cal_failed && p->exec_cksum == q->exec_cksum) {
+
+        duplicates = 1;
+        --afl->pending_not_fuzzed;
+
+        // Duplicates are only decoupled from the linked list; their memory
+        // stays allocated because the data might still be interesting for
+        // splicing. We keep the shorter file.
+        if (p->len >= q->len) {
+
+          // unlink p via its predecessor, entries between q and p stay queued
+          p = p_prev->next = p->next;
+          continue;
+
+        } else {
+
+          q = q->next;  // unlink q only, then rescan from its successor
+          if (prev) { prev->next = q; } else { afl->queue = q; }
+          goto restart_outer_cull_loop;
+
+        }
+
+      }
+
+      p_prev = p;
+      p = p->next;
+
+    }
+
+  next_cull_entry:
+
+    prev = q;
+    q = q->next;
+
+  }
+
+  if (duplicates) {
+
+    afl->max_depth = 0;
+    q = afl->queue;
+    while (q) {
+
+      if (q->depth > afl->max_depth) afl->max_depth = q->depth;
+      q = q->next;
+
+    }
+
+    afl->q_prev100 = afl->queue;
+    afl->queue_top = prev;  // prev is the tail; assumes queue_top = tail
+
+  }
+
   OKF("All test cases processed.");
 
 }
diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c
index bf568c38..5737c1f5 100644
--- a/src/afl-fuzz-one.c
+++ b/src/afl-fuzz-one.c
@@ -1707,20 +1707,8 @@ custom_mutator_stage:
 
           } while (tid == afl->current_entry && afl->queued_paths > 1);
 
-          target = afl->queue;
-
-          while (tid >= 100) {
-
-            target = target->next_100;
-            tid -= 100;
-
-          }
-
-          while (tid--) {
-
-            target = target->next;
-
-          }
+          afl->splicing_with = tid;
+          target = afl->queue_buf[tid];
 
           /* Make sure that the target has a reasonable length. */
 
@@ -4518,20 +4506,7 @@ pacemaker_fuzzing:
         } while (tid == afl->current_entry);
 
         afl->splicing_with = tid;
-        target = afl->queue;
-
-        while (tid >= 100) {
-
-          target = target->next_100;
-          tid -= 100;
-
-        }
-
-        while (tid--) {
-
-          target = target->next;
-
-        }
+        target = afl->queue_buf[tid];
 
         /* Make sure that the target has a reasonable length. */
 
diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c
index c6d8225f..db91813b 100644
--- a/src/afl-fuzz-queue.c
+++ b/src/afl-fuzz-queue.c
@@ -239,13 +239,6 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
 
   afl->cycles_wo_finds = 0;
 
-  if (!(afl->queued_paths % 100)) {
-
-    afl->q_prev100->next_100 = q;
-    afl->q_prev100 = q;
-
-  }
-
   struct queue_entry **queue_buf = afl_realloc(
       AFL_BUF_PARAM(queue), afl->queued_paths * sizeof(struct queue_entry *));
   if (unlikely(!queue_buf)) { PFATAL("alloc"); }
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index c12d5db5..bfaa22e8 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -119,8 +119,8 @@ static void usage(u8 *argv0, int more_help) {
       "etc.)\n"
       "  -d            - quick & dirty mode (skips deterministic steps)\n"
       "  -n            - fuzz without instrumentation (non-instrumented mode)\n"
-      "  -x dict_file  - optional fuzzer dictionary (see README.md, its really "
-      "good!)\n\n"
+      "  -x dict_file  - fuzzer dictionary (see README.md, specify up to 4 "
+      "times)\n\n"
 
       "Testing settings:\n"
       "  -s seed       - use a fixed seed for the RNG\n"
@@ -243,11 +243,11 @@ static int stricmp(char const *a, char const *b) {
 
 int main(int argc, char **argv_orig, char **envp) {
 
-  s32    opt;
+  s32    opt, i;
   u64    prev_queued = 0;
   u32    sync_interval_cnt = 0, seek_to, show_help = 0, map_size = MAP_SIZE;
-  u8 *   extras_dir = 0;
-  u8     mem_limit_given = 0, exit_1 = 0, debug = 0;
+  u8 *   extras_dir[4];
+  u8     mem_limit_given = 0, exit_1 = 0, debug = 0, extras_dir_cnt = 0;
   char **use_argv;
 
   struct timeval  tv;
@@ -450,8 +450,13 @@ int main(int argc, char **argv_orig, char **envp) {
 
       case 'x':                                               /* dictionary */
 
-        if (extras_dir) { FATAL("Multiple -x options not supported"); }
-        extras_dir = optarg;
+        if (extras_dir_cnt >= 4) {
+
+          FATAL("More than four -x options are not supported");
+
+        }
+
+        extras_dir[extras_dir_cnt++] = optarg;
         break;
 
       case 't': {                                                /* timeout */
@@ -828,10 +833,6 @@ int main(int argc, char **argv_orig, char **envp) {
       "Eißfeldt, Andrea Fioraldi and Dominik Maier");
   OKF("afl++ is open source, get it at "
       "https://github.com/AFLplusplus/AFLplusplus");
-  OKF("Power schedules from github.com/mboehme/aflfast");
-  OKF("Python Mutator and llvm_mode instrument file list from "
-      "github.com/choller/afl");
-  OKF("MOpt Mutator from github.com/puppet-meteor/MOpt-AFL");
 
   if (afl->sync_id && afl->is_main_node &&
       afl->afl_env.afl_custom_mutator_only) {
@@ -1139,7 +1140,15 @@ int main(int argc, char **argv_orig, char **envp) {
 
   pivot_inputs(afl);
 
-  if (extras_dir) { load_extras(afl, extras_dir); }
+  if (extras_dir_cnt) {
+
+    for (i = 0; i < extras_dir_cnt; i++)
+      load_extras(afl, extras_dir[i]);
+
+    dedup_extras(afl);
+    OKF("Loaded a total of %u extras.", afl->extras_cnt);
+
+  }
 
   if (!afl->timeout_given) { find_timeout(afl); }