2 files changed, 61 insertions, 1 deletions
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO
index 2e39511c..3cae3060 100644
--- a/custom_mutators/autotokens/TODO
+++ b/custom_mutators/autotokens/TODO
@@ -4,3 +4,20 @@ create from thin air if no good seed after a cycle and dict large enough?
 (static u32 no_of_struct_inputs;) 
 
 splicing -> check if whitespace/token is needed
+
+whitespace/token check only AFTER mutation
+
+analyse welche einen DICT haben, und welche davon rein ascii
+
+corpus analyse:
+	+ libxml
+	- harfbuzz
+	- sqlite
+	- libpcap
+min len, max len, % wenn 95/98/99/100 ascii
+
+funktion und env für menge an mutationen
+
+env für menge an per mutation run
+
+only add initial dictionary, not further finds, e.g. cmplog
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index 548e1be9..a0125851 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -28,6 +28,9 @@ extern "C" {
 #define AUTOTOKENS_SIZE_MIN 8
 #define AUTOTOKENS_SPLICE_MIN 4
 #define AUTOTOKENS_SPLICE_MAX 64
+#define AUTOTOKENS_FUZZ_COUNT_SHIFT 0
+// 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog
+#define AUTOTOKENS_LEARN_DICT 2
 #ifndef AUTOTOKENS_SPLICE_DISABLE
   #define AUTOTOKENS_SPLICE_DISABLE 0
 #endif
@@ -53,6 +56,8 @@ static afl_state *afl_ptr;
 static int        debug = AUTOTOKENS_DEBUG;
 static int        only_fav = AUTOTOKENS_ONLY_FAV;
 static int        alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE;
+static int        learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT;
+static int        fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT;
 static u32        current_id;
 static u32        valid_structures;
 static u32        whitespace_ids;
@@ -94,6 +99,22 @@ u32 good_whitespace_or_singleval() {
 
 }
 
+extern "C" u32 afl_custom_fuzz_count(void *data, const u8 *buf,
+                                     size_t buf_size) {
+  // Fuzz count for the current queue entry; data/buf/buf_size are not read.
+  if (s == NULL) return 0;  // s unset (presumably no token structure - confirm)
+  // Base: HAVOC_CYCLES * perf_score / havoc_div, >> 7 if custom_only else >> 8.
+  u32 shift = unlikely(afl_ptr->custom_only) ? 7 : 8;
+  u32 stage_max = (u32)((HAVOC_CYCLES * afl_ptr->queue_cur->perf_score) /
+                        afl_ptr->havoc_div) >>
+                  shift;
+  if (fuzz_count_shift) { stage_max >>= (u32)fuzz_count_shift; };  // extra cut
+  DEBUGF(stderr, "fuzz count: %u\n", stage_max);
+
+  return stage_max;
+
+}
+
 extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
                                   u8 **out_buf, u8 *add_buf,
                                   size_t add_buf_size, size_t max_size) {
@@ -441,6 +462,7 @@ u8 my_search_string(string::const_iterator cur, string::const_iterator ende,
 extern "C" unsigned char afl_custom_queue_get(void                *data,
                                               const unsigned char *filename) {
 
+  static int learn_state;
   (void)(data);
 
   if (likely(!debug)) {
@@ -458,7 +480,9 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
   }
 
   // check if there are new dictionary entries and add them to the tokens
-  if (valid_structures) {
+  if (valid_structures && learn_state < learn_dictionary_tokens) {
+
+    if (unlikely(!learn_state)) { learn_state = 1; }
 
     while (extras_cnt < afl_ptr->extras_cnt) {
 
@@ -1053,6 +1077,25 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
   if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; }
   if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; }
   if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; }
+
+  if (getenv("AUTOTOKENS_LEARN_DICT")) {
+
+    learn_dictionary_tokens = atoi(getenv("AUTOTOKENS_LEARN_DICT"));
+    if (learn_dictionary_tokens < 0 || learn_dictionary_tokens > 2) {
+
+      learn_dictionary_tokens = 2;
+
+    }
+
+  }
+
+  if (getenv("AUTOTOKENS_FUZZ_COUNT_SHIFT")) {
+
+    fuzz_count_shift = atoi(getenv("AUTOTOKENS_FUZZ_COUNT_SHIFT"));
+    if (fuzz_count_shift < 0 || fuzz_count_shift > 16) { fuzz_count_shift = 0; }
+
+  }
+
   if (getenv("AUTOTOKENS_WHITESPACE")) {
 
     whitespace = getenv("AUTOTOKENS_WHITESPACE");