From 9548af52b266ecc2aed81f388f7a1a7a3fcfb181 Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Sat, 14 Jan 2023 09:30:25 +0100
Subject: texts

---
 custom_mutators/autotokens/README | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 custom_mutators/autotokens/README

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
new file mode 100644
index 00000000..6849279e
--- /dev/null
+++ b/custom_mutators/autotokens/README
@@ -0,0 +1,12 @@
+# autotokens
+
+This implements an improved autotoken idea presented in
+[Token-Level Fuzzing](https://www.usenix.org/system/files/sec21-salls.pdf).
+It is a grammar fuzzer without actually knowing the grammar.
+
+It is recommended to run it together in an instance with `CMPLOG`.
+
+If you have a dictionary (`-x`), this improves this custom grammar mutator.
+
+If **not** run with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY`,
+to concentrate on grammar bug classes.
--
cgit 1.4.1


From 33f41e3974348d3b0b71b3a30a6483bb0418068c Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Tue, 17 Jan 2023 09:52:35 +0100
Subject: autotokens: print stats at exit

---
 custom_mutators/autotokens/README         |  7 ++++---
 custom_mutators/autotokens/autotokens.cpp | 12 ++++++++++++
 include/config.h                          |  4 ++--
 3 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index 6849279e..0dcc6a3e 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -1,6 +1,6 @@
 # autotokens
 
-This implements an improved autotoken idea presented in
+This implements an improved autotoken grammar fuzzing idea presented in
 [Token-Level Fuzzing](https://www.usenix.org/system/files/sec21-salls.pdf).
 It is a grammar fuzzer without actually knowing the grammar.
 
@@ -8,5 +8,6 @@ It is recommended to run it together in an instance with `CMPLOG`.
 
 If you have a dictionary (`-x`), this improves this custom grammar mutator.
 
-If **not** run with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY`,
-to concentrate on grammar bug classes.
+If **not** running with `CMPLOG`, it is possible to set
+`AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes.
+
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index 850692a1..d6b269fd 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -35,6 +35,7 @@ static u32 valid_structures;
 static u32 whitespace_ids;
 static u32 extras_cnt, a_extras_cnt;
 static u64 all_spaces, all_tabs, all_lf, all_ws;
+static u64 all_structure_items;
 static unordered_map<string, vector<u32> *> file_mapping;
 static unordered_map<string, u32>           token_to_id;
 static unordered_map<u32, string>           id_to_token;
@@ -519,6 +520,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
     file_mapping[fn] = structure;
     s = structure;
     ++valid_structures;
+    all_structure_items += structure->size();
 
     // we are done!
     DEBUG(stderr, "DONE! We have %lu tokens in the structure\n",
@@ -586,6 +588,16 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
 
 extern "C" void afl_custom_deinit(my_mutator_t *data) {
 
+  /* we use this to print statistics at exit :-)
+     needs to be stderr as stdout is filtered */
+
+  fprintf(stderr,
+          "\n\nAutotoken mutator statistics:\n"
+          "  Number of all seen tokens:  %lu\n"
+          "  Number of input structures: %lu\n"
+          "  Number of all items in structures: %lu\n\n",
+          current_id - 1, valid_structures, all_structure_items);
+
   free(data);
 
 }
diff --git a/include/config.h b/include/config.h
index 6cfaac11..f8a742f2 100644
--- a/include/config.h
+++ b/include/config.h
@@ -364,9 +364,9 @@
  *                                                          *
  ***********************************************************/
 
-/* Call count interval between reseeding the libc PRNG from /dev/urandom: */
+/* Call count interval between reseeding the PRNG from /dev/urandom: */
 
-#define RESEED_RNG 100000
+#define RESEED_RNG 2500000
 
 /* The default maximum testcase cache size in MB, 0 = disable.
    A value between 50 and 250 is a good default value. Note that the
--
cgit 1.4.1
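For context on the `RESEED_RNG` change in the commit above: afl-fuzz reseeds
its PRNG from /dev/urandom after a fixed number of calls, so raising the
interval from 100000 to 2500000 simply makes reseeding 25x rarer and cuts the
associated syscall overhead. A minimal C++ sketch of that call-count pattern,
for illustration only (the names and layout here are invented and are not the
actual afl-fuzz implementation):

    #include <cstdio>
    #include <cstdlib>

    #define RESEED_RNG 2500000           // the new, larger interval

    static unsigned long long rand_cnt;  // calls since the last reseed

    static unsigned int rand_next(void) {

      if (++rand_cnt >= RESEED_RNG) {

        unsigned int seed = 0;
        FILE *f = fopen("/dev/urandom", "rb");
        if (f) {

          // pull fresh entropy and reseed the libc PRNG
          if (fread(&seed, sizeof(seed), 1, f) == 1) { srandom(seed); }
          fclose(f);

        }

        rand_cnt = 0;

      }

      return (unsigned int)random();

    }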
From a41fd5cc5c4a5073f38adf06270e2985c88da9d5 Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Wed, 18 Jan 2023 11:46:28 +0100
Subject: alternate tokenize, options

---
 custom_mutators/autotokens/README         |   9 +
 custom_mutators/autotokens/autotokens.cpp | 432 ++++++++++++++++++++++++------
 2 files changed, 365 insertions(+), 76 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index 0dcc6a3e..f6e9c753 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -11,3 +11,12 @@ If you have a dictionary (`-x`), this improves this custom grammar mutator.
 
 If **not** running with `CMPLOG`, it is possible to set
 `AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes.
+
+## Configuration via environment variables
+
+`AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items
+`AUTOTOKENS_COMMENT` - what character or string starts a comment which will be
+                       removed. Default: `/* ... */`
+`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation
+                                    (experimental)
+`AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE,
+                          default is " "
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index 5580512a..28ef91e2 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -15,7 +15,10 @@ extern "C" {
 
 #include
 
 #define AUTOTOKENS_DEBUG 0
+#define AUTOTOKENS_ONLY_FAV 0
+#define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0
 #define AUTOTOKENS_CHANGE_MIN 8
+#define AUTOTOKENS_WHITESPACE " "
 
 using namespace std;
 
@@ -30,6 +33,8 @@ typedef struct my_mutator {
 
 static afl_state *afl_ptr;
 static int        debug = AUTOTOKENS_DEBUG;
+static int        only_fav = AUTOTOKENS_ONLY_FAV;
+static int        alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE;
 static u32        current_id;
 static u32        valid_structures;
 static u32        whitespace_ids;
@@ -39,9 +44,12 @@ static u64 all_structure_items;
 static unordered_map<string, vector<u32> *> file_mapping;
 static unordered_map<string, u32>           token_to_id;
 static unordered_map<u32, string>           id_to_token;
-// static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize);
+static string whitespace = AUTOTOKENS_WHITESPACE;
+static regex *regex_comment_custom;
 static regex  regex_comment_star("/\\*([:print:]|\n)*?\\*/",
                                  regex::multiline | regex::optimize);
+static regex  regex_word("[A-Za-z0-9_$]+", regex::optimize);
+static regex  regex_whitespace(R"([ \t]+)", regex::optimize);
 static regex  regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize);
 static vector<u32> *s;  // the structure of the currently selected input
 
@@ -84,15 +92,15 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
                      afl_ptr->havoc_div / 256));
   // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds);
 
-  u32 max_rand = 4;
+  u32 max_rand = 7;
 
   for (i = 0; i < rounds; ++i) {
 
     switch (rand_below(afl_ptr, max_rand)) {
 
       /* CHANGE */
-      case 0: /* fall through */
-      case 1: {
+      case 0 ... 3: /* fall through */
+      {
 
         u32 pos = rand_below(afl_ptr, m_size);
         u32 cur_item = m[pos], new_item;
@@ -103,8 +111,9 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
         } while (unlikely(
 
             new_item == cur_item ||
-            (whitespace_ids < new_item && whitespace_ids >= cur_item) ||
-            (whitespace_ids >= new_item && whitespace_ids < cur_item)));
+            (!alternative_tokenize &&
+             ((whitespace_ids < new_item && whitespace_ids >= cur_item) ||
+              (whitespace_ids >= new_item && whitespace_ids < cur_item)))));
 
         DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item);
         m[pos] = new_item;
@@ -113,7 +122,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
       }
 
       /* INSERT (m_size +1 so we insert also after last place) */
-      case 2: {
+      case 4 ... 5: {
 
         u32 new_item;
         do {
@@ -126,26 +135,30 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
         m.insert(m.begin() + pos, new_item);
         ++m_size;
 
-        // if we insert an identifier or string we might need whitespace
-        if (id_to_token[new_item].size() > 1) {
+        if (likely(!alternative_tokenize)) {
 
-          // need to insert before?
+          // if we insert an identifier or string we might need whitespace
+          if (id_to_token[new_item].size() > 1) {
- m.insert(m.begin() + pos, good_whitespace_or_singleval()); - ++m_size; + if (pos && m[pos - 1] >= whitespace_ids && + id_to_token[m[pos - 1]].size() > 1) { - } + m.insert(m.begin() + pos, good_whitespace_or_singleval()); + ++m_size; + + } + + if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && + id_to_token[m[pos + 1]].size() > 1) { - if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && - id_to_token[m[pos + 1]].size() > 1) { + // need to insert after? - // need to insert after? + m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); + ++m_size; - m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); - ++m_size; + } } @@ -156,7 +169,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* ERASE - only if large enough */ - case 3: { + case 6: { if (m_size > 8) { @@ -165,7 +178,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } else { - max_rand = 3; + max_rand = 6; } @@ -180,10 +193,16 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } string output; + u32 m_size_1 = m_size - 1; for (i = 0; i < m_size; ++i) { output += id_to_token[m[i]]; + if (unlikely(alternative_tokenize && i < m_size_1)) { + + output += whitespace; + + } } @@ -219,7 +238,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (likely(!debug)) { - if (afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) { + if ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; return 0; @@ -353,8 +373,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); - // input = regex_replace(input, regex_comment_slash, "$2"); - input = regex_replace(input, regex_comment_star, ""); + if (regex_comment_custom) { + + input = regex_replace(input, *regex_comment_custom, "$2"); + + } else { + + input = regex_replace(input, regex_comment_star, ""); + + } DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), filename, input.c_str()); @@ -377,53 +404,105 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "START!\n"); - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + if (likely(!alternative_tokenize)) { + + while (regex_search(cur, ende, match, regex_string, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { + + prev = cur; + found = match[0].first; + cur = match[0].second; + DEBUG(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + match[0].str().c_str(), prev - input.begin(), match.position(), + cur - input.begin()); + + if (prev < found) { // there are items between search start and find + while (prev < found) { - prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + if (isspace(*prev)) { - if (prev < found) { // there are items between search start and find - while (prev < found) { + auto start = prev; + while (isspace(*prev)) { - if (isspace(*prev)) { + ++prev; - auto start = prev; - while (isspace(*prev)) { + } + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + 
+                  tokens[tokens.size() - 1].c_str());
 
-        } else if (isalnum(*prev) || *prev == '$' || *prev == '_') {
+          } else if (isalnum(*prev) || *prev == '$' || *prev == '_') {
 
-          auto start = prev;
-          while (isalnum(*prev) || *prev == '$' || *prev == '_' ||
-                 *prev == '.' || *prev == '/') {
+            auto start = prev;
+            while (isalnum(*prev) || *prev == '$' || *prev == '_' ||
+                   *prev == '.' || *prev == '/') {
 
-            ++prev;
+              ++prev;
 
-          }
+            }
 
-          tokens.push_back(std::string(start, prev));
-          DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start,
-                tokens[tokens.size() - 1].c_str());
+            tokens.push_back(std::string(start, prev));
+            DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start,
+                  tokens[tokens.size() - 1].c_str());
 
-        } else {
+          } else {
 
-          tokens.push_back(std::string(prev, prev + 1));
-          DEBUG(stderr, "OTHER \"%c\"\n", *prev);
-          ++prev;
+            tokens.push_back(std::string(prev, prev + 1));
+            DEBUG(stderr, "OTHER \"%c\"\n", *prev);
+            ++prev;
 
-        }
+          }
 
-      }
+        }
 
-    }
+      }
 
-    if (match[0].length() > 0) { tokens.push_back(match[0]); }
+      if (match[0].length() > 0) { tokens.push_back(match[0]); }
 
-  }
+    }
 
-  DEBUG(stderr, "AFTER all strings\n");
+    DEBUG(stderr, "AFTER all strings\n");
 
-  if (cur < ende) {
+    if (cur < ende) {
 
-    while (cur < ende) {
+      while (cur < ende) {
 
-      if (isspace(*cur)) {
+        if (isspace(*cur)) {
 
-        auto start = cur;
-        while (isspace(*cur)) {
+          auto start = cur;
+          while (isspace(*cur)) {
 
-          ++cur;
+            ++cur;
 
-        }
+          }
 
-        tokens.push_back(std::string(start, cur));
-        DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start,
-              tokens[tokens.size() - 1].c_str());
+          tokens.push_back(std::string(start, cur));
+          DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start,
+                tokens[tokens.size() - 1].c_str());
 
-      } else if (isalnum(*cur) || *cur == '$' || *cur == '_') {
+        } else if (isalnum(*cur) || *cur == '$' || *cur == '_') {
 
-        auto start = cur;
-        while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' ||
-               *cur == '/') {
+          auto start = cur;
+          while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' ||
+                 *cur == '/') {
 
-          ++cur;
+            ++cur;
 
-        }
+          }
 
-        tokens.push_back(std::string(start, cur));
-        DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start,
-              tokens[tokens.size() - 1].c_str());
+          tokens.push_back(std::string(start, cur));
+          DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start,
+                tokens[tokens.size() - 1].c_str());
 
-      } else {
+        } else {
 
-        tokens.push_back(std::string(cur, cur + 1));
-        DEBUG(stderr, "OTHER \"%c\"\n", *cur);
-        ++cur;
+          tokens.push_back(std::string(cur, cur + 1));
+          DEBUG(stderr, "OTHER \"%c\"\n", *cur);
+          ++cur;
 
+        }
+
       }
 
     }
 
+  } else {
+
+    // alternative tokenize
+
+    while (regex_search(cur, ende, match, regex_string)) {
+
+      prev = cur;
+      found = match[0].first;
+      cur = match[0].second;
+      DEBUG(stderr,
+            "string %s found at start %lu offset %lu continue at %lu\n",
+            match[0].str().c_str(), prev - input.begin(), match.position(),
+            cur - input.begin());
+      if (prev < found) {  // there are items between search start and find
+
+        sregex_token_iterator it{prev, found, regex_whitespace, -1};
+        vector<string>        tokenized{it, {}};
+        tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(),
+                                       [](std::string const &s) {
+
+                                         return s.size() == 0;
+
+                                       }),
+                        tokenized.end());
+        tokens.reserve(tokens.size() + tokenized.size() * 2 + 1);
+
+        if (unlikely(debug)) {
+
+          DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(),
+                input.size());
+          for (auto x : tokenized) {
+
+            cerr << x << endl;
+
+          }
+
+        }
+
+        for (auto token : tokenized) {
+
+          string::const_iterator c = token.begin(), e = token.end(), f, p;
+          smatch                 m;
+
+          while (regex_search(c, e, m, regex_word)) {
+
+            p = c;
+            f = m[0].first;
+            c = m[0].second;
+            if (p < f) {
+
+              // there are items between search start and find
+              while (p < f) {
+
+                if (unlikely(debug)) {
+
+                  string foo(p, p + 1);
+                  DEBUG(stderr, "before string: \"%s\"\n", foo.c_str());
+
+                }
+
+                tokens.push_back(std::string(p, p + 1));
+                ++p;
+
+              }
+
+              /*
+              string foo(p, f);
+              DEBUG(stderr, "before string: \"%s\"\n",
+              foo.c_str()); tokens.push_back(std::string(p, f));
+              */
+
+            }
+
+            DEBUG(
+                stderr,
+                "SUBstring \"%s\" found at start %lu offset %lu continue at "
+                "%lu\n",
+                m[0].str().c_str(), p - input.begin(), m.position(),
+                c - token.begin());
+            tokens.push_back(m[0].str());
+
+          }
+
+          if (c < e) {
+
+            while (c < e) {
+
+              if (unlikely(debug)) {
+
+                string foo(c, c + 1);
+                DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+
+              }
+
+              tokens.push_back(std::string(c, c + 1));
+              ++c;
+
+            }
+
+            /*
+            if (unlikely(debug)) {
+
+              string foo(c, e);
+              DEBUG(stderr, "after string: \"%s\"\n",
+              foo.c_str());
+
+            }
+
+            tokens.push_back(std::string(c, e));
+            */
+
+          }
+
+        }
+
+      }
+
+      if (match[0].length() > 0) { tokens.push_back(match[0]); }
+
+    }
+
+    if (cur < ende) {
+
+      sregex_token_iterator it{cur, ende, regex_whitespace, -1};
+      vector<string>        tokenized{it, {}};
+      tokenized.erase(
+          std::remove_if(tokenized.begin(), tokenized.end(),
+                         [](std::string const &s) { return s.size() == 0; }),
+          tokenized.end());
+      tokens.reserve(tokens.size() + tokenized.size() * 2 + 1);
+
+      if (unlikely(debug)) {
+
+        DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(),
+              input.size());
+        for (auto x : tokenized) {
+
+          cerr << x << endl;
+
+        }
+
+      }
+
+      for (auto token : tokenized) {
+
+        string::const_iterator c = token.begin(), e = token.end(), f, p;
+        smatch                 m;
+
+        while (regex_search(c, e, m, regex_word)) {
+
+          p = c;
+          f = m[0].first;
+          c = m[0].second;
+          if (p < f) {
+
+            // there are items between search start and find
+            while (p < f) {
+
+              if (unlikely(debug)) {
+
+                string foo(p, p + 1);
+                DEBUG(stderr, "before string: \"%s\"\n", foo.c_str());
+
+              }
+
+              tokens.push_back(std::string(p, p + 1));
+              ++p;
+
+            }
+
+            /*
+            if (unlikely(debug)) {
+
+              string foo(p, f);
+              DEBUG(stderr, "before string: \"%s\"\n",
+              foo.c_str());
+
+            }
+
+            tokens.push_back(std::string(p, f));
+            */
+
+          }
+
+          DEBUG(stderr,
+                "SUB2string \"%s\" found at start %lu offset %lu continue at "
+                "%lu\n",
+                m[0].str().c_str(), p - input.begin(), m.position(),
+                c - token.begin());
+          tokens.push_back(m[0].str());
+
+        }
+
+        if (c < e) {
+
+          while (c < e) {
+
+            if (unlikely(debug)) {
+
+              string foo(c, c + 1);
+              DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+
+            }
+
+            tokens.push_back(std::string(c, c + 1));
+            ++c;
+
+          }
+
+          /*
+          if (unlikely(debug)) {
+
+            string foo(c, e);
+            DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+
+          }
+
+          tokens.push_back(std::string(c, e));
+          */
+
+        }
+
+      }
 
     }
 
   }
 
@@ -483,9 +741,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
   if (unlikely(debug)) {
 
     DEBUG(stderr, "DUMPING TOKENS:\n");
+    u32 size_1 = tokens.size() - 1;
     for (u32 i = 0; i < tokens.size(); ++i) {
 
       DEBUG(stderr, "%s", tokens[i].c_str());
+      if (unlikely(alternative_tokenize && i < size_1)) {
+
+        DEBUG(stderr, "%s", whitespace.c_str());
+
+      }
 
     }
 
@@ -556,6 +820,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
 
   }
 
+  if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; }
+  if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; }
+  if (getenv("AUTOTOKENS_WHITESPACE")) {
+
+    whitespace = getenv("AUTOTOKENS_WHITESPACE");
+
+  }
+
+  if (getenv("AUTOTOKENS_COMMENT")) {
+
+    char buf[256];
+    snprintf(buf, sizeof(buf), "(%s.*)([\r\n]?)", getenv("AUTOTOKENS_COMMENT"));
+    regex_comment_custom = new regex(buf, regex::optimize);
+
+  }
+
   data->afl = afl_ptr = afl;
 
   // set common whitespace tokens
--
cgit 1.4.1
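The commit above introduces two tokenizers: the default walks the input
keeping quoted strings intact and splitting everything else into whitespace
runs, identifier-like words and single punctuation characters, while the
experimental alternative first splits on whitespace and then extracts words
with `regex_word`. A compact, simplified C++ sketch of that second, two-stage
idea, for illustration only (this is not the repository code and skips the
string and comment handling):

    #include <regex>
    #include <string>
    #include <vector>
    using namespace std;

    // stage 1: split on whitespace; stage 2: split each piece into
    // words matching [A-Za-z0-9_$]+ and single non-word characters
    static vector<string> tokenize_alt(const string &input) {

      static const regex ws(R"([ \t]+)"), word("[A-Za-z0-9_$]+");
      vector<string>        tokens;
      sregex_token_iterator piece(input.begin(), input.end(), ws, -1), end;

      for (; piece != end; ++piece) {

        string token = *piece;
        if (token.empty()) { continue; }
        smatch                 m;
        string::const_iterator c = token.cbegin(), e = token.cend();

        while (regex_search(c, e, m, word)) {

          for (auto p = c; p != m[0].first; ++p)
            tokens.push_back(string(1, *p));  // punctuation before the word
          tokens.push_back(m[0].str());       // the word itself
          c = m[0].second;

        }

        for (; c != e; ++c)
          tokens.push_back(string(1, *c));    // trailing punctuation

      }

      return tokens;

    }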
From 17752465e6b3c70fd0104fae7bb1f84c1cb8bb66 Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Wed, 18 Jan 2023 22:31:55 +0100
Subject: nit

---
 custom_mutators/autotokens/README         | 2 ++
 custom_mutators/autotokens/TODO           | 8 +-------
 custom_mutators/autotokens/autotokens.cpp | 7 ++-----
 3 files changed, 5 insertions(+), 12 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index f6e9c753..f82dcd98 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -11,6 +11,8 @@ If you have a dictionary (`-x`), this improves this custom grammar mutator.
 If **not** running with `CMPLOG`, it is possible to set
 `AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes.
 
+Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
+
 ## Configuration via environment variables
 
 `AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO
index 2e5e384f..95b79373 100644
--- a/custom_mutators/autotokens/TODO
+++ b/custom_mutators/autotokens/TODO
@@ -1,12 +1,6 @@
-keep whitespace as-is or record it? MAYBE
-0=space 1=tab 2=linefeed
-
 cmplog: only add tokens that were found to fit?
 
 create from thin air if no good seed after a cycle and dict large enough?
 (static u32 no_of_struct_inputs;)
 
-splice insert, splice overwrite
-(linefeed, semicolon)
-
-
+splicing -> check if whitespace/token is needed
\ No newline at end of file
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index c9ec4352..5e683455 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -217,11 +217,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
                     MIN(m_size - dst_off - AUTOTOKENS_SPLICE_MIN,
                         src_size - src_off - AUTOTOKENS_SPLICE_MIN)));
 
-        for (u32 i = 0; i < n; ++i) {
-
-          m[dst_off + i] = (*src)[src_off + i];
-
-        }
+        copy(src->begin() + src_off, src->begin() + src_off + n,
+             m.begin() + dst_off);
 
         DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off);
         break;
--
cgit 1.4.1


From ec87abda93d68f489f26ed2a2ae75b4f1e26d0bb Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Sat, 4 Feb 2023 14:37:28 +0100
Subject: readme

---
 custom_mutators/autotokens/README | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index f82dcd98..86e7c9b3 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -18,6 +18,12 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
 `AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items
 `AUTOTOKENS_COMMENT` - what character or string starts a comment which will be
                        removed. Default: `/* ... */`
+`AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the amount of fuzzing performed, shifting
+                                the value by this number set, e.g. 1.
+`AUTOTOKENS_LEARN_DICT` - learn from dictionaries?
+                          0 = none
+                          1 = only -x or autodict
+                          2 = -x, autodict and `CMPLOG`
 `AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation
                                     (experimental)
 `AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE,
--
cgit 1.4.1


From e6120282556e4df79c01236849e5f6f225b8e428 Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Sun, 5 Feb 2023 14:19:10 +0100
Subject: dict fix

---
 custom_mutators/autotokens/README         |  3 +++
 custom_mutators/autotokens/autotokens.cpp | 22 +++++++++++++++-------
 2 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index 86e7c9b3..d8613232 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -24,6 +24,9 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
                           0 = none
                           1 = only -x or autodict
                           2 = -x, autodict and `CMPLOG`
+`AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and
+                                    a dictionary is loaded, then create one
+                                    initial structure based on the dictionary.
 `AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation
                                     (experimental)
 `AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE,
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index f1263600..d3ae7e9c 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -29,7 +29,7 @@ extern "C" {
 #define AUTOTOKENS_SIZE_MIN 8
 #define AUTOTOKENS_SPLICE_MIN 4
 #define AUTOTOKENS_SPLICE_MAX 64
-#define AUTOTOKENS_CREATE_FROM_THIN_AIR 1
+#define AUTOTOKENS_CREATE_FROM_THIN_AIR 0
 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0
 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog
 #define AUTOTOKENS_LEARN_DICT 2
@@ -506,14 +506,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
   }
 
   // check if there are new dictionary entries and add them to the tokens
-  if (valid_structures && learn_state < learn_dictionary_tokens) {
+  if (likely(valid_structures || create_from_thin_air) &&
+      learn_state < learn_dictionary_tokens) {
 
     if (unlikely(!learn_state)) { learn_state = 1; }
 
     while (extras_cnt < afl_ptr->extras_cnt) {
 
       u32 ok = 1, l = afl_ptr->extras[extras_cnt].len;
-      u8 *ptr = afl_ptr->extras[extras_cnt].data;
+      u8 *buf, *ptr = afl_ptr->extras[extras_cnt].data;
 
       for (u32 i = 0; i < l; ++i) {
 
@@ -528,14 +529,17 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
 
       if (ok) {
 
-        token_to_id[(char *)ptr] = current_id;
-        id_to_token[current_id] = (char *)ptr;
+        buf = (u8 *)malloc(afl_ptr->extras[extras_cnt].len + 1);
+        memcpy(buf, afl_ptr->extras[extras_cnt].data,
+               afl_ptr->extras[extras_cnt].len);
+        buf[afl_ptr->extras[extras_cnt].len] = 0;
+        token_to_id[(char *)buf] = current_id;
+        id_to_token[current_id] = (char *)buf;
         ++current_id;
 
       }
 
       ++extras_cnt;
       DEBUGF(stderr, "Added from dictionary: \"%s\"\n", ptr);
 
     }
 
@@ -600,8 +604,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
 
   }
 
-  file_mapping[fn] = structure;
   s = structure;
+  file_mapping[fn] = structure;
+  id_mapping[valid_structures] = structure;
+
+  ++valid_structures;
+  all_structure_items += structure->size();
+
   return 1;
 
 }
--
cgit 1.4.1
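A note on the cpp part of the "dict fix" commit above: AFL++ dictionary
extras are length-delimited byte buffers, not NUL-terminated C strings, so
the old `token_to_id[(char *)ptr]` could read past the end of an entry while
building the `std::string` map key. The patch therefore copies each entry
into a freshly allocated, NUL-terminated buffer first. In miniature
(illustrative C++ sketch only; `extra_t` here is a stand-in for the real
extras struct):

    #include <cstdlib>
    #include <cstring>
    #include <string>

    struct extra_t {           // stand-in for an AFL++ dictionary entry
      unsigned char *data;     // raw bytes, NOT NUL-terminated
      unsigned int   len;
    };

    // wrong: std::string((char *)e.data) scans for a NUL that may not exist.
    // right: copy with an explicit length, as the patch does:
    static char *dup_extra(const extra_t &e) {

      char *buf = (char *)malloc(e.len + 1);
      if (!buf) { return nullptr; }
      memcpy(buf, e.data, e.len);
      buf[e.len] = 0;           // now safe to use as a map key
      return buf;

    }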
From 8a2547073c500fcd637a7b276b7a38313bb70b5f Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Mon, 6 Feb 2023 08:51:20 +0100
Subject: more options

---
 custom_mutators/autotokens/README         |  2 ++
 custom_mutators/autotokens/TODO           |  4 +++-
 custom_mutators/autotokens/autotokens.cpp | 26 ++++++++++++++++++++++----
 3 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index d8613232..e9c48662 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -24,6 +24,8 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
                           0 = none
                           1 = only -x or autodict
                           2 = -x, autodict and `CMPLOG`
+`AUTOTOKENS_CHANGE_MIN` - minimum number of mutations (1-256, default 8)
+`AUTOTOKENS_CHANGE_MAX` - maximum number of mutations (1-4096, default 64)
 `AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and
                                     a dictionary is loaded, then create one
                                     initial structure based on the dictionary.
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO
index 528dff1f..496bfd45 100644
--- a/custom_mutators/autotokens/TODO
+++ b/custom_mutators/autotokens/TODO
@@ -9,7 +9,6 @@ analyze which have a DICT, and which of those are pure ascii
 corpus analysis:
 + libxml
-  - hardbuzz
 - sqlite
 - libpcap
 min len, max len, % when 95/98/99/100 ascii
@@ -20,3 +19,6 @@ AFL_TXT_MAX_LEN 65535
 AFL_TXT_MIN_LEN 16
 AFL_TXT_MIN_PERCENT=99
 
+-> NO FAV!
+
+change_min/_max values
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index d3ae7e9c..ee35c68b 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -32,7 +32,7 @@ extern "C" {
 #define AUTOTOKENS_CREATE_FROM_THIN_AIR 0
 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0
 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog
-#define AUTOTOKENS_LEARN_DICT 2
+#define AUTOTOKENS_LEARN_DICT 1
 #ifndef AUTOTOKENS_SPLICE_DISABLE
 #define AUTOTOKENS_SPLICE_DISABLE 0
 #endif
@@ -64,6 +64,8 @@ static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT;
 static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT;
 static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR;
+static int change_min = AUTOTOKENS_CHANGE_MIN;
+static int change_max = AUTOTOKENS_CHANGE_MAX;
 static u32 current_id;
 static u32 valid_structures;
 static u32 whitespace_ids;
@@ -151,8 +153,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
 
   u32 i, m_size = (u32)m.size();
 
   u32 rounds =
-      MIN(AUTOTOKENS_CHANGE_MAX,
-          MAX(AUTOTOKENS_CHANGE_MIN,
+      MIN(change_max,
+          MAX(change_min,
               MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score *
                                    afl_ptr->havoc_div / 256)));
   // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds);
@@ -1162,7 +1164,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
 
     learn_dictionary_tokens = atoi(getenv("AUTOTOKENS_LEARN_DICT"));
     if (learn_dictionary_tokens < 0 || learn_dictionary_tokens > 2) {
 
-      learn_dictionary_tokens = 2;
+      learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT;
 
     }
 
@@ -1175,6 +1177,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
 
   }
 
+  if (getenv("AUTOTOKENS_CHANGE_MIN")) {
+
+    change_min = atoi(getenv("AUTOTOKENS_CHANGE_MIN"));
+    if (change_min < 1 || change_min > 256) { change_min = AUTOTOKENS_CHANGE_MIN; }
+
+  }
+
+  if (getenv("AUTOTOKENS_CHANGE_MAX")) {
+
+    change_max = atoi(getenv("AUTOTOKENS_CHANGE_MAX"));
+    if (change_max < 1 || change_max > 4096) { change_max = AUTOTOKENS_CHANGE_MAX; }
+
+  }
+
+  if (change_max < change_min) { change_max = change_min + 1; }
+
   if (getenv("AUTOTOKENS_WHITESPACE")) {
 
     whitespace = getenv("AUTOTOKENS_WHITESPACE");
--
cgit 1.4.1


From 61439859cece05cd3e204af60bb5ff08556c490d Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Mon, 13 Feb 2023 08:26:30 +0100
Subject: cleanup

---
 custom_mutators/autotokens/README         | 4 ----
 custom_mutators/autotokens/autotokens.cpp | 8 --------
 2 files changed, 12 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index e9c48662..904b5fa3 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -29,7 +29,3 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
 `AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and
                                     a dictionary is loaded, then create one
                                     initial structure based on the dictionary.
-`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation
-                                    (experimental)
-`AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE,
-                          default is " "
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index ca738d0b..10afa2c2 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -24,7 +24,6 @@ extern "C" {
 #define AUTOTOKENS_ONLY_FAV 0
 #define AUTOTOKENS_CHANGE_MIN 8
 #define AUTOTOKENS_CHANGE_MAX 64
-#define AUTOTOKENS_WHITESPACE " "
 #define AUTOTOKENS_SIZE_MIN 8
 #define AUTOTOKENS_SPLICE_MIN 4
 #define AUTOTOKENS_SPLICE_MAX 64
@@ -75,7 +74,6 @@ static unordered_map<string, vector<u32> *> file_mapping;
 static unordered_map<u32, vector<u32> *>    id_mapping;
 static unordered_map<string, u32>           token_to_id;
 static unordered_map<u32, string>           id_to_token;
-static string whitespace = AUTOTOKENS_WHITESPACE;
 static string output;
 static regex *regex_comment_custom;
 // multiline requires g++-11 libs :(
@@ -913,12 +911,6 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
 
   if (change_max < change_min) { change_max = change_min + 1; }
 
-  if (getenv("AUTOTOKENS_WHITESPACE")) {
-
-    whitespace = getenv("AUTOTOKENS_WHITESPACE");
-
-  }
-
   if (getenv("AUTOTOKENS_COMMENT")) {
 
     char buf[256];
--
cgit 1.4.1


From 5a0100c6eece0d668c7040ec6e6ed3f59ef0d1ba Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Mon, 13 Feb 2023 10:01:02 +0100
Subject: add to readme

---
 custom_mutators/autotokens/README | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index 904b5fa3..295cd736 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -20,6 +20,8 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
                        removed. Default: `/* ... */`
 `AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the amount of fuzzing performed, shifting
                                 the value by this number set, e.g. 1.
+`AUTOTOKENS_AUTO_DISABLE` - disable this module if the seeds are not ascii
+                            (or no input and no (ascii) dictionary)
 `AUTOTOKENS_LEARN_DICT` - learn from dictionaries?
                           0 = none
                           1 = only -x or autodict
                           2 = -x, autodict and `CMPLOG`
--
cgit 1.4.1
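The commits above repeat one pattern several times: read an environment
variable, convert it with `atoi()`, and fall back to the compile-time default
when the value is out of range. As a generic helper that same logic could
look like this (C++ sketch only; no such helper exists in the repository):

    #include <cstdlib>

    static int env_int_clamped(const char *name, int def, int lo, int hi) {

      const char *v = getenv(name);
      if (!v) { return def; }
      int x = atoi(v);
      if (x < lo || x > hi) { return def; }  // out of range -> use default
      return x;

    }

    // hypothetical usage, mirroring AUTOTOKENS_CHANGE_MIN/MAX handling:
    //   change_min = env_int_clamped("AUTOTOKENS_CHANGE_MIN", 8, 1, 256);
    //   change_max = env_int_clamped("AUTOTOKENS_CHANGE_MAX", 64, 1, 4096);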
From add2eb42c0f0e2b590fcb17427e5fce29c2fdd54 Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Thu, 23 Feb 2023 15:26:41 +0100
Subject: nits

---
 .gitignore                        | 121 +++++++++++++++++++-------------------
 custom_mutators/autotokens/README |   7 ++-
 custom_mutators/autotokens/TODO   |   3 -
 3 files changed, 66 insertions(+), 65 deletions(-)
 delete mode 100644 custom_mutators/autotokens/TODO

(limited to 'custom_mutators/autotokens/README')

diff --git a/.gitignore b/.gitignore
index 45d8676c..c01750e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,104 +1,107 @@
 !coresight_mode
 !coresight_mode/coresight-trace
 *.dSYM
 *.o
 *.o.tmp
 *.pyc
 *.so
 *.swp
 .sync_tmp
 .test
 .test2
 .vscode
 afl-analyze
 afl-analyze.8
 afl-as
 afl-as.8
 afl-c++
 afl-c++.8
 afl-cc
 afl-cc.8
 afl-clang
 afl-clang++
 afl-clang-fast
 afl-clang-fast++
 afl-clang-fast++.8
 afl-clang-fast.8
 afl-clang-lto
 afl-clang-lto++
 afl-clang-lto++.8
 afl-clang-lto.8
 afl-cmin.8
 afl-cmin.bash.8
 afl-cs-proxy
 afl-frida-trace.so
 afl-fuzz
 afl-fuzz.8
 afl-g++
 afl-g++.8
 afl-gcc
 afl-gcc.8
 afl-gcc-fast
 afl-gcc-fast.8
 afl-g++-fast
 afl-g++-fast.8
 afl-gotcpu
 afl-gotcpu.8
 afl-ld
 afl-ld-lto
 afl-lto
 afl-lto++
 afl-lto++.8
 afl-lto.8
 afl-persistent-config.8
 afl-plot.8
 afl-qemu-trace
 afl-showmap
 afl-showmap.8
 afl-system-config.8
 afl-tmin
 afl-tmin.8
 afl-whatsup.8
 a.out
 as
 compile_commands.json
 core*
 examples/afl_frida/afl-frida
 examples/afl_frida/frida-gum-example.c
 examples/afl_frida/frida-gum.h
 examples/afl_frida/libtestinstr.so
 examples/afl_network_proxy/afl-network-client
 examples/afl_network_proxy/afl-network-server
 examples/aflpp_driver/libAFLDriver.a
 examples/aflpp_driver/libAFLQemuDriver.a
 gmon.out
 in
 ld
 libAFLDriver.a
 libAFLQemuDriver.a
 out
 qemu_mode/libcompcov/compcovtest
 qemu_mode/qemu-*
 qemu_mode/qemuafl
 test/.afl_performance
 test-instr
 test/output
 test/test-c
 test/test-cmplog
 test/test-compcov
 test/test-instr.ts
 test/test-persistent
 test/unittests/unit_hash
 test/unittests/unit_list
 test/unittests/unit_maybe_alloc
 test/unittests/unit_preallocable
 test/unittests/unit_rand
 unicorn_mode/samples/*/output/
 unicorn_mode/samples/*/\.test-*
 utils/afl_network_proxy/afl-network-client
 utils/afl_network_proxy/afl-network-server
 utils/afl_proxy/afl-proxy
 utils/optimin/build
 utils/optimin/optimin
 utils/persistent_mode/persistent_demo
 utils/persistent_mode/persistent_demo_new
 utils/persistent_mode/test-instr
 utils/plot_ui/afl-plot-ui
 vuln_prog
diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index 295cd736..cca168fd 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -1,8 +1,9 @@
-# autotokens
+# Autotokens
 
 This implements an improved autotoken grammar fuzzing idea presented in
 [Token-Level Fuzzing](https://www.usenix.org/system/files/sec21-salls.pdf).
-It is a grammar fuzzer without actually knowing the grammar.
+It is a grammar fuzzer without actually knowing the grammar, but only works
+with text-based inputs.
 
 It is recommended to run it together in an instance with `CMPLOG`.
 
@@ -19,7 +20,7 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
 `AUTOTOKENS_COMMENT` - what character or string starts a comment which will be
                        removed. Default: `/* ... */`
 `AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the amount of fuzzing performed, shifting
-                                the value by this number set, e.g. 1.
+                                the value by this number, e.g. 1.
 `AUTOTOKENS_AUTO_DISABLE` - disable this module if the seeds are not ascii
                             (or no input and no (ascii) dictionary)
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO
deleted file mode 100644
index 2e99e147..00000000
--- a/custom_mutators/autotokens/TODO
+++ /dev/null
@@ -1,3 +0,0 @@
-env for the amount per mutation run
-
-change_min/_max values
--
cgit 1.4.1
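Taken together, the series leaves autotokens with a simple core data model:
every distinct token string is interned to a numeric id, each queue entry
maps to a structure (a sequence of ids), mutations operate on those id
sequences, and output is produced by concatenating the token strings back
together. In miniature (illustrative C++ sketch, not the repository code):

    #include <string>
    #include <unordered_map>
    #include <vector>

    static std::unordered_map<std::string, unsigned> token_to_id;
    static std::unordered_map<unsigned, std::string> id_to_token;

    // map a token string to a stable id, creating one if unseen
    static unsigned intern(const std::string &tok) {

      auto it = token_to_id.find(tok);
      if (it != token_to_id.end()) { return it->second; }
      unsigned id = (unsigned)token_to_id.size();
      token_to_id[tok] = id;
      id_to_token[id] = tok;
      return id;

    }

    // turn a (possibly mutated) structure back into fuzzer output
    static std::string detokenize(const std::vector<unsigned> &ids) {

      std::string out;
      for (unsigned id : ids) { out += id_to_token[id]; }
      return out;

    }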