From 14d8eb9e40a6329abcb2f153174b543349c68c13 Mon Sep 17 00:00:00 2001
From: vanhauser-thc <vh@thc.org>
Date: Wed, 18 Jan 2023 22:17:14 +0100
Subject: autotoken: splicing; splice_optout

---
 docs/custom_mutators.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'docs/custom_mutators.md')
diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md
index 4ffeda7a..322caa5b 100644
--- a/docs/custom_mutators.md
+++ b/docs/custom_mutators.md
@@ -48,6 +48,7 @@ C/C++:
 ```c
 void *afl_custom_init(afl_state_t *afl, unsigned int seed);
 unsigned int afl_custom_fuzz_count(void *data, const unsigned char *buf, size_t buf_size);
+void afl_custom_splice_optout(void *data);
 size_t afl_custom_fuzz(void *data, unsigned char *buf, size_t buf_size, unsigned char **out_buf, unsigned char *add_buf, size_t add_buf_size, size_t max_size);
 const char *afl_custom_describe(void *data, size_t max_description_len);
 size_t afl_custom_post_process(void *data, unsigned char *buf, size_t buf_size, unsigned char **out_buf);
@@ -72,6 +73,9 @@ def init(seed):
 def fuzz_count(buf):
     return cnt
 
+def splice_optout():
+    pass
+
 def fuzz(buf, add_buf, max_size):
     return mutated_out
 
@@ -132,6 +136,13 @@ def deinit():  # optional for Python
     for a specific queue entry, use this function. This function is most useful
     if `AFL_CUSTOM_MUTATOR_ONLY` is **not** used.
 
+- `splice_optout` (optional):
+
+    If this function is present, no splicing target is passed to the `fuzz`
+    function. This saves time if splicing data is not needed by the custom
+    fuzzing function.
+    This function is never called; it only needs to be present to opt out.
+
 - `fuzz` (optional):
 
     This method performs custom mutations on a given input. It also accepts an
-- 
cgit 1.4.1


From eeca3a0b2939c605497e9b3a615ee4a466f4a3f2 Mon Sep 17 00:00:00 2001
From: vanhauser-thc <vh@thc.org>
Date: Thu, 19 Jan 2023 11:52:19 +0100
Subject: lots of fixes

---
 custom_mutators/autotokens/TODO           |   2 +-
 custom_mutators/autotokens/autotokens.cpp | 424 +++++++++++++++++++-----------
 docs/custom_mutators.md                   |   1 +
 include/afl-fuzz.h                        |  11 +-
 src/afl-fuzz-one.c                        |   3 +-
 5 files changed, 279 insertions(+), 162 deletions(-)

(limited to 'docs/custom_mutators.md')

diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO
index 95b79373..2e39511c 100644
--- a/custom_mutators/autotokens/TODO
+++ b/custom_mutators/autotokens/TODO
@@ -3,4 +3,4 @@ cmplog: only add tokens that were found to fit?
 create from thin air if no good seed after a cycle and dict large enough?
 (static u32 no_of_struct_inputs;) 
 
-splicing -> check if whitespace/token is needed
\ No newline at end of file
+splicing -> check if whitespace/token is needed
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index 4f3289c9..102bea0f 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -38,8 +38,10 @@ typedef struct my_mutator {
 
 } my_mutator_t;
 
-#define DEBUG \
+#undef DEBUGF
+#define DEBUGF \
   if (unlikely(debug)) fprintf
+#define IFDEBUG if (unlikely(debug))
 
 static afl_state *afl_ptr;
 static int        debug = AUTOTOKENS_DEBUG;
@@ -57,12 +59,12 @@ static unordered_map<u32, vector<u32> *>    id_mapping;
 static unordered_map<string, u32>           token_to_id;
 static unordered_map<u32, string>           id_to_token;
 static string                               whitespace = AUTOTOKENS_WHITESPACE;
+static string                               output;
 static regex                               *regex_comment_custom;
-static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/",
-                                regex::multiline | regex::optimize);
-static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize);
-static regex regex_whitespace(R"([ \t]+)", regex::optimize);
-static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize);
+static regex        regex_comment_star("/\\*([:print:]|\n)*?\\*/",
+                                       regex::multiline | regex::optimize);
+static regex        regex_word("[A-Za-z0-9_$.-]+", regex::optimize);
+static regex        regex_whitespace(R"([ \t]+)", regex::optimize);
 static vector<u32> *s;  // the structure of the currently selected input
 
 u32 good_whitespace_or_singleval() {
@@ -104,7 +106,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
       MAX(AUTOTOKENS_CHANGE_MIN,
           MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score *
                                afl_ptr->havoc_div / 256));
-  // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds);
+  // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds);
 
 #if AUTOTOKENS_SPLICE_DISABLE == 1
   #define AUTOTOKENS_MUT_MAX 12
@@ -112,7 +114,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
   #define AUTOTOKENS_MUT_MAX 14
 #endif
 
-  u32 max_rand = AUTOTOKENS_MUT_MAX;
+  u32 max_rand = AUTOTOKENS_MUT_MAX, new_item, pos;
 
   for (i = 0; i < rounds; ++i) {
 
@@ -122,8 +124,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
       case 0 ... 7:                                         /* fall through */
       {
 
-        u32 pos = rand_below(afl_ptr, m_size);
-        u32 cur_item = m[pos], new_item;
+        pos = rand_below(afl_ptr, m_size);
+        u32 cur_item = m[pos];
         do {
 
           new_item = rand_below(afl_ptr, current_id);
@@ -135,7 +137,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
              ((whitespace_ids < new_item && whitespace_ids >= cur_item) ||
               (whitespace_ids >= new_item && whitespace_ids < cur_item)))));
 
-        DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item);
+        DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item);
         m[pos] = new_item;
         break;
 
@@ -144,7 +146,6 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
       /* INSERT (m_size +1 so we insert also after last place) */
       case 8 ... 9: {
 
-        u32 new_item;
         do {
 
           new_item = rand_below(afl_ptr, current_id);
@@ -154,7 +155,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
         u32 pos = rand_below(afl_ptr, m_size + 1);
         m.insert(m.begin() + pos, new_item);
         ++m_size;
-        DEBUG(stderr, "INS: %u at %u\n", new_item, pos);
+        DEBUGF(stderr, "INS: %u at %u\n", new_item, pos);
 
         if (likely(!alternative_tokenize)) {
 
@@ -212,7 +213,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
             m.insert(m.begin() + dst_off, src->begin() + src_off,
                      src->begin() + src_off + n);
             m_size += n;
-            DEBUG(stderr, "SPLICE-INS: %u at %u\n", n, dst_off);
+            DEBUGF(stderr, "SPLICE-INS: %u at %u\n", n, dst_off);
+
             break;
 
           }
@@ -231,13 +233,36 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
             copy(src->begin() + src_off, src->begin() + src_off + n,
                  m.begin() + dst_off);
 
-            DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off);
+            DEBUGF(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off);
             break;
 
           }
 
         }
 
+        if (likely(!alternative_tokenize)) {
+
+          // do we need a whitespace/token at the beginning?
+          if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 &&
+              id_to_token[m[dst_off]].size() > 1) {
+
+            m.insert(m.begin() + dst_off, good_whitespace_or_singleval());
+            ++m_size;
+
+          }
+
+          // do we need a whitespace/token at the end?
+          if (dst_off + n < m_size &&
+              id_to_token[m[dst_off + n - 1]].size() > 1 &&
+              id_to_token[m[dst_off + n]].size() > 1) {
+
+            m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval());
+            ++m_size;
+
+          }
+
+        }
+
         break;
 
       }
@@ -249,11 +274,32 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
 
         if (m_size > 8) {
 
-          m.erase(m.begin() + rand_below(afl_ptr, m_size));
-          --m_size;
+          do {
+
+            pos = rand_below(afl_ptr, m_size);
+
+          } while (unlikely(pos < whitespace_ids));
+
+          // if what we delete will result in a missing whitespace/token,
+          // instead of deleting we switch the item to a whitespace or token.
+          if (likely(!alternative_tokenize) && pos && pos < m_size &&
+              id_to_token[m[pos - 1]].size() > 1 &&
+              id_to_token[m[pos + 1]].size() > 1) {
+
+            m[pos] = good_whitespace_or_singleval();
+
+          } else {
+
+            m.erase(m.begin() + pos);
+            --m_size;
+
+          }
 
         } else {
 
+          // if the data is already too small do not try to make it smaller
+          // again this run.
+
           max_rand = AUTOTOKENS_MUT_MAX - 2;
 
         }
@@ -262,14 +308,12 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
 
       }
 
-        // TODO: add full line insert splice, replace splace, delete
-
     }
 
   }
 
-  string output;
-  u32    m_size_1 = m_size - 1;
+  u32 m_size_1 = m_size - 1;
+  output = "";
 
   for (i = 0; i < m_size; ++i) {
 
@@ -282,31 +326,108 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
 
   }
 
-  u32 mutated_size = output.size();
-  u8 *mutated_out = (u8 *)afl_realloc((void **)out_buf, mutated_size);
+  u32 mutated_size = (u32)output.size();
+  u8 *mutated_out = (u8 *)output.data();
 
-  if (unlikely(!mutated_out)) {
+  if (unlikely(mutated_size > max_size)) { mutated_size = max_size; }
 
-    *out_buf = NULL;
-    return 0;
-
-  }
-
-  if (unlikely(debug)) {
+  IFDEBUG {
 
-    DEBUG(stderr, "MUTATED to %u bytes:\n", mutated_size);
+    DEBUGF(stderr, "MUTATED to %u bytes:\n", mutated_size);
     fwrite(output.data(), 1, mutated_size, stderr);
-    DEBUG(stderr, "\n---\n");
+    DEBUGF(stderr, "\n---\n");
 
   }
 
-  memcpy(mutated_out, output.data(), mutated_size);
   *out_buf = mutated_out;
   ++fuzz_count;
   return mutated_size;
 
 }
 
+/* Hand-written scanner for quoted strings. C++ regex with a regex of
+   "\"[[:print:]]*?\"" ran into a stack overflow if this matched a long
+   string, even with regex::optimize enabled. */
+u8 my_search_string(string::const_iterator cur, string::const_iterator ende,
+                    string::const_iterator *match_begin,
+                    string::const_iterator *match_end) {
+  // Returns 1 with [*match_begin, *match_end) covering the quoted string, or 0.
+  string::const_iterator start = cur, found_begin;
+  u8                     quote_type = 0;  // 0: none open, 1: in "...", 2: in '...'
+
+  while (cur < ende) {
+
+    switch (*cur) {
+
+      case '"': {
+        // a quote directly preceded by a backslash is ignored
+        if (cur == start || *(cur - 1) != '\\') {
+
+          if (!quote_type) {
+            // opening double quote: remember where the string starts
+            found_begin = cur;
+            quote_type = 1;
+
+          } else if (quote_type == 1) {
+            // closing double quote: report the span including both quotes
+            *match_begin = found_begin;
+            *match_end = cur + 1;
+            return 1;
+
+          }
+          // a '"' inside an open '...' string falls through both branches
+        }
+
+        break;
+
+      }
+
+      case '\'': {
+        // same handling as above, for single-quoted strings
+        if (cur == start || *(cur - 1) != '\\') {
+
+          if (!quote_type) {
+
+            found_begin = cur;
+            quote_type = 2;
+
+          } else if (quote_type == 2) {
+
+            *match_begin = found_begin;
+            *match_end = cur + 1;
+            return 1;
+
+          }
+
+        }
+
+        break;
+
+      }
+
+      case '\n':
+      case '\r':
+      case 0: {
+        // a line break or NUL byte abandons any string that is still open
+        quote_type = 0;
+        break;
+
+      }
+      // NOTE(review): isprint() receives a plain char; consider (unsigned char)
+      default:  // a non-printable byte also abandons an open string
+        if (unlikely(quote_type && !isprint(*cur))) { quote_type = 0; }
+        break;
+
+    }
+
+    ++cur;
+
+  }
+  // reached ende without a closing quote; the out parameters are not written
+  return 0;
+
+}
+
 /* We are not using afl_custom_queue_new_entry() because not every corpus entry
    will be necessarily fuzzed. so we use afl_custom_queue_get() instead */
 
@@ -321,7 +442,7 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
         (only_fav && !afl_ptr->queue_cur->favored)) {
 
       s = NULL;
-      DEBUG(stderr, "cmplog not ascii or only_fav and not favorite\n");
+      DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n");
       return 0;
 
     }
@@ -356,7 +477,7 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
       }
 
       ++extras_cnt;
-      DEBUG(stderr, "Added from dictionary: \"%s\"\n", ptr);
+      DEBUGF(stderr, "Added from dictionary: \"%s\"\n", ptr);
 
     }
 
@@ -385,7 +506,7 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
       }
 
       ++a_extras_cnt;
-      DEBUG(stderr, "Added from auto dictionary: \"%s\"\n", ptr);
+      DEBUGF(stderr, "Added from auto dictionary: \"%s\"\n", ptr);
 
     }
 
@@ -415,7 +536,7 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
       fclose(fp);
       file_mapping[fn] = structure;  // NULL ptr so we don't read the file again
       s = NULL;
-      DEBUG(stderr, "Too short (%lu) %s\n", len, filename);
+      DEBUGF(stderr, "Too short (%lu) %s\n", len, filename);
       return 0;
 
     }
@@ -443,14 +564,14 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
         file_mapping[fn] = NULL;
         s = NULL;
-        DEBUG(stderr, "Not text (%lu) %s\n", len, filename);
+        DEBUGF(stderr, "Not text (%lu) %s\n", len, filename);
         return 0;
 
       }
 
     }
 
-    // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n",
+    // DEBUGF(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n",
     // input.size(), filename, input.c_str());
 
     if (regex_comment_custom) {
@@ -463,15 +584,15 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
     }
 
-    DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(),
-          filename, input.c_str());
+    DEBUGF(stderr, "After replace %lu bytes for %s\n%s\n", input.size(),
+           filename, input.c_str());
 
     u32  spaces = count(input.begin(), input.end(), ' ');
     u32  tabs = count(input.begin(), input.end(), '\t');
     u32  linefeeds = count(input.begin(), input.end(), '\n');
     bool ends_with_linefeed = input[input.length() - 1] == '\n';
-    DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs,
-          linefeeds, ends_with_linefeed);
+    DEBUGF(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs,
+           linefeeds, ends_with_linefeed);
     all_spaces += spaces;
     all_tabs += tabs;
     all_lf += linefeeds;
@@ -479,25 +600,28 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
     // now extract all tokens
     vector<string>         tokens;
-    smatch                 match;
-    string::const_iterator cur = input.begin(), ende = input.end(), found, prev;
+    string::const_iterator cur = input.begin(), ende = input.end(), found, prev,
+                           match_begin, match_end;
 
-    DEBUG(stderr, "START!\n");
+    DEBUGF(stderr, "START!\n");
 
     if (likely(!alternative_tokenize)) {
 
-      while (regex_search(cur, ende, match, regex_string,
-                          regex_constants::match_any |
-                              regex_constants::match_not_null |
-                              regex_constants::match_continuous)) {
+      while (my_search_string(cur, ende, &match_begin, &match_end)) {
 
         prev = cur;
-        found = match[0].first;
-        cur = match[0].second;
-        DEBUG(stderr,
-              "string %s found at start %lu offset %lu continue at %lu\n",
-              match[0].str().c_str(), prev - input.begin(), match.position(),
-              cur - input.begin());
+        found = match_begin;
+        cur = match_end;
+
+        IFDEBUG {
+
+          string foo(match_begin, match_end);
+          DEBUGF(stderr,
+                 "string %s found at start %lu offset %lu continue at %lu\n",
+                 foo.c_str(), prev - input.begin(), found - prev,
+                 cur - input.begin());
+
+        }
 
         if (prev < found) {  // there are items between search start and find
           while (prev < found) {
@@ -512,8 +636,8 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
               }
 
               tokens.push_back(std::string(start, prev));
-              DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start,
-                    tokens[tokens.size() - 1].c_str());
+              DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start,
+                     tokens[tokens.size() - 1].c_str());
 
             } else if (isalnum(*prev) || *prev == '$' || *prev == '_') {
 
@@ -525,14 +649,14 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
               }
 
-              tokens.push_back(std::string(start, prev));
-              DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start,
-                    tokens[tokens.size() - 1].c_str());
+              tokens.push_back(string(start, prev));
+              DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start,
+                     tokens[tokens.size() - 1].c_str());
 
             } else {
 
-              tokens.push_back(std::string(prev, prev + 1));
-              DEBUG(stderr, "OTHER \"%c\"\n", *prev);
+              tokens.push_back(string(prev, prev + 1));
+              DEBUGF(stderr, "OTHER \"%c\"\n", *prev);
               ++prev;
 
             }
@@ -541,11 +665,12 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
         }
 
-        if (match[0].length() > 0) { tokens.push_back(match[0]); }
+        tokens.push_back(string(match_begin, match_end));
+        DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str());
 
       }
 
-      DEBUG(stderr, "AFTER all strings\n");
+      DEBUGF(stderr, "AFTER all strings\n");
 
       if (cur < ende) {
 
@@ -561,8 +686,8 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
             }
 
             tokens.push_back(std::string(start, cur));
-            DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start,
-                  tokens[tokens.size() - 1].c_str());
+            DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start,
+                   tokens[tokens.size() - 1].c_str());
 
           } else if (isalnum(*cur) || *cur == '$' || *cur == '_') {
 
@@ -575,13 +700,13 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
             }
 
             tokens.push_back(std::string(start, cur));
-            DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start,
-                  tokens[tokens.size() - 1].c_str());
+            DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start,
+                   tokens[tokens.size() - 1].c_str());
 
           } else {
 
             tokens.push_back(std::string(cur, cur + 1));
-            DEBUG(stderr, "OTHER \"%c\"\n", *cur);
+            DEBUGF(stderr, "OTHER \"%c\"\n", *cur);
             ++cur;
 
           }
@@ -593,19 +718,21 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
     } else {
 
       // alternative tokenize
-
-      while (regex_search(cur, ende, match, regex_string,
-                          regex_constants::match_any |
-                              regex_constants::match_not_null |
-                              regex_constants::match_continuous)) {
+      while (my_search_string(cur, ende, &match_begin, &match_end)) {
 
         prev = cur;
-        found = match[0].first;
-        cur = match[0].second;
-        DEBUG(stderr,
-              "string %s found at start %lu offset %lu continue at %lu\n",
-              match[0].str().c_str(), prev - input.begin(), match.position(),
-              cur - input.begin());
+        found = match_begin;
+        cur = match_end;
+        IFDEBUG {
+
+          string foo(match_begin, match_end);
+          DEBUGF(stderr,
+                 "string %s found at start %lu offset %lu continue at %lu\n",
+                 foo.c_str(), prev - input.begin(), found - prev,
+                 cur - input.begin());
+
+        }
+
         if (prev < found) {  // there are items between search start and find
           sregex_token_iterator it{prev, found, regex_whitespace, -1};
           vector<std::string>   tokenized{it, {}};
@@ -619,10 +746,10 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
                           tokenized.end());
           tokens.reserve(tokens.size() + tokenized.size() * 2 + 1);
 
-          if (unlikely(debug)) {
+          IFDEBUG {
 
-            DEBUG(stderr, "tokens: %lu   input size: %lu\n", tokenized.size(),
-                  input.size());
+            DEBUGF(stderr, "tokens1: %lu   input size: %lu\n", tokenized.size(),
+                   input.size());
             for (auto x : tokenized) {
 
               cerr << x << endl;
@@ -636,10 +763,7 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
             string::const_iterator c = token.begin(), e = token.end(), f, p;
             smatch                 m;
 
-            while (regex_search(c, e, m, regex_word,
-                                regex_constants::match_any |
-                                    regex_constants::match_not_null |
-                                    regex_constants::match_continuous)) {
+            while (regex_search(c, e, m, regex_word)) {
 
               p = c;
               f = m[0].first;
@@ -649,10 +773,10 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
                 // there are items between search start and find
                 while (p < f) {
 
-                  if (unlikely(debug)) {
+                  IFDEBUG {
 
                     string foo(p, p + 1);
-                    DEBUG(stderr, "before string: \"%s\"\n", foo.c_str());
+                    DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str());
 
                   }
 
@@ -661,20 +785,21 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
                 }
 
-                /*
-                                string foo(p, f);
-                                DEBUG(stderr, "before string: \"%s\"\n",
-                   foo.c_str()); tokens.push_back(std::string(p, f));
-                */
+                IFDEBUG {
+
+                  string foo(p, f);
+                  DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str());
+                  tokens.push_back(std::string(p, f));
+
+                }
 
               }
 
-              DEBUG(
-                  stderr,
-                  "SUBstring \"%s\" found at start %lu offset %lu continue at "
-                  "%lu\n",
-                  m[0].str().c_str(), p - input.begin(), m.position(),
-                  c - token.begin());
+              DEBUGF(stderr,
+                     "SUBstring \"%s\" found at start %lu offset %lu continue "
+                     "at %lu\n",
+                     m[0].str().c_str(), p - input.begin(), m.position(),
+                     c - token.begin());
               tokens.push_back(m[0].str());
 
             }
@@ -683,10 +808,10 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
               while (c < e) {
 
-                if (unlikely(debug)) {
+                IFDEBUG {
 
                   string foo(c, c + 1);
-                  DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+                  DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str());
 
                 }
 
@@ -695,17 +820,14 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
               }
 
-              /*
-                            if (unlikely(debug)) {
+              IFDEBUG {
 
-                              string foo(c, e);
-                              DEBUG(stderr, "after string: \"%s\"\n",
-                 foo.c_str());
+                string foo(c, e);
+                DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str());
 
-                            }
+              }
 
-                            tokens.push_back(std::string(c, e));
-              */
+              tokens.push_back(std::string(c, e));
 
             }
 
@@ -713,7 +835,7 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
         }
 
-        if (match[0].length() > 0) { tokens.push_back(match[0]); }
+        tokens.push_back(string(match_begin, match_end));
 
       }
 
@@ -727,10 +849,10 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
             tokenized.end());
         tokens.reserve(tokens.size() + tokenized.size() * 2 + 1);
 
-        if (unlikely(debug)) {
+        IFDEBUG {
 
-          DEBUG(stderr, "tokens: %lu   input size: %lu\n", tokenized.size(),
-                input.size());
+          DEBUGF(stderr, "tokens2: %lu   input size: %lu\n", tokenized.size(),
+                 input.size());
           for (auto x : tokenized) {
 
             cerr << x << endl;
@@ -744,10 +866,7 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
           string::const_iterator c = token.begin(), e = token.end(), f, p;
           smatch                 m;
 
-          while (regex_search(c, e, m, regex_word,
-                              regex_constants::match_any |
-                                  regex_constants::match_not_null |
-                                  regex_constants::match_continuous)) {
+          while (regex_search(c, e, m, regex_word)) {
 
             p = c;
             f = m[0].first;
@@ -757,10 +876,10 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
               // there are items between search start and find
               while (p < f) {
 
-                if (unlikely(debug)) {
+                IFDEBUG {
 
                   string foo(p, p + 1);
-                  DEBUG(stderr, "before string: \"%s\"\n", foo.c_str());
+                  DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str());
 
                 }
 
@@ -769,25 +888,22 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
               }
 
-              /*
-                            if (unlikely(debug)) {
+              IFDEBUG {
 
-                              string foo(p, f);
-                              DEBUG(stderr, "before string: \"%s\"\n",
-                 foo.c_str());
+                string foo(p, f);
+                DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str());
 
-                            }
+              }
 
-                            tokens.push_back(std::string(p, f));
-              */
+              tokens.push_back(std::string(p, f));
 
             }
 
-            DEBUG(stderr,
-                  "SUB2string \"%s\" found at start %lu offset %lu continue at "
-                  "%lu\n",
-                  m[0].str().c_str(), p - input.begin(), m.position(),
-                  c - token.begin());
+            DEBUGF(stderr,
+                   "SUB2string \"%s\" found at start %lu offset %lu continue "
+                   "at %lu\n",
+                   m[0].str().c_str(), p - input.begin(), m.position(),
+                   c - token.begin());
             tokens.push_back(m[0].str());
 
           }
@@ -796,10 +912,10 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
             while (c < e) {
 
-              if (unlikely(debug)) {
+              IFDEBUG {
 
                 string foo(c, c + 1);
-                DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+                DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str());
 
               }
 
@@ -808,16 +924,14 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
             }
 
-            /*
-                        if (unlikely(debug)) {
+            IFDEBUG {
 
-                          string foo(c, e);
-                          DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+              string foo(c, e);
+              DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str());
 
-                        }
+            }
 
-                        tokens.push_back(std::string(c, e));
-            */
+            tokens.push_back(std::string(c, e));
 
           }
 
@@ -827,22 +941,22 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
     }
 
-    if (unlikely(debug)) {
+    IFDEBUG {
 
-      DEBUG(stderr, "DUMPING TOKENS:\n");
+      DEBUGF(stderr, "DUMPING TOKENS:\n");
       u32 size_1 = tokens.size() - 1;
       for (u32 i = 0; i < tokens.size(); ++i) {
 
-        DEBUG(stderr, "%s", tokens[i].c_str());
+        DEBUGF(stderr, "%s", tokens[i].c_str());
         if (unlikely(alternative_tokenize && i < size_1)) {
 
-          DEBUG(stderr, "%s", whitespace.c_str());
+          DEBUGF(stderr, "%s", whitespace.c_str());
 
         }
 
       }
 
-      DEBUG(stderr, "---------------------------\n");
+      DEBUGF(stderr, "---------------------------\n");
 
     }
 
@@ -850,7 +964,7 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
 
       file_mapping[fn] = NULL;
       s = NULL;
-      DEBUG(stderr, "too few tokens\n");
+      DEBUGF(stderr, "too few tokens\n");
       return 0;
 
     }
@@ -886,21 +1000,23 @@ extern "C" unsigned char afl_custom_queue_get(void                *data,
     all_structure_items += structure->size();
 
     // we are done!
-    DEBUG(stderr, "DONE! We have %lu tokens in the structure\n",
-          structure->size());
+    DEBUGF(stderr, "DONE! We have %lu tokens in the structure\n",
+           structure->size());
+
+  }
 
-  } else {
+  else {
 
     if (entry->second == NULL) {
 
-      DEBUG(stderr, "Skipping %s\n", filename);
+      DEBUGF(stderr, "Skipping %s\n", filename);
       s = NULL;
       return 0;
 
     }
 
     s = entry->second;
-    DEBUG(stderr, "OK %s\n", filename);
+    DEBUGF(stderr, "OK %s\n", filename);
 
   }
 
diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md
index 322caa5b..82131c92 100644
--- a/docs/custom_mutators.md
+++ b/docs/custom_mutators.md
@@ -150,6 +150,7 @@ def deinit():  # optional for Python
     sense to use it. You would only skip this if `post_process` is used to fix
     checksums etc. so if you are using it, e.g., as a post processing library.
     Note that a length > 0 *must* be returned!
+    The returned output buffer is under **your** memory management!
 
 - `describe` (optional):
 
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index 1e8d085d..229bc025 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -844,15 +844,16 @@ struct custom_mutator {
   /**
    * Perform custom mutations on a given input
    *
-   * (Optional for now. Required in the future)
+   * (Optional)
    *
-   * @param data pointer returned in afl_custom_init by this custom mutator
+   * Getting an add_buf can be skipped by using afl_custom_splice_optout().
+   *
+   * @param[in] data Pointer returned in afl_custom_init by this custom mutator
    * @param[in] buf Pointer to the input data to be mutated and the mutated
    *     output
    * @param[in] buf_size Size of the input/output data
-   * @param[out] out_buf the new buffer. We may reuse *buf if large enough.
-   *             *out_buf = NULL is treated as FATAL.
-   * @param[in] add_buf Buffer containing the additional test case
+   * @param[out] out_buf The new buffer, under your memory management.
+   * @param[in] add_buf Buffer containing an additional test case (splicing)
    * @param[in] add_buf_size Size of the additional test case
    * @param[in] max_size Maximum size of the mutated output. The mutation must
    * not produce data larger than max_size.
diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c
index 5e352dcb..bd482562 100644
--- a/src/afl-fuzz-one.c
+++ b/src/afl-fuzz-one.c
@@ -564,8 +564,7 @@ u8 fuzz_one_original(afl_state_t *afl) {
       if (afl->cmplog_lvl == 3 ||
           (afl->cmplog_lvl == 2 && afl->queue_cur->tc_ref) ||
           afl->queue_cur->favored ||
-          !(afl->fsrv.total_execs % afl->queued_items) ||
-          get_cur_time() - afl->last_find_time > 300000) {  // 300 seconds
+          get_cur_time() - afl->last_find_time > 600000) {  // 600 seconds
 
         if (input_to_state_stage(afl, in_buf, out_buf, len)) {
 
-- 
cgit 1.4.1


From f756734ad2782c3ed56feadb4b7b23fc82a7a968 Mon Sep 17 00:00:00 2001
From: vanhauser-thc <vh@thc.org>
Date: Thu, 13 Apr 2023 12:07:27 +0200
Subject: fix attempt at post_process implementation

---
 docs/Changelog.md       |  1 +
 docs/custom_mutators.md |  9 +++++++--
 include/afl-fuzz.h      |  9 +++++++--
 src/afl-fuzz-python.c   | 49 +++++++++----------------------------------------
 src/afl-fuzz-run.c      |  7 ++++++-
 5 files changed, 30 insertions(+), 45 deletions(-)

(limited to 'docs/custom_mutators.md')

diff --git a/docs/Changelog.md b/docs/Changelog.md
index 501300b1..9436fc9f 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -13,6 +13,7 @@
       everyone who was affected!
     - allow pizza mode to be disabled when AFL_PIZZA_MODE is set to -1
     - option `-p mmopt` now also selects new queue items more often
+    - fix bug in post_process custom mutator implementation
     - print name of custom mutator in UI
   - afl-cc:
     - add CFI sanitizer variant to gcc targets
diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md
index 82131c92..a1de479e 100644
--- a/docs/custom_mutators.md
+++ b/docs/custom_mutators.md
@@ -118,7 +118,7 @@ def deinit():  # optional for Python
 
 ### Custom Mutation
 
-- `init`:
+- `init` (optional in Python):
 
     This method is called when AFL++ starts up and is used to seed RNG and set
     up buffers and state.
@@ -184,6 +184,11 @@ def deinit():  # optional for Python
     to the target, e.g. if it is too short, too corrupted, etc. If so,
     return a NULL buffer and zero length (or a 0 length string in Python).
 
+    NOTE: Do not make any random changes to the data in this function!
+
+    PERFORMANCE for C/C++: If possible, make the changes in-place (i.e., modify
+    `buf` directly and return it via `*out_buf = buf`).
+
 - `fuzz_send` (optional):
 
     This method can be used if you want to send data to the target yourself,
@@ -202,7 +207,7 @@ def deinit():  # optional for Python
     discovered if compiled with INTROSPECTION. The custom mutator can then
     return a string (const char *) that reports the exact mutations used.
 
-- `deinit`:
+- `deinit` (optional in Python):
 
     The last method to be called, deinitializing the state.
 
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index 5fd393dd..8b6502b4 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -885,14 +885,19 @@ struct custom_mutator {
    * A post-processing function to use right before AFL writes the test case to
    * disk in order to execute the target.
    *
-   * (Optional) If this functionality is not needed, simply don't define this
+   * NOTE: Do not make any random changes to the data in this function!
+   *
+   * PERFORMANCE: If you can modify the data in-place you will get better
+   *              performance. Modify buf directly and set `*out_buf = buf`.
+   *
+   * (Optional) If this functionality is not needed, simply do not define this
    * function.
    *
    * @param[in] data pointer returned in afl_custom_init by this custom mutator
    * @param[in] buf Buffer containing the test case to be executed
    * @param[in] buf_size Size of the test case
    * @param[out] out_buf Pointer to the buffer storing the test case after
-   *     processing. External library should allocate memory for out_buf.
+   *     processing. The external library should allocate memory for out_buf.
    *     It can chose to alter buf in-place, if the space is large enough.
    * @return Size of the output buffer.
    */
diff --git a/src/afl-fuzz-python.c b/src/afl-fuzz-python.c
index 673e5a6c..7dad0770 100644
--- a/src/afl-fuzz-python.c
+++ b/src/afl-fuzz-python.c
@@ -219,11 +219,14 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) {
 
   if (py_module != NULL) {
 
-    u8 py_notrim = 0, py_idx;
-    /* init, required */
+    u8 py_notrim = 0;
     py_functions[PY_FUNC_INIT] = PyObject_GetAttrString(py_module, "init");
-    if (!py_functions[PY_FUNC_INIT])
-      FATAL("init function not found in python module");
+    if (!py_functions[PY_FUNC_INIT]) {
+
+      WARNF("init function not found in python module");
+
+    }
+
     py_functions[PY_FUNC_FUZZ] = PyObject_GetAttrString(py_module, "fuzz");
     if (!py_functions[PY_FUNC_FUZZ])
       py_functions[PY_FUNC_FUZZ] = PyObject_GetAttrString(py_module, "mutate");
@@ -231,12 +234,6 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) {
         PyObject_GetAttrString(py_module, "describe");
     py_functions[PY_FUNC_FUZZ_COUNT] =
         PyObject_GetAttrString(py_module, "fuzz_count");
-    if (!py_functions[PY_FUNC_FUZZ]) {
-
-      WARNF("fuzz function not found in python module");
-
-    }
-
     py_functions[PY_FUNC_POST_PROCESS] =
         PyObject_GetAttrString(py_module, "post_process");
     py_functions[PY_FUNC_INIT_TRIM] =
@@ -263,36 +260,6 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) {
     if (!py_functions[PY_FUNC_DEINIT])
       WARNF("deinit function not found in python module");
 
-    for (py_idx = 0; py_idx < PY_FUNC_COUNT; ++py_idx) {
-
-      if (!py_functions[py_idx] || !PyCallable_Check(py_functions[py_idx])) {
-
-        if (py_idx >= PY_FUNC_INIT_TRIM && py_idx <= PY_FUNC_TRIM) {
-
-          // Implementing the trim API is optional for now
-          if (PyErr_Occurred()) { PyErr_Print(); }
-          py_notrim = 1;
-
-        } else if (py_idx >= PY_OPTIONAL) {
-
-          // Only _init and _deinit are not optional currently
-
-          if (PyErr_Occurred()) { PyErr_Print(); }
-
-        } else {
-
-          fprintf(stderr,
-                  "Cannot find/call function with index %d in external "
-                  "Python module.\n",
-                  py_idx);
-          return NULL;
-
-        }
-
-      }
-
-    }
-
     if (py_notrim) {
 
       py_functions[PY_FUNC_INIT_TRIM] = NULL;
@@ -345,6 +312,8 @@ static void init_py(afl_state_t *afl, py_mutator_t *py_mutator,
 
   (void)afl;
 
+  if (py_mutator->py_functions[PY_FUNC_INIT] == NULL) { return; }
+
   PyObject *py_args, *py_value;
 
   /* Provide the init function a seed for the Python RNG */
diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c
index f5425011..26e8549d 100644
--- a/src/afl-fuzz-run.c
+++ b/src/afl-fuzz-run.c
@@ -133,7 +133,12 @@ write_to_testcase(afl_state_t *afl, void **mem, u32 len, u32 fix) {
 
     }
 
-    if (new_mem != *mem) { *mem = new_mem; }
+    if (new_mem != *mem && new_mem != NULL && new_size > 0) {
+
+      *mem = afl_realloc((void **)mem, new_size);
+      memmove(*mem, new_mem, new_size);
+
+    }
 
     if (unlikely(afl->custom_mutators_count)) {
 
-- 
cgit 1.4.1